import os
import glob
import json
import numpy as np
import cv2
from insightface.app import FaceAnalysis
# CONFIG
DATASET_DIR = "dataset"          # one sub-folder per person, holding that person's images
OUT_DB = "faces_db.npz"          # compressed archive; embeddings stored under key "embs"
OUT_LABELS = "faces_labels.json" # JSON list of person names, row-aligned with "embs"


def _largest_face(faces):
    """Return the detected face whose bounding box has the largest area.

    The area is computed as (bbox[2] - bbox[0]) * (bbox[3] - bbox[1]).
    """
    return max(
        faces,
        key=lambda f: (f.bbox[2] - f.bbox[0]) * (f.bbox[3] - f.bbox[1]),
    )


def _unit_embedding(embedding):
    """Return *embedding* scaled to unit L2 norm, or None if it cannot be scaled.

    A zero or non-finite norm would turn the division into NaN/inf values,
    which must never be written to the database.
    """
    norm = np.linalg.norm(embedding)
    if not np.isfinite(norm) or norm == 0:
        return None
    return embedding / norm


# Stop with a readable message (instead of a FileNotFoundError traceback
# from os.listdir) and do so before the model is loaded.
if not os.path.isdir(DATASET_DIR):
    raise SystemExit(f"Dataset directory not found: {DATASET_DIR!r}")

# Use "buffalo_s" for Raspberry Pi 5 (Fast Model)
app = FaceAnalysis(name="buffalo_s", providers=["CPUExecutionProvider"])
app.prepare(ctx_id=0, det_size=(640, 640))

# Parallel lists: embs[i] is an embedding belonging to the person labels[i].
embs = []
labels = []

print("--- Starting Enrollment ---")
people = sorted(
    d for d in os.listdir(DATASET_DIR)
    if os.path.isdir(os.path.join(DATASET_DIR, d))
)

for person in people:
    # Escape the directory part so folder names containing glob
    # metacharacters ("[", "]", "*", "?") are matched literally.
    person_dir = glob.escape(os.path.join(DATASET_DIR, person))
    # Sort for a reproducible row order; keep regular files only.
    files = sorted(
        p for p in glob.glob(os.path.join(person_dir, "*.*"))
        if os.path.isfile(p)
    )
    print(f"Processing {person}: {len(files)} images found.")

    person_embs = []
    for path in files:
        img = cv2.imread(path)
        if img is None:
            # cv2.imread returned None: the file could not be decoded.
            print(f" [SKIP] Could not read image {os.path.basename(path)}")
            continue

        faces = app.get(img)
        if not faces:
            print(f" [SKIP] No face detected in {os.path.basename(path)}")
            continue

        # When several faces are detected, enroll only the largest one.
        face = _largest_face(faces)

        # Normalize embedding
        norm_emb = _unit_embedding(face.embedding)
        if norm_emb is None:
            print(f" [SKIP] Degenerate embedding for {os.path.basename(path)}")
            continue
        person_embs.append(norm_emb)

    if not person_embs:
        print(f" [WARN] No valid faces found for {person}!")
        continue

    # Store all valid embeddings, one label entry per embedding row.
    embs.extend(person_embs)
    labels.extend([person] * len(person_embs))
    print(f" [OK] Enrolled {len(person_embs)} features for {person}")

if embs:
    embs = np.array(embs, dtype=np.float32)
    np.savez_compressed(OUT_DB, embs=embs)
    with open(OUT_LABELS, "w", encoding="utf-8") as f:
        json.dump(labels, f)
    print(f"\nSuccess! Saved {len(embs)} embeddings to {OUT_DB}")
else:
    print("\nFailed. No embeddings generated.")