from utils.text import KEYWORDS, best_label
import re

# Only this many leading non-blank lines take part in keyword scoring.
_MAX_SCORED_LINES = 40

# (compiled pattern, label that receives the bonus, bonus) -- checked against
# the whole lower-cased text, not just the scored lines.
_REGEX_HINTS = (
    (re.compile(r"\b(\d{4}[ -]?\d{4}[ -]?\d{4})\b"), 'aadhaar', 25),
    (re.compile(r"\b(epic|elector|voter)\b"), 'voter', 20),
    (re.compile(r"\b(khata|khatian|খত|খতিয়ান|dag|দাগ|plot)\b"), 'land', 20),
)


def classify_text(text: str) -> str:
    """Return the key of ``KEYWORDS`` that best matches *text*.

    Scoring has two parts:

    1. For every ``KEYWORDS`` entry, ``best_label(line, labels)`` is averaged
       over the first ``_MAX_SCORED_LINES`` non-blank lines of the lower-cased
       text. ``best_label`` is expected to return a number.
    2. Fixed bonuses are added when a hint regex matches anywhere in the
       lower-cased text (a 12-digit number in 4-4-4 groups, voter-related
       words, land-record words).

    Args:
        text: Raw document text. A falsy value (``None``, ``""``) is treated
            as empty text.

    Returns:
        The highest-scoring key. Ties go to the key that comes first in
        ``KEYWORDS``; for empty text every score is 0, so the first key wins.

    Raises:
        KeyError: A hint regex matched but its label is not a key of
            ``KEYWORDS``.
        ValueError: ``KEYWORDS`` is empty and no hint regex matched.
    """
    lowered = (text or "").lower()
    scores = {label: 0 for label in KEYWORDS}

    non_blank = [line for line in lowered.splitlines() if line.strip()]
    sample = non_blank[:_MAX_SCORED_LINES]
    # Average over the lines actually scanned. Dividing by the total number of
    # non-blank lines would dilute the keyword scores of any document longer
    # than the sample window and let the fixed regex bonuses dominate.
    denominator = max(1, len(sample))
    for label, labels in KEYWORDS.items():
        total = sum(best_label(line, labels) for line in sample)
        scores[label] += total / denominator

    # Regex hints: strong structural cues that add a flat bonus.
    for pattern, label, bonus in _REGEX_HINTS:
        if pattern.search(lowered):
            scores[label] += bonus

    return max(scores, key=scores.get)
