import re
from dataclasses import dataclass
from typing import Optional

from rapidfuzz import fuzz

# Pre-compiled patterns; each one exposes the matched text as capture group 1.
# NOTE: in a str pattern ``\d`` matches any Unicode decimal digit, not only
# ASCII 0-9, whereas the explicit ``[0-9]`` class used in EPIC_RX is ASCII-only.

# Twelve digits in three groups of four; each pair of groups may be separated
# by one space or one hyphen (separators may be mixed or absent).
AADHAAR_RX = re.compile(r"\b(\d{4}[ -]?\d{4}[ -]?\d{4})\b")
# Two or three uppercase ASCII letters immediately followed by 6-8 digits.
EPIC_RX = re.compile(r"\b([A-Z]{2,3}[0-9]{6,8})\b")
# Three numeric fields joined by "/" or "-": two fields of 1-2 digits followed
# by a 2- or 4-digit field. The pattern does not range-check the fields and
# does not fix which of the first two is the day and which the month.
DOB_RX = re.compile(r"\b(\d{1,2}[/-]\d{1,2}[/-](?:\d{2}|\d{4}))\b")
# Exactly six consecutive digits delimited by word boundaries.
PIN_RX = re.compile(r"\b(\d{6})\b")

# Keyword lists grouped by document category. Entries mix Latin-script terms
# with Devanagari and Bengali-script terms.
# NOTE(review): presumably these lists are passed as ``labels`` to a fuzzy
# matcher elsewhere in the project — confirm against callers.
KEYWORDS = {
    "aadhaar": [
        "aadhaar",
        "uidai",
        "आधार",
        "आधार संख्या",
        "आधार नं",
        "unique identification",
    ],
    "voter": [
        "election commission",
        "voter",
        "epic",
        "elector",
        "voter id",
        "इलेक्",
        "নামপত্র",
        "EPIC",
    ],
    "land": [
        "khata",
        "khatian",
        "plot",
        "dag",
        "खतियान",
        "खातान",
        "survey",
        "khata no",
        "khatian no",
    ],
}

@dataclass
class Match:
    """Record for one detected item: a value, a start/end pair, a type tag and a score.

    NOTE(review): nothing in the visible part of the file constructs a
    ``Match``, so the per-field notes below are inferred from the field names
    and annotations — confirm against the code that builds these records.
    """
    # The matched text (presumably the captured substring).
    value: str
    # Presumably the offset where the match begins in the scanned text.
    start: int
    # Presumably the offset where the match ends — TODO confirm whether exclusive.
    end: int
    # Category tag for the match; shadows the builtin ``type`` only as an
    # attribute name. Allowed values are not defined here — TODO confirm.
    type: str
    # Score attached to the match; scale (0-1 vs 0-100) is not established
    # here — TODO confirm.
    confidence: float

def mask_aadhaar(uid: Optional[str]) -> Optional[str]:
    """Mask an identifier string, keeping only its last four digits visible.

    Args:
        uid: Raw identifier text. Any non-digit characters (spaces, hyphens,
            letters) are ignored when collecting digits.

    Returns:
        ``None`` when *uid* is falsy (``None`` or ``""``).
        ``"XXXX-XXXX-"`` followed by the last four digits when *uid* contains
        at least four digits.
        *uid* unchanged when it contains fewer than four digits.
    """
    if not uid:
        # Callers receive None (not "") for missing input, hence the
        # Optional return annotation.
        return None

    digits = re.sub(r"\D", "", uid)
    if len(digits) < 4:
        # Not enough digits to build a masked tail; hand the input back as-is.
        return uid
    return "XXXX-XXXX-" + digits[-4:]

def best_label(line: str, labels) -> float:
    """Return the best fuzzy partial-match score of *line* against *labels*.

    Args:
        line: Text to score; compared in lower case.
        labels: Iterable of label strings; each is also compared in lower case.

    Returns:
        The maximum ``fuzz.partial_ratio`` score over all labels, or ``0``
        when *labels* is empty.
    """
    # Lower-case once instead of once per label.
    needle = line.lower()
    # Lower-case the labels too: the comparison is done on the characters as
    # given, so a label written with capitals could otherwise never match the
    # lower-cased line exactly.
    return max(
        (fuzz.partial_ratio(needle, label.lower()) for label in labels),
        default=0,
    )
