import cv2
import numpy as np
from pdf2image import convert_from_path

# Debug helper: render the first page of a PDF, outline every small,
# roughly square contour (a checkbox candidate) in green, and save the
# annotated page as a JPEG for visual inspection.

# Input document and output debug image.
PDF_PATH = "Self Delcaration - Farmer Consent_Combined - Bengali_Final.pdf"
DEBUG_IMAGE_PATH = "checkbox_debug.jpg"

# Rasterization resolution; the pixel limits below are tuned for this DPI.
RENDER_DPI = 300

# Pixels at or below this gray level are treated as ink.
INK_THRESHOLD = 180

# Exclusive bounds on a candidate's bounding-box side length (pixels) and on
# its width/height ratio.
MIN_SIDE, MAX_SIDE = 10, 50
MIN_ASPECT, MAX_ASPECT = 0.8, 1.2

# Load first page of the PDF. Restrict rendering to page 1: without
# first_page/last_page every page would be rasterized even though only the
# first one is used.
images = convert_from_path(PDF_PATH, dpi=RENDER_DPI, first_page=1, last_page=1)
if not images:
    raise SystemExit(f"No pages could be rendered from {PDF_PATH!r}.")

# pdf2image yields PIL images in RGB order; OpenCV works in BGR.
img = cv2.cvtColor(np.array(images[0]), cv2.COLOR_RGB2BGR)

# Convert to grayscale and threshold. The inverted binary threshold makes
# dark ink the white foreground that findContours traces.
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
_, thresh = cv2.threshold(gray, INK_THRESHOLD, 255, cv2.THRESH_BINARY_INV)

# Find contours (outermost only, so marks inside a box are not reported
# as separate shapes).
contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

# Draw bounding boxes around checkbox-like shapes
for cnt in contours:
    x, y, w, h = cv2.boundingRect(cnt)
    aspect_ratio = w / float(h)
    if (
        MIN_SIDE < w < MAX_SIDE
        and MIN_SIDE < h < MAX_SIDE
        and MIN_ASPECT < aspect_ratio < MAX_ASPECT
    ):
        cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2)

# Save the debug image. cv2.imwrite signals failure by returning False
# rather than raising, so check it before reporting success.
if not cv2.imwrite(DEBUG_IMAGE_PATH, img):
    raise SystemExit(f"Failed to write {DEBUG_IMAGE_PATH!r}.")
print("Saved checkbox_debug.jpg with highlighted boxes.")