
    )il              #          S r SSKrSSKrSSKJr  SSKJrJrJrJ	r	J
r
Jr  SSKJrJrJrJrJr  SSKJr  SSKJrJr  SS	KJrJr  SS
KJrJr  SSKJr  SSKJ r J!r!J"r"               S#S\S\"S\#S\#S\
\   S\$S\
\\$      S\#S\%S\$S\#S\
\#   S\&S\&S\&S\SS4"S jjr'      S$S\!S\#S\
\\$      S\$S \&S\#S\
\   S\#4S! jjr(     S%S\!S\#S\
\\$      S\$S \&S\
\   S\	\   4S" jjr)g)&zIFunctions that can be used for the most common use-cases for pdfminer.six    N)StringIO)AnyBinaryIO	ContainerIteratorOptionalcast   )XMLConverterHTMLConverterTextConverterPDFPageAggregatorHOCRConverter)ImageWriter)LAParamsLTPage)	PDFDeviceTagExtractor)PDFResourceManagerPDFPageInterpreter)PDFPage)open_filename
FileOrNameAnyIOinfoutfpoutput_typecodeclaparamsmaxpagespage_numberspasswordscalerotation
layoutmode
output_dirstrip_controldebugdisable_cachingkwargsreturnc           
          U(       a2  [         R                  " 5       R                  [         R                  5        SnU(       a  [	        U5      n[        U(       + S9nSnUS:w  a.  U[        R                  :X  a  [        R                  R                  nUS:X  a  [        UXUUS9nOmUS:X  a  [        UUUUUUS9nOXUS:X  a  [        UUUUU
UUS9nOBUS	:X  a  [        UXXLS
9nO0US:X  a  [        U[        [        U5      US9nOSU 3n[!        U5      eUc   e[#        UU5      n[$        R&                  " U UUUU(       + S9 H+  nUR(                  U	-   S-  Ul        UR+                  U5        M-     UR-                  5         g)a  Parses text from inf-file and writes to outfp file-like object.

Takes loads of optional arguments but the defaults are somewhat sane.
Beware laparams: Including an empty LAParams is not the same as passing
None!

:param inf: a file-like object to read PDF structure from, such as a
    file handler (using the builtin `open()` function) or a `BytesIO`.
:param outfp: a file-like object to write the text to.
:param output_type: May be 'text', 'xml', 'html', 'hocr', 'tag'.
    Only 'text' works properly.
:param codec: Text decoding codec
:param laparams: An LAParams object from pdfminer.layout. Default is None
    but may not layout correctly.
:param maxpages: How many pages to stop parsing after
:param page_numbers: zero-indexed page numbers to operate on.
:param password: For encrypted PDFs, the password to decrypt.
:param scale: Scale factor
:param rotation: Rotation factor
:param layoutmode: Default is 'normal', see
    pdfminer.converter.HTMLConverter
:param output_dir: If given, creates an ImageWriter for extracted images.
:param strip_control: Does what it says on the tin
:param debug: Output more logging data
:param disable_caching: Does what it says on the tin
:param other:
:return: nothing, acting as it does on two streams. Use StringIO to get
    strings.
Ncachingtext)r   r   imagewriterxml)r   r   r0   stripcontrolhtml)r   r#   r%   r   r0   hocr)r   r   r2   tag)r   z1Output type can be text, html, xml or tag but is r    r"   r.   ih  )logging	getLoggersetLevelDEBUGr   r   sysstdoutbufferr   r   r   r   r   r	   r   
ValueErrorr   r   	get_pagesrotateprocess_pageclose)r   r   r   r   r   r    r!   r"   r#   r$   r%   r&   r'   r(   r)   r*   r0   rsrcmgrdevicemsginterpreterpages                         S/var/www/html/land-doc-ocr/venv/lib/python3.13/site-packages/pdfminer/high_level.pyextract_text_to_fprI      s   ^ $$W]]3K!*- _)<=G"&Ff#**!4

!!fU(
 
	#&
 
	!#
 
	U(
 
	gtHe'<EJ Bk]So$Wf5K!!## {{X-4  & LLN    pdf_filer.   c           
         Uc
  [        5       n[        U S5       n[        5        n[        [        U5      n[        US9n	[        XXVS9n
[        X5      n[        R                  " UUUUUS9 H  nUR                  U5        M     UR                  5       sSSS5        sSSS5        $ ! , (       d  f       O= fSSS5        g! , (       d  f       g= f)aK  Parse and return the text contained in a PDF file.

:param pdf_file: Either a file path or a file-like object for the PDF file
    to be worked on.
:param password: For encrypted PDFs, the password to decrypt.
:param page_numbers: List of zero-indexed page numbers to extract.
:param maxpages: The maximum number of pages to parse
:param caching: If resources should be cached
:param codec: Text decoding codec
:param laparams: An LAParams object from pdfminer.layout. If None, uses
    some default settings that often work well.
:return: a string containing all of the text extracted.
Nrbr-   )r   r   r6   )r   r   r   r	   r   r   r   r   r   r?   rA   getvalue)rK   r"   r!   r    r.   r   r   fpoutput_stringrC   rD   rF   rG   s                rH   extract_textrQ      s    , :	x	&"hjM(B$W5wUV(9%%
D $$T*
 %%' /9j	&	&jj	&	&	&s#   C A.B&	C &
B4	0C  
Cc           	   #   P  #    Uc
  [        5       n[        U S5       n[        [        U5      n[	        US9n[        XuS9n[        Xx5      n	[        R                  " XbX1US9 H(  n
U	R                  U
5        UR                  5       nUv   M*     SSS5        g! , (       d  f       g= f7f)a  Extract and yield LTPage objects

:param pdf_file: Either a file path or a file-like object for the PDF file
    to be worked on.
:param password: For encrypted PDFs, the password to decrypt.
:param page_numbers: List of zero-indexed page numbers to extract.
:param maxpages: The maximum number of pages to parse
:param caching: If resources should be cached
:param laparams: An LAParams object from pdfminer.layout. If None, uses
    some default settings that often work well.
:return: LTPage objects
NrM   r-   )r   r6   )r   r   r	   r   r   r   r   r   r?   rA   
get_result)rK   r"   r!   r    r.   r   rO   resource_managerrD   rF   rG   layouts               rH   extract_pagesrV      s     ( :	x	&"(B-g>"#3G()9B%%xG
D $$T*&&(FL
 
'	&	&s   B&A0B	B&
B#B&)r/   utf-8Nr   N g      ?r   normalNFFF)rX   Nr   TrW   N)rX   Nr   TN)*__doc__r7   r;   ior   typingr   r   r   r   r   r	   	converterr   r   r   r   r   imager   rU   r   r   	pdfdevicer   r   	pdfinterpr   r   pdfpager   utilsr   r   r   strintfloatboolrI   rQ   rV    rJ   rH   <module>rh      s   O  
  E E   $ . =  3 3 #'-1 $!o	oo o 	o
 x o o 9S>*o o o o o o o o o  !o" 
#oh -1#'(((((( 9S>*(( 	((
 (( (( x (( 	((Z -1#'!!! 9S>*! 	!
 ! x ! f!rJ   