
    ni              5       j   % S r SSKrSSKrSSKrSSKrSSKrSSKrSSKJr  SSK	J
r
  SSKJr  SSKJr  SSKJrJrJrJrJrJrJrJr  SSKrSSKrSS	KJrJr  SS
KJ r   SSK!J"r"  SSK#J$r$  SSK%J&r&J'r'  \" \(5      r)Sq*\\\\4      \+S'       S5S\'S\\\,      S\\,   S\,S\-S\,4S jjr.                      S6S\&S\\,   S\,S\\\,      S\\\,\4      S\-S\\,   S\-S\\\,\/\\/   4      S\-S\\\\0   \\\0      4      S\-S \-S!\-S"\\,   S#\\-   S$\\\0      S%\-S&\\,   S'\\,   S(\\,   S\-S\,S\\\Rb                     \\,\4   4   40S) jjr2                    S7S\&S*\&S\\\,\4      S\,S\\\,      S\\,   S\-S\\\,\/\\/   4      S\-S\\\\0   \\\0      4      S\-S \-S!\-S"\\,   S#\\-   S$\\\0      S%\-S&\\,   S'\\,   S(\\,   S\-S\\,   S\\Rb                     4.S+ jjr3                S8S\&S(\,S\,S\\\,      S\\\,\/\\/   4      S\-S\\\\0   \\\0      4      S\-S \-S!\-S"\\,   S#\\-   S$\\\0      S%\-S&\\,   S'\\,   S\-S\,SS4&S, jjr4                S8S-\,S\,S\\\,      S\\\,\/\\/   4      S\-S\\\\0   \\\0      4      S\-S \-S!\-S"\\,   S#\\-   S$\\\0      S%\-S&\\,   S(\\,   S\-S\,SS4$S. jjr5 S9S/\\\,      S\,S\\,   4S0 jjr6S:S\,S\,4S1 jjr7 S;S2\\   S\\\,\4      S\\Rb                     4S3 jjr8S\\,\4   S$\\,   S\\,\\/   4   4S4 jr9g)<aQ  This module is a wrapper of tabula, which enables table extraction from a PDF.

This module extracts tables from a PDF into a pandas DataFrame via jpype.

Instead of importing this module, you can import public interfaces such as
:func:`read_pdf()`, :func:`read_pdf_with_template()`, :func:`convert_into()`,
:func:`convert_into_by_batch()` from `tabula` module directory.

Note:
    If you want to use your own tabula-java JAR file, set ``TABULA_JAR`` to
    environment variable for JAR path.

Example:

    >>> import tabula
    >>> dfs = tabula.read_pdf("/path/to/sample.pdf", pages="all")
    N)defaultdict)deepcopy)asdict)	getLogger)AnyDictIterableListOptionalSequenceTupleUnion   )SubprocessTabulaTabulaVm)CSVParseError)localize_file)load_template)FileLikeObjTabulaOption
_tabula_vmoptionsjava_optionspathencodingforce_subprocessreturnc                    1 Skn[        X5      nU(       a  [        XR                  US9q[        (       dH  [	        XR                  S9q[        (       a)  [        R
                  (       d  [        XR                  US9qO`[        [        [        5      (       a  [        R                  X1U R                  S9  O([        U5      U-
  (       a  [        R                  S5        [        R                  X5      $ )zCall tabula-java with the given lists of Java options and tabula-py
options, as well as an optional path to pass to tabula-java as a regular
argument to use for any required output sent to stderr.
>   -Dfile.encoding=UTF8-Djava.awt.headless=true,-Dorg.slf4j.simpleLogger.defaultLogLevel=offH-Dorg.apache.commons.logging.Log=org.apache.commons.logging.impl.NoOpLog)r   silentr   )r   r#   )r   r   r#   z;java_options is ignored until rebooting the Python process.)_build_java_optionsr   r#   r   r   tabula
isinstanceupdate_encodingsetloggerwarningcall_tabula_java)r   r   r   r   r   IGNORED_JAVA_OPTIONSs         E/var/www/html/land-ocr/venv/lib/python3.13/site-packages/tabula/io.py_runr.   .   s     '|>L %%nnx

 :<O
:j//))..8J 
J 0	1	1"" 	# 	
 
\	1	1TU&&w55    
input_pathoutput_formatpandas_optionsmultiple_tables
user_agentuse_raw_urlpagesguessarearelative_arealatticestreampasswordr#   columnsrelative_columnsformatbatchoutput_pathc                    SnU(       a=  SnUR                  5       S:X  a  O&UR                  5       S:X  a  SnO[        SU< 35      eU(       a  Sn[        UU	U
UUUUUUUUUUUUS9n[        XUS9u  nn[        R
                  R                  U5      (       d=  [        [        R                  [        R                  " [        R                  5      U5      e[        R
                  R                  U5      S	:X  a  [        U S
35      e [        UUUUUS9nU(       a  [        R                  " U5        [        U5      S	:X  a  [        R!                  S5        / $ Uc  0 n[#        U5      nUR$                  nUS:X  a+  [&        R(                  " U5      nU(       a  [+        UU5      $ U$ UR-                  SU5      US'    [.        R0                  " [2        R4                  " U5      40 UD6/$ ! U(       a  [        R                  " U5        f f = f! [.        R6                  R8                   a  nSnUS-  n[;        UU5      eSnAff = f)a./  Read tables in PDF.

Args:
    input_path (str, path object or file-like object):
        File like object of target PDF file.
        It can be URL, which is downloaded by tabula-py automatically.
    output_format (str, optional):
        Output format for returned object (``dataframe`` or ``json``)
        Giving this option enforces to ignore `multiple_tables` option.
    encoding (str, optional):
        Encoding type for pandas. Default: ``utf-8``
    java_options (list, optional):
        Set java options. This option will be ignored once JVM is launched.

        Example:
            ``["-Xmx256m"]``
    pandas_options (dict, optional):
        Set pandas options.

        Example:
            ``{'header': None}``

        Note:
            With ``multiple_tables=True`` (default), pandas_options is passed
            to pandas.DataFrame, otherwise it is passed to pandas.read_csv.
            Those two functions are different for accept options like ``dtype``.
    multiple_tables (bool):
        It enables to handle multiple tables within a page. Default: ``True``

        Note:
            If `multiple_tables` option is enabled, tabula-py uses not
            :func:`pd.read_csv()`, but :func:`pd.DataFrame()`. Make
            sure to pass appropriate `pandas_options`.
    user_agent (str, optional):
        Set a custom user-agent when download a pdf from a url. Otherwise
        it uses the default ``urllib.request`` user-agent.
    use_raw_url (bool):
        It enforces to use `input_path` string for url without quoting/dequoting.
        Default: False
    pages (str, int, `iterable` of `int`, optional):
        An optional values specifying pages to extract from. It allows
        `str`,`int`, `iterable` of :`int`. Default: `1`

        Examples:
            ``'1-2,3'``, ``'all'``, ``[1,2]``
    guess (bool, optional):
        Guess the portion of the page to analyze per page. Default `True`
        If you use "area" option, this option becomes `False`.

        Note:
            As of tabula-java 1.0.3, guess option becomes independent from
            lattice and stream option, you can use guess and lattice/stream option
            at the same time.

    area (iterable of float, iterable of iterable of float, optional):
        Portion of the page to analyze(top,left,bottom,right).
        Default is entire page.

        Note:
            If you want to use multiple area options and extract in one table, it
            should be better to set ``multiple_tables=False`` for :func:`read_pdf()`

        Examples:
            ``[269.875,12.75,790.5,561]``,
            ``[[12.1,20.5,30.1,50.2], [1.0,3.2,10.5,40.2]]``

    relative_area (bool, optional):
        If all area values are between 0-100 (inclusive) and preceded by ``'%'``,
        input will be taken as % of actual height or width of the page.
        Default ``False``.
    lattice (bool, optional):
        Force PDF to be extracted using lattice-mode extraction
        (if there are ruling lines separating each cell, as in a PDF of an
        Excel spreadsheet)
    stream (bool, optional):
        Force PDF to be extracted using stream-mode extraction
        (if there are no ruling lines separating each cell, as in a PDF of an
        Excel spreadsheet)
    password (str, optional):
        Password to decrypt document. Default: empty
    silent (bool, optional):
        Suppress all stderr output.
    columns (Sequence, optional):
        X coordinates of column boundaries. Must be sorted and of a datatype that
        preserves order, e.g. tuple or list

        Example:
            ``[10.1, 20.2, 30.3]``
    relative_columns (bool, optional):
        If all values are between 0-100 (inclusive) and preceded by '%',
        input will be taken as % of actual width of the page.
        Default ``False``.
    format (str, optional):
        Format for output file or extracted object.
        (``"CSV"``, ``"TSV"``, ``"JSON"``)
    batch (str, optional):
        Convert all PDF files in the provided directory. This argument should be
        directory path.
    output_path (str, optional):
        Output file path. File format of it is depends on ``format``.
        Same as ``--outfile`` option of tabula-java.
    force_subprocess (bool):
        Force to use tabula-java subprocess mode. If you have some issue with
        jpype, try this option with same environment.
        Default ``False``.
    options (str, optional):
        Raw option string for tabula-java.

Returns:
    list of DataFrames or dict.

Raises:
    FileNotFoundError:
        If downloaded remote file doesn't exist.

    ValueError:
        If output_format is unknown format, or if downloaded remote file size is 0.

    tabula.errors.CSVParseError:
        If pandas CSV parsing failed.

    tabula.errors.JavaNotFoundError:
        If java is not installed or found.

    subprocess.CalledProcessError:
        If tabula-java execution failed.

Examples:

    Here is a simple example.
    Note that :func:`read_pdf()` only extract page 1 by default.

    Notes:
        As of tabula-py 2.0.0, :func:`read_pdf()` sets `multiple_tables=True` by
        default. If you want to get consistent output with previous version, set
        `multiple_tables=False`.

    >>> import tabula
    >>> pdf_path = "https://github.com/chezou/tabula-py/raw/master/tests/resources/data.pdf"
    >>> tabula.read_pdf(pdf_path, stream=True)
    [             Unnamed: 0   mpg  cyl   disp   hp  drat     wt   qsec  vs  am  gear  carb
    0             Mazda RX4  21.0    6  160.0  110  3.90  2.620  16.46   0   1     4     4
    1         Mazda RX4 Wag  21.0    6  160.0  110  3.90  2.875  17.02   0   1     4     4
    2            Datsun 710  22.8    4  108.0   93  3.85  2.320  18.61   1   1     4     1
    3        Hornet 4 Drive  21.4    6  258.0  110  3.08  3.215  19.44   1   0     3     1
    4     Hornet Sportabout  18.7    8  360.0  175  3.15  3.440  17.02   0   0     3     2
    5               Valiant  18.1    6  225.0  105  2.76  3.460  20.22   1   0     3     1
    6            Duster 360  14.3    8  360.0  245  3.21  3.570  15.84   0   0     3     4
    7             Merc 240D  24.4    4  146.7   62  3.69  3.190  20.00   1   0     4     2
    8              Merc 230  22.8    4  140.8   95  3.92  3.150  22.90   1   0     4     2
    9              Merc 280  19.2    6  167.6  123  3.92  3.440  18.30   1   0     4     4
    10            Merc 280C  17.8    6  167.6  123  3.92  3.440  18.90   1   0     4     4
    11           Merc 450SE  16.4    8  275.8  180  3.07  4.070  17.40   0   0     3     3
    12           Merc 450SL  17.3    8  275.8  180  3.07  3.730  17.60   0   0     3     3
    13          Merc 450SLC  15.2    8  275.8  180  3.07  3.780  18.00   0   0     3     3
    14   Cadillac Fleetwood  10.4    8  472.0  205  2.93  5.250  17.98   0   0     3     4
    15  Lincoln Continental  10.4    8  460.0  215  3.00  5.424  17.82   0   0     3     4
    16    Chrysler Imperial  14.7    8  440.0  230  3.23  5.345  17.42   0   0     3     4
    17             Fiat 128  32.4    4   78.7   66  4.08  2.200  19.47   1   1     4     1
    18          Honda Civic  30.4    4   75.7   52  4.93  1.615  18.52   1   1     4     2
    19       Toyota Corolla  33.9    4   71.1   65  4.22  1.835  19.90   1   1     4     1
    20        Toyota Corona  21.5    4  120.1   97  3.70  2.465  20.01   1   0     3     1
    21     Dodge Challenger  15.5    8  318.0  150  2.76  3.520  16.87   0   0     3     2
    22          AMC Javelin  15.2    8  304.0  150  3.15  3.435  17.30   0   0     3     2
    23           Camaro Z28  13.3    8  350.0  245  3.73  3.840  15.41   0   0     3     4
    24     Pontiac Firebird  19.2    8  400.0  175  3.08  3.845  17.05   0   0     3     2
    25            Fiat X1-9  27.3    4   79.0   66  4.08  1.935  18.90   1   1     4     1
    26        Porsche 914-2  26.0    4  120.3   91  4.43  2.140  16.70   0   1     5     2
    27         Lotus Europa  30.4    4   95.1  113  3.77  1.513  16.90   1   1     5     2
    28       Ford Pantera L  15.8    8  351.0  264  4.22  3.170  14.50   0   1     5     4
    29         Ferrari Dino  19.7    6  145.0  175  3.62  2.770  15.50   0   1     5     6
    30        Maserati Bora  15.0    8  301.0  335  3.54  3.570  14.60   0   1     5     8
    31           Volvo 142E  21.4    4  121.0  109  4.11  2.780  18.60   1   1     4     2]

    If you want to extract all pages, set ``pages="all"``.

    >>> dfs = tabula.read_pdf(pdf_path, pages="all")
    >>> len(dfs)
    4
    >>> dfs
    [       0    1      2    3     4      5      6   7   8     9
    0    mpg  cyl   disp   hp  drat     wt   qsec  vs  am  gear
    1   21.0    6  160.0  110  3.90  2.620  16.46   0   1     4
    2   21.0    6  160.0  110  3.90  2.875  17.02   0   1     4
    3   22.8    4  108.0   93  3.85  2.320  18.61   1   1     4
    4   21.4    6  258.0  110  3.08  3.215  19.44   1   0     3
    5   18.7    8  360.0  175  3.15  3.440  17.02   0   0     3
    6   18.1    6  225.0  105  2.76  3.460  20.22   1   0     3
    7   14.3    8  360.0  245  3.21  3.570  15.84   0   0     3
    8   24.4    4  146.7   62  3.69  3.190  20.00   1   0     4
    9   22.8    4  140.8   95  3.92  3.150  22.90   1   0     4
    10  19.2    6  167.6  123  3.92  3.440  18.30   1   0     4
    11  17.8    6  167.6  123  3.92  3.440  18.90   1   0     4
    12  16.4    8  275.8  180  3.07  4.070  17.40   0   0     3
    13  17.3    8  275.8  180  3.07  3.730  17.60   0   0     3
    14  15.2    8  275.8  180  3.07  3.780  18.00   0   0     3
    15  10.4    8  472.0  205  2.93  5.250  17.98   0   0     3
    16  10.4    8  460.0  215  3.00  5.424  17.82   0   0     3
    17  14.7    8  440.0  230  3.23  5.345  17.42   0   0     3
    18  32.4    4   78.7   66  4.08  2.200  19.47   1   1     4
    19  30.4    4   75.7   52  4.93  1.615  18.52   1   1     4
    20  33.9    4   71.1   65  4.22  1.835  19.90   1   1     4
    21  21.5    4  120.1   97  3.70  2.465  20.01   1   0     3
    22  15.5    8  318.0  150  2.76  3.520  16.87   0   0     3
    23  15.2    8  304.0  150  3.15  3.435  17.30   0   0     3
    24  13.3    8  350.0  245  3.73  3.840  15.41   0   0     3
    25  19.2    8  400.0  175  3.08  3.845  17.05   0   0     3
    26  27.3    4   79.0   66  4.08  1.935  18.90   1   1     4
    27  26.0    4  120.3   91  4.43  2.140  16.70   0   1     5
    28  30.4    4   95.1  113  3.77  1.513  16.90   1   1     5
    29  15.8    8  351.0  264  4.22  3.170  14.50   0   1     5
    30  19.7    6  145.0  175  3.62  2.770  15.50   0   1     5
    31  15.0    8  301.0  335  3.54  3.570  14.60   0   1     5,               0            1             2            3        4
    0  Sepal.Length  Sepal.Width  Petal.Length  Petal.Width  Species
    1           5.1          3.5           1.4          0.2   setosa
    2           4.9          3.0           1.4          0.2   setosa
    3           4.7          3.2           1.3          0.2   setosa
    4           4.6          3.1           1.5          0.2   setosa
    5           5.0          3.6           1.4          0.2   setosa
    6           5.4          3.9           1.7          0.4   setosa,      0             1            2             3            4          5
    0  NaN  Sepal.Length  Sepal.Width  Petal.Length  Petal.Width    Species
    1  145           6.7          3.3           5.7          2.5  virginica
    2  146           6.7          3.0           5.2          2.3  virginica
    3  147           6.3          2.5           5.0          1.9  virginica
    4  148           6.5          3.0           5.2          2.0  virginica
    5  149           6.2          3.4           5.4          2.3  virginica
    6  150           5.9          3.0           5.1          1.8  virginica,        0
    0   supp
    1     VC
    2     VC
    3     VC
    4     VC
    5     VC
    6     VC
    7     VC
    8     VC
    9     VC
    10    VC
    11    VC
    12    VC
    13    VC
    14    VC]
NF	dataframejsonJSONUnknown output_format=)r6   r7   r8   r9   r:   r;   r<   r#   r=   r>   r?   r@   rA   r   r3   )r5   r   3 is empty. Check the file, or download it manually.)r   r   zThe output file is empty.r   z?Error failed to create DataFrame with different column tables.
zNTry to set `multiple_tables=True`or set `names` option for `pandas_options`. 
)lower
ValueErrorr   r   osr   existsFileNotFoundErrorerrnoENOENTstrerrorgetsizer.   unlinklenr)   r*   r   r?   rD   loads_extract_fromgetpdread_csvioStringIOerrorsParserErrorr   ) r0   r1   r   r   r2   r3   r4   r5   r6   r7   r8   r9   r:   r;   r<   r#   r=   r>   r?   r@   rA   r   r   tabula_optionsr   	temporaryoutput_pandas_optionsfmtraw_jsonemessages                                    r-   read_pdfrd   Y   s&   Z F K/  "f,F6'7899!#)'N$ $JTOD)77>>$bkk%,,.GNN	wwt!D6!TUVV
-
 IIdO
6{a23	~.O


C
f}"jj0 ?;;O '6&9&9*h&O
#		,KKF 3GGHH/ IIdO 0 yy$$ 	,XGAG
  ++	,s$   G& :+H	 & H	H?'H::H?template_pathc                    [        XSUS9u  nn[        U5      n[        UUU	U
UUUUUUUUUUS9n/ n U He  n[        U 4UUUUS.[	        UR                  U5      5      D6n[        U[        5      (       a  UR                  U5        MT  UR                  U5        Mg     U(       a  [        R                  " U5        U$ ! U(       a  [        R                  " U5        f f = f)a"  Read tables in PDF with a Tabula App template.

Args:
    input_path (str, path object or file-like object):
        File like object of target PDF file.
        It can be URL, which is downloaded by tabula-py automatically.
    template_path (str, path object or file-like object):
        File like object for Tabula app template.
        It can be URL, which is downloaded by tabula-py automatically.
    pandas_options (dict, optional):
        Set pandas options like {'header': None}.
    encoding (str, optional):
        Encoding type for pandas. Default is 'utf-8'
    java_options (list, optional):
        Set java options like ``["-Xmx256m"]``.
        This option will be ignored once JVM is launched.
    user_agent (str, optional):
        Set a custom user-agent when download a pdf from a url. Otherwise
        it uses the default ``urllib.request`` user-agent.
    use_raw_url (bool):
        It enforces to use `input_path` string for url without quoting/dequoting.
        Default: False
    pages (str, int, `iterable` of `int`, optional):
        An optional values specifying pages to extract from. It allows
        `str`,`int`, `iterable` of :`int`. Default: `1`

        Examples:
            ``'1-2,3'``, ``'all'``, ``[1,2]``
    guess (bool, optional):
        Guess the portion of the page to analyze per page. Default `True`
        If you use "area" option, this option becomes `False`.

        Note:
            As of tabula-java 1.0.3, guess option becomes independent from
            lattice and stream option, you can use guess and lattice/stream option
            at the same time.

    area (iterable of float, iterable of iterable of float, optional):
        Portion of the page to analyze(top,left,bottom,right).
        Default is entire page.

        Note:
            If you want to use multiple area options and extract in one table, it
            should be better to set ``multiple_tables=False`` for :func:`read_pdf()`

        Examples:
            ``[269.875,12.75,790.5,561]``,
            ``[[12.1,20.5,30.1,50.2], [1.0,3.2,10.5,40.2]]``

    relative_area (bool, optional):
        If all area values are between 0-100 (inclusive) and preceded by ``'%'``,
        input will be taken as % of actual height or width of the page.
        Default ``False``.
    lattice (bool, optional):
        Force PDF to be extracted using lattice-mode extraction
        (if there are ruling lines separating each cell, as in a PDF of an
        Excel spreadsheet)
    stream (bool, optional):
        Force PDF to be extracted using stream-mode extraction
        (if there are no ruling lines separating each cell, as in a PDF of an
        Excel spreadsheet)
    password (str, optional):
        Password to decrypt document. Default: empty
    silent (bool, optional):
        Suppress all stderr output.
    columns (Sequence, optional):
        X coordinates of column boundaries. Must be sorted and of a datatype that
        preserves order, e.g. tuple or list

        Example:
            ``[10.1, 20.2, 30.3]``
    relative_columns (bool, optional):
        If all values are between 0-100 (inclusive) and preceded by '%',
        input will be taken as % of actual width of the page.
        Default ``False``.
    format (str, optional):
        Format for output file or extracted object.
        (``"CSV"``, ``"TSV"``, ``"JSON"``)
    batch (str, optional):
        Convert all PDF files in the provided directory. This argument should be
        directory path.
    output_path (str, optional):
        Output file path. File format of it is depends on ``format``.
        Same as ``--outfile`` option of tabula-java.
    force_subprocess (bool):
        Force to use tabula-java subprocess mode. If you have some issue with
        jpype, try this option with same environment.
        Default ``False``.
    options (str, optional):
        Raw option string for tabula-java.

Returns:
    list of DataFrame.

Raises:
    FileNotFoundError:
        If downloaded remote file doesn't exist.

    ValueError:
        If output_format is unknown format, or if downloaded remote file size is 0.

    tabula.errors.CSVParseError:
        If pandas CSV parsing failed.

    tabula.errors.JavaNotFoundError:
        If java is not installed or found.

    subprocess.CalledProcessError:
        If tabula-java execution failed.


Examples:

    You can use template file extracted by tabula app.

    >>> import tabula
    >>> tabula.read_pdf_with_template(pdf_path, "/path/to/data.tabula-template.json")
    [             Unnamed: 0   mpg  cyl   disp   hp  ...   qsec  vs  am  gear  carb
    0             Mazda RX4  21.0    6  160.0  110  ...  16.46   0   1     4     4
    1         Mazda RX4 Wag  21.0    6  160.0  110  ...  17.02   0   1     4     4
    2            Datsun 710  22.8    4  108.0   93  ...  18.61   1   1     4     1
    3        Hornet 4 Drive  21.4    6  258.0  110  ...  19.44   1   0     3     1
    4     Hornet Sportabout  18.7    8  360.0  175  ...  17.02   0   0     3     2
    5               Valiant  18.1    6  225.0  105  ...  20.22   1   0     3     1
    6            Duster 360  14.3    8  360.0  245  ...  15.84   0   0     3     4
    7             Merc 240D  24.4    4  146.7   62  ...  20.00   1   0     4     2
    8              Merc 230  22.8    4  140.8   95  ...  22.90   1   0     4     2
    9              Merc 280  19.2    6  167.6  123  ...  18.30   1   0     4     4
    10            Merc 280C  17.8    6  167.6  123  ...  18.90   1   0     4     4
    11           Merc 450SE  16.4    8  275.8  180  ...  17.40   0   0     3     3
    12           Merc 450SL  17.3    8  275.8  180  ...  17.60   0   0     3     3
    13          Merc 450SLC  15.2    8  275.8  180  ...  18.00   0   0     3     3
    14   Cadillac Fleetwood  10.4    8  472.0  205  ...  17.98   0   0     3     4
    15  Lincoln Continental  10.4    8  460.0  215  ...  17.82   0   0     3     4
    16    Chrysler Imperial  14.7    8  440.0  230  ...  17.42   0   0     3     4
    17             Fiat 128  32.4    4   78.7   66  ...  19.47   1   1     4     1
    18          Honda Civic  30.4    4   75.7   52  ...  18.52   1   1     4     2
    19       Toyota Corolla  33.9    4   71.1   65  ...  19.90   1   1     4     1
    20        Toyota Corona  21.5    4  120.1   97  ...  20.01   1   0     3     1
    21     Dodge Challenger  15.5    8  318.0  150  ...  16.87   0   0     3     2
    22          AMC Javelin  15.2    8  304.0  150  ...  17.30   0   0     3     2
    23           Camaro Z28  13.3    8  350.0  245  ...  15.41   0   0     3     4
    24     Pontiac Firebird  19.2    8  400.0  175  ...  17.05   0   0     3     2
    25            Fiat X1-9  27.3    4   79.0   66  ...  18.90   1   1     4     1
    26        Porsche 914-2  26.0    4  120.3   91  ...  16.70   0   1     5     2
    27         Lotus Europa  30.4    4   95.1  113  ...  16.90   1   1     5     2
    28       Ford Pantera L  15.8    8  351.0  264  ...  14.50   0   1     5     4
    29         Ferrari Dino  19.7    6  145.0  175  ...  15.50   0   1     5     6
    30        Maserati Bora  15.0    8  301.0  335  ...  14.60   0   1     5     8
    31           Volvo 142E  21.4    4  121.0  109  ...  18.60   1   1     4     2
    [32 rows x 12 columns],
        0            1             2            3        4
    0  NaN  Sepal.Width  Petal.Length  Petal.Width  Species
    1  5.1          3.5           1.4          0.2   setosa
    2  4.9          3.0           1.4          0.2   setosa
    3  4.7          3.2           1.3          0.2   setosa
    4  4.6          3.1           1.5          0.2   setosa
    5  5.0          3.6           1.4          0.2   setosa,
        0             1            2             3            4          5
    0  NaN  Sepal.Length  Sepal.Width  Petal.Length  Petal.Width    Species
    1  145           6.7          3.3           5.7          2.5  virginica
    2  146           6.7          3.0           5.2          2.3  virginica
    3  147           6.3          2.5           5.0          1.9  virginica
    4  148           6.5          3.0           5.2          2.0  virginica
    5  149           6.2          3.4           5.4          2.3  virginica,
        Unnamed: 0 supp  dose
    0          4.2   VC   0.5
    1         11.5   VC   0.5
    2          7.3   VC   0.5
    3          5.8   VC   0.5
    4          6.4   VC   0.5
    5         10.0   VC   0.5
    6         11.2   VC   0.5
    7         11.2   VC   0.5
    8          5.2   VC   0.5
    9          7.0   VC   0.5
    10        16.5   VC   1.0
    11        16.5   VC   1.0
    12        15.2   VC   1.0
    13        17.3   VC   1.0]
z.json)r4   suffixr5   r6   r7   r8   r9   r:   r;   r<   r#   r=   r>   r?   r@   rA   r   )r2   r   r   r   )r   r   r   rd   r   merger&   listextendappendrJ   rQ   )r0   re   r2   r   r   r4   r5   r6   r7   r8   r9   r:   r;   r<   r#   r=   r>   r?   r@   rA   r   r   r   r]   _options_force_option
dataframesoption_dfs                                r-   read_pdf_with_templaterr     s    \ $W+OD) T"H #)M  JF-!)!1 ,,V45C #t$$!!#&!!#&  IIdO IIdO s   A+B= = Cc                 \   Ub  [        U5      S:X  a  [        S5      e[        U5      n[        UUUUUU	U
UUUUUUUS9n[	        U 5      u  nn[
        R                  R                  U5      (       d=  [        [        R                  [
        R                  " [        R                  5      U5      e[
        R                  R                  U5      S:X  a  [        U S35      e [        UUUUS9  U(       a  [
        R                  " U5        gg! U(       a  [
        R                  " U5        f f = f)a`  Convert tables from PDF into a file.
Output file will be saved into `output_path`.

Args:
    input_path (file like obj):
        File like object of target PDF file.
    output_path (str):
        File path of output file.
    output_format (str, optional):
        Output format of this function (``csv``, ``json`` or ``tsv``).
        Default: ``csv``
    java_options (list, optional):
        Set java options. This option will be ignored once JVM is launched.

        Example:
            ``"-Xmx256m"``.
    pages (str, int, `iterable` of `int`, optional):
        An optional values specifying pages to extract from. It allows
        `str`,`int`, `iterable` of :`int`. Default: `1`

        Examples:
            ``'1-2,3'``, ``'all'``, ``[1,2]``
    guess (bool, optional):
        Guess the portion of the page to analyze per page. Default `True`
        If you use "area" option, this option becomes `False`.

        Note:
            As of tabula-java 1.0.3, guess option becomes independent from
            lattice and stream option, you can use guess and lattice/stream option
            at the same time.

    area (iterable of float, iterable of iterable of float, optional):
        Portion of the page to analyze(top,left,bottom,right).
        Default is entire page.

        Note:
            If you want to use multiple area options and extract in one table, it
            should be better to set ``multiple_tables=False`` for :func:`read_pdf()`

        Examples:
            ``[269.875,12.75,790.5,561]``,
            ``[[12.1,20.5,30.1,50.2], [1.0,3.2,10.5,40.2]]``

    relative_area (bool, optional):
        If all area values are between 0-100 (inclusive) and preceded by ``'%'``,
        input will be taken as % of actual height or width of the page.
        Default ``False``.
    lattice (bool, optional):
        Force PDF to be extracted using lattice-mode extraction
        (if there are ruling lines separating each cell, as in a PDF of an
        Excel spreadsheet)
    stream (bool, optional):
        Force PDF to be extracted using stream-mode extraction
        (if there are no ruling lines separating each cell, as in a PDF of an
        Excel spreadsheet)
    password (str, optional):
        Password to decrypt document. Default: empty
    silent (bool, optional):
        Suppress all stderr output.
    columns (Sequence, optional):
        X coordinates of column boundaries. Must be sorted and of a datatype that
        preserves order, e.g. tuple or list

        Example:
            ``[10.1, 20.2, 30.3]``
    format (str, optional):
        Format for output file or extracted object.
        (``"CSV"``, ``"TSV"``, ``"JSON"``)
    batch (str, optional):
        Convert all PDF files in the provided directory. This argument should be
        directory path.
    force_subprocess (bool):
        Force to use tabula-java subprocess mode. If you have some issue with
        jpype, try this option with same environment.
        Default ``False``.
    options (str, optional):
        Raw option string for tabula-java.

Raises:
    FileNotFoundError:
        If downloaded remote file doesn't exist.

    ValueError:
        If output_format is unknown format, or if downloaded remote file size is 0.

    tabula.errors.JavaNotFoundError:
        If java is not installed or found.

    subprocess.CalledProcessError:
        If tabula-java execution failed.
Nr   z('output_path' shoud not be None or emptyrh   rG   r   )rR   rI   _extract_format_for_conversionr   r   rJ   r   rK   rL   rM   rN   rO   rP   r.   rQ   )r0   rA   r1   r   r6   r7   r8   r9   r:   r;   r<   r#   r=   r>   r?   r@   r   r   r\   r   r]   s                        r-   convert_intorv     s   ` c+.!3CDD+M:F!#)N" $J/OD)77>>$bkk%,,.GNN	wwt!D6!TUVV^\4BRSIIdO 9IIdO s    D  D+	input_dirc                     U b$  [         R                  R                  U 5      (       d  [        S5      e[	        U5      n[        UUUUUUU	U
UUUU UUS9n[        UX/S9  g)a!  Convert tables from PDFs in a directory.

Args:
    input_dir (str):
        Directory path.
    output_format (str, optional):
        Output format of this function (csv, json or tsv)
    java_options (list, optional):
        Set java options like `-Xmx256m`.
        This option will be ignored once JVM is launched.
    pages (str, int, `iterable` of `int`, optional):
        An optional values specifying pages to extract from. It allows
        `str`,`int`, `iterable` of :`int`. Default: `1`

        Examples:
            ``'1-2,3'``, ``'all'``, ``[1,2]``
    guess (bool, optional):
        Guess the portion of the page to analyze per page. Default `True`
        If you use "area" option, this option becomes `False`.

        Note:
            As of tabula-java 1.0.3, guess option becomes independent from
            lattice and stream option, you can use guess and lattice/stream option
            at the same time.

    area (iterable of float, iterable of iterable of float, optional):
        Portion of the page to analyze(top,left,bottom,right).
        Default is entire page.

        Note:
            If you want to use multiple area options and extract in one table, it
            should be better to set ``multiple_tables=False`` for :func:`read_pdf()`

        Examples:
            ``[269.875,12.75,790.5,561]``,
            ``[[12.1,20.5,30.1,50.2], [1.0,3.2,10.5,40.2]]``

    relative_area (bool, optional):
        If all area values are between 0-100 (inclusive) and preceded by ``'%'``,
        input will be taken as % of actual height or width of the page.
        Default ``False``.
    lattice (bool, optional):
        Force PDF to be extracted using lattice-mode extraction
        (if there are ruling lines separating each cell, as in a PDF of an
        Excel spreadsheet)
    stream (bool, optional):
        Force PDF to be extracted using stream-mode extraction
        (if there are no ruling lines separating each cell, as in a PDF of an
        Excel spreadsheet)
    password (str, optional):
        Password to decrypt document. Default: empty
    silent (bool, optional):
        Suppress all stderr output.
    columns (Sequence, optional):
        X coordinates of column boundaries. Must be sorted and of a datatype that
        preserves order, e.g. tuple or list

        Example:
            ``[10.1, 20.2, 30.3]``
    relative_columns (bool, optional):
        If all values are between 0-100 (inclusive) and preceded by '%',
        input will be taken as % of actual width of the page.
        Default ``False``.
    format (str, optional):
        Format for output file or extracted object.
        (``"CSV"``, ``"TSV"``, ``"JSON"``)
    force_subprocess (bool):
        Force to use tabula-java subprocess mode. If you have some issue with
        jpype, try this option with same environment.
        Default ``False``.
    options (str, optional):
        Raw option string for tabula-java.

Returns:
    Nothing. Outputs are saved into the same directory with `input_dir`

Raises:
    ValueError:
        If input_dir doesn't exist.
Nz0'input_dir' should be an existing directory pathrh   rt   )rJ   r   isdirrI   ru   r   r.   )rw   r1   r   r6   r7   r8   r9   r:   r;   r<   r#   r=   r>   r?   rA   r   r   r\   s                     r-   convert_into_by_batchrz   I  ss    H i 8 8KLL+M:F!#)N" 	Ir/   _java_optionsc                 8   U c  / n O+[        U [        5      (       a  [        R                  " U 5      n [        R
                  " 5       S:X  a,  Sn[        [        UR                  U 5      5      (       d  U S/-   n US:X  a  [        S U  5       5      (       d  U S/-  n U $ )NDarwinzjava.awt.headlessr    utf-8c              3   ,   #    U  H
  nS U;   v   M     g7f)zfile.encodingN ).0opts     r-   	<genexpr>&_build_java_options.<locals>.<genexpr>  s     C]c?c)]s   r   )	r&   strshlexsplitplatformsystemanyfilterfind)r{   r   rs      r-   r$   r$     s     	M3	'	'M2 H$6!&&-011)-G,HHM7C]CCC455Mr/   c                     U R                  5       S:X  a  gU R                  5       S:X  a  gU R                  5       S:X  a  g[        SU < 35      e)NcsvCSVrD   rE   tsvTSVrF   )rH   rI   )r1   s    r-   ru   ru     sR    %				&	(				%	'2M#3455r/   ra   c           
         / nUc  0 nUR                  SS5      n[        X5      u  p4U  GH  n[        US   5      S:X  a  M  US    VVs/ s H2  nU Vs/ s H!  owS   (       d  [        R                  OUS   PM#     snPM4     nnnUn	[        U[        5      (       a  U(       d  UR                  U5      n	Sn
[        U	5       H&  u  pU[        R                  L d  M  SU
 3X'   U
S-  n
M(     [        [        5      n[        U	5       H4  u  pX   nUS:  a  US-   X'   U SU 3nX   nUS:  a  M  XU'   US-   X'   M6     [        R                  " SXS	.UD6nUR                  S
5      (       d/  UR                   H  n [        R                  " UU   SS9UU'   M!     UR!                  U5        GM     U$ s  snf s  snnf ! [        [        4 a     MY  f = f)zExtract tables from json.

Args:
    raw_json (list):
        Decoded list from tabula-java JSON.
    pandas_options (dict optional):
        pandas options for `pd.DataFrame()`
Nr=   datar   textz	Unnamed: r   .)r   r=   dtyperaise)rZ   r   )pop_convert_pandas_csv_optionsrR   npnanr&   int	enumerater   rV   	DataFramerU   r=   
to_numericrI   	TypeErrorrl   )ra   r2   data_framesr=   header_line_numbertablerowrb   	list_data_columns_unname_idxidxcolcounts	cur_countdfcs                    r-   rT   rT     s    K  D1G"=n"VGuV}" V}
$ >AASV9RVV!F)3SA$ 	 
 (#..w }}%78HK%h/"&&=&/}$=HM1$K 0
 &1%5F &h/"K	!m"+a-FK E9+.C &I  !m
 !$'!m 0 \\MyMnM!!'**ZZMM"Q%@BqE   	2U X O B
D #I.  s*   	
F7(F2;F7;F=2F7=GGc                     U R                  SU5      nU R                  SS5      nU R                  SS5        US:X  a  [        U5      (       d  SOSnX$4$ UnX$4$ )zTranslate `pd.read_csv()` options into `pd.DataFrame()` especially for header.

Args:
    pandas_options (dict):
        pandas options like {'header': None}.
    columns (iterable):
        iterable of column name.
namesheaderinferr   Nr   )r   bool)r2   r=   r   r   r   s        r-   r   r   (  sl     !!'73H'2Fz4(&*8nnQ$ '' $''r/   )NNr~   F)Nr~   NNTNFNTNFFFNNNFNNNF )Nr~   NNFNFNFFFNNNFNNNFN)r   NNTNFFFNNNFNNFr   )Nr~   )r   )N):__doc__rM   rX   rD   rJ   r   r   collectionsr   copyr   dataclassesr   loggingr   typingr   r   r	   r
   r   r   r   r   numpyr   pandasrV   backendr   r   rZ   r   	file_utilr   templater   utilr   r   __name__r)   r   __annotations__r   r   r.   r   floatr   rd   rr   rv   rz   r$   ru   rT   r   r   r/   r-   <module>r      s  $  	  	   #    N N N   / ! $ # +	8	 ;?
HU8%5567 >
 )-"(6(649%(6 3-(6 	(6
 (6 	(6Z $((,/3  $6:HL"!)-" !%"/^,^,C=^, ^, 49%	^,
 T#s(^,^, ^, ^, ^, E#sHSM123^, ^, 5%(8E?*CCD
E^, ^, ^, ^, sm^,  TN!^," huo&#^,$ %^,& SM'^,( C=)^,* #+^,, -^,. /^,0 4tCH~-.1^,H 04(, $6:HL"!)-" !%"!-www T#s(^,w 	w
 49%w w w E#sHSM123w w 5%(8E?*CCD
Ew w w w smw TNw  huo&!w" #w$ SM%w& C='w( #)w* +w, c]-w. 
",,/wz (,6:HL"!)-" "%RRR R 49%	R
 E#sHSM123R R 5%(8E?*CCD
ER R R R smR TNR huo&R R SMR  C=!R" #R$ %R& 
'Rn (,6:HL"!)-" !%"#zJzJzJ 49%zJ E#sHSM123	zJ
 zJ 5%(8E?*CCD
EzJ zJ zJ zJ smzJ TNzJ huo&zJ zJ SMzJ #zJ  !zJ" #zJ$ 
%zJ| @GDI&9<	#Y*6# 6# 6 EI?3i?)1$sCx.)A?	",,?D(cN(-5c](
3(r/   