U
    Bb9
                    @   s&  d dl Z d dlZd dlZd dlZd dlZd dlmZ d dlmZ d dl	m
Z
mZmZmZmZmZmZmZmZ ddlmZmZ ddlmZmZ ddlmZmZmZmZmZmZm Z m!Z!m"Z"m#Z# dd	l$m%Z& dd
l$m'Z' dd
l$m'Z( ddl$m)Z) ddl$m*Z+ ddl$m,Z- ddl$m.Z.m/Z/ ddl$m0Z1 ddl$m2Z3 ddl$m4Z5 ddl6m7Z7m8Z8m9Z9 ddl:m;Z;m<Z<m=Z=m>Z>m?Z?m@Z@mAZAmBZBmCZCmDZDmEZEmFZFmGZGmHZHmIZImJZJ ddlKmLZLmMZM ddlNmOZO ePeQeeQee
df f dddZRePeQeeQee
df f dddZSG dd de?ZTG dd dZUG d d! d!eUZVdS )"    N)BytesIO)Path)	AnyCallableDictIterableListOptionalTupleUnioncast   )
EncryptionPasswordType)
PageObject_VirtualList)
StrByteType
StreamTypeb_deprecate_no_replacementdeprecate_with_replacementread_non_whitespaceread_previous_lineread_until_whitespaceskip_over_commentskip_over_whitespace)CatalogAttributes)CatalogDictionary)CheckboxRadioButtonAttributes)Core)DocumentInformationAttributes)FieldDictionaryAttributesGoToActionArguments)PageAttributes)PagesAttributes)TrailerKeys)PdfReadErrorPdfReadWarningPdfStreamError)ArrayObjectContentStreamDecodedStreamObjectDestinationDictionaryObjectEncodedStreamObjectFieldFloatObjectIndirectObject
NameObject
NullObjectNumberObject	PdfObjectTextStringObject
TreeObjectread_object)OutlinesTypePagemodeType)XmpInformation.)dsizereturnc                 C   s4   |dkrt dd|  } | dd  } td| d S )N   zinvalid size in convert_to_ints           iz>qr   )r&   structunpackr<   r=    rC   2/tmp/pip-unpacked-wheel-weujb6sz/PyPDF2/_reader.pyconvert_to_int`   s
    rE   c                 C   s   t dd t| |S )NconvertToIntrE   )r   rE   rB   rC   rC   rD   rF   h   s    
rF   c                   @   s"  e Zd ZdZddddZeee dddZeee dd	d
Ze	ee dddZ
e	ee dddZe	ee dddZe	ee dddZe	ee dddZe	ee dddZe	ee dddZe	ee dddZe	ee dddZe	ee dddZdS )DocumentInformationa  
    A class representing the basic document metadata provided in a PDF File.
    This class is accessible through :py:class:`PdfReader.metadata<PyPDF2.PdfReader.metadata>`.

    All text properties of the document metadata have
    *two* properties, eg. author and author_raw. The non-raw property will
    always return a ``TextStringObject``, making it ideal for a case where
    the metadata is being displayed. The raw property can sometimes return
    a ``ByteStringObject``, if PyPDF2 was unable to decode the string's
    text encoding; this requires additional safety in the caller and
    therefore is not as commonly accessed.
    Nr>   c                 C   s   t |  d S N)r-   __init__selfrC   rC   rD   rJ   }   s    zDocumentInformation.__init__)keyr>   c                 C   s   |  |d }t|tr|S d S rI   )get
isinstancer6   )rL   rM   retvalrC   rC   rD   	_get_text   s    
zDocumentInformation._get_textc                 C   s   t d | |S )z
        The text value of the specified key or None.

        .. deprecated:: 1.28.0

            Use the attributes (e.g. :py:attr:`title` / :py:attr:`author`).
        getText)r   rQ   )rL   rM   rC   rC   rD   rR      s    zDocumentInformation.getTextc                 C   s,   |  tjr(| tjp*|  tj S dS )z
        Read-only property accessing the document's **title**.

        Returns a unicode string (``TextStringObject``) or ``None``
        if the title is not specified.
        N)rN   DITITLErQ   
get_objectrK   rC   rC   rD   title   s    

zDocumentInformation.titlec                 C   s   |  tjS )z>The "raw" version of title; can return a ``ByteStringObject``.)rN   rS   rT   rK   rC   rC   rD   	title_raw   s    zDocumentInformation.title_rawc                 C   s   |  tjS )z
        Read-only property accessing the document's **author**.

        Returns a unicode string (``TextStringObject``) or ``None``
        if the author is not specified.
        )rQ   rS   AUTHORrK   rC   rC   rD   author   s    zDocumentInformation.authorc                 C   s   |  tjS )z?The "raw" version of author; can return a ``ByteStringObject``.)rN   rS   rX   rK   rC   rC   rD   
author_raw   s    zDocumentInformation.author_rawc                 C   s   |  tjS )z
        Read-only property accessing the document's **subject**.

        Returns a unicode string (``TextStringObject``) or ``None``
        if the subject is not specified.
        )rQ   rS   SUBJECTrK   rC   rC   rD   subject   s    zDocumentInformation.subjectc                 C   s   |  tjS )z@The "raw" version of subject; can return a ``ByteStringObject``.)rN   rS   r[   rK   rC   rC   rD   subject_raw   s    zDocumentInformation.subject_rawc                 C   s   |  tjS )ax  
        Read-only property accessing the document's **creator**.

        If the document was converted to PDF from another format, this is the
        name of the application (e.g. OpenOffice) that created the original
        document from which it was converted. Returns a unicode string
        (``TextStringObject``) or ``None`` if the creator is not specified.
        )rQ   rS   CREATORrK   rC   rC   rD   creator   s    
zDocumentInformation.creatorc                 C   s   |  tjS )z@The "raw" version of creator; can return a ``ByteStringObject``.)rN   rS   r^   rK   rC   rC   rD   creator_raw   s    zDocumentInformation.creator_rawc                 C   s   |  tjS )a\  
        Read-only property accessing the document's **producer**.

        If the document was converted to PDF from another format, this is
        the name of the application (for example, OSX Quartz) that converted
        it to PDF. Returns a unicode string (``TextStringObject``)
        or ``None`` if the producer is not specified.
        )rQ   rS   PRODUCERrK   rC   rC   rD   producer   s    
zDocumentInformation.producerc                 C   s   |  tjS )zAThe "raw" version of producer; can return a ``ByteStringObject``.)rN   rS   ra   rK   rC   rC   rD   producer_raw   s    z DocumentInformation.producer_raw)__name__
__module____qualname____doc__rJ   strr	   rQ   rR   propertyrV   rW   rY   rZ   r\   r]   r_   r`   rb   rc   rC   rC   rC   rD   rG   o   s0   		rG   c                	   @   s  e Zd ZdZdeeef eedee	f ddddZ
eeddd	Zeee dd
dZee dddZeee dddZeee dddZee dddZeee dddZedddZedddZeedddZeedddZeedd d!Zeeeef dd"d#Zeeeef dd$d%Z dee! eeeef  ee eeeef  d&d'd(Z"dee! eeeef  ee eeeef  d&d)d*Z#ee!e$f eeef eedd+d,d-Z%ee!e$f eedd&d.d/Z&eeedd0d1d2Z'eeef dd3d4Z(eeef dd5d6Z)dee!df ee eeef d7d8d9Z*dee!df ee eeef d7d:d;Z+ee,dd<d=Z-dee$ ee e,d>d?d@Z.dee$ ee e,d>dAdBZ/edee0e1f edCdDdEZ2eedFdGdHZ3eedFdIdJZ4e5edKdLdMZ6e5edKdNdOZ7ee8ee9e1e0e$f  e5dPdQdRZ:e$ee5 dSdTdUZ;ee<ddVdWZ=eee ddXdYZ>ee ddZd[Z?eee dd\d]Z@eeeA dd^d_ZBeeA dd`daZCeeeA ddbdcZDdede$ef eeeef  ee1 ddddedfZEe1eeeFef dgdhdiZGe1eeF dgdjdkZHe1eeF dldmdnZIeJeKeef dodpdqZLeJeKeef dodrdsZMeeeeF dtdudvZNeeeeF dtdwdxZOeeeeF eeF dydzd{ZPeeeeF eeF dyd|d}ZQeJddod~dZReJddoddZSeJddoddZTeJedoddZUeJddoddZVeJee eddddZWeJee doddZXeJeee dddZYeJeeZe[e\f doddZ]e^eJeedddZ_eJddoddZ`e8e eaegeeeKedf f f eaeeeeKedf f gef ddddZbeddddZce8e edeKeef  dddZedeJee	dddZfdeJee	dddZgeee	f ehdddZieeeef dddZjeedddZkedddZleedddZmeeeeef  dddZndS )	PdfReadera  
    Initialize a PdfReader object.

    This operation can take some time, as the PDF stream's cross-reference
    tables are read into memory.

    :param stream: A File object or an object that supports the standard read
        and seek methods similar to a File object. Could also be a
        string representing a path to a PDF file.
    :param bool strict: Determines whether user should be warned of all
        problems and also causes some correctable problems to be fatal.
        Defaults to ``False``.
    :param None/str/bytes password: Decrypt PDF file at initialization. If the
        password is None, the file will not be decrypted.
        Defaults to ``None``
    FN)streamstrictpasswordr>   c           	   	   C   s4  || _ d | _i | _d| _d | _t|dr>d|jkr>tdt	 t
|ttfrnt|d}t| }W 5 Q R X | | || _d| _d | _| jrd| _| jtj}|r|d  jnd}tt| jtj  }t||| _|d k	r|nd}| j|tj kr|d k	rt!d	d| _n|d k	r0t!d
d S )Nr   modebzQPdfReader stream/file object is not in binary mode. It may not be read correctly.rbFT    zWrong passwordNot encrypted file)"rl   flattened_pagesresolved_objects
xref_index_page_id2numhasattrrn   warningswarnr'   rO   rh   r   openr   readrk   _override_encryption_encryptionis_encryptedtrailerrN   TKIDrU   original_bytesr   r-   ENCRYPTr   verifyr   ZNOT_DECRYPTEDr&   )	rL   rk   rl   rm   fhZid_entryZ	id1_entryZencrypt_entrypwdrC   rC   rD   rJ      sH    
 
zPdfReader.__init__rH   c                 C   s<   | j  }| j dd | j dd}| j |d |S )Nr   r?   zutf-8)rk   tellseekr{   decode)rL   locZpdf_file_versionrC   rC   rD   
pdf_header+  s
    
zPdfReader.pdf_headerc                 C   s0   t j| jkrdS | jt j }t }|| |S )a=  
        Retrieve the PDF file's document information dictionary, if it exists.
        Note that some PDF files use metadata streams instead of docinfo
        dictionaries, and these metadata streams will not be accessed by this
        function.

        :return: the document information of this PDF file
        N)r   INFOr   rG   update)rL   objrP   rC   rC   rD   metadata5  s    

zPdfReader.metadatac                 C   s   t dd | jS )d
        .. deprecated:: 1.28.0

            Use the attribute :py:attr:`metadata` instead.
        getDocumentInfor   r   r   rK   rC   rC   rD   r   F  s    
zPdfReader.getDocumentInfoc                 C   s   t dd | jS )r   documentInfor   r   rK   rC   rC   rD   r   O  s    
zPdfReader.documentInfoc                 C   s&   zd| _ | jtj jW S d| _ X dS )a  
        XMP (Extensible Metadata Platform) data

        :return: a :class:`XmpInformation<xmp.XmpInformation>`
            instance that can be used to access XMP metadata from the document.
            or ``None`` if no metadata was found on the document root.
        FTN)r|   r   r   ROOTxmp_metadatarK   rC   rC   rD   r   Y  s    	zPdfReader.xmp_metadatac                 C   s   t dd | jS )h
        .. deprecated:: 1.28.0

            Use the attribute :py:attr:`xmp_metadata` instead.
        getXmpMetadatar   r   r   rK   rC   rC   rD   r   h  s    
zPdfReader.getXmpMetadatac                 C   s   t dd | jS )r   xmpMetadatar   r   rK   rC   rC   rD   r   q  s    
zPdfReader.xmpMetadatac                 C   s:   | j r| jtj d d S | jdkr,|   t| jS dS )z
        Calculate the number of pages in this PDF file.

        :return: number of pages
        :raises PdfReadError: if file is encrypted and restrictions prevent
            this action.
        /Pages/CountN)r~   r   r   r   rs   _flattenlenrK   rC   rC   rD   _get_num_pages{  s
    
zPdfReader._get_num_pagesc                 C   s   t dd |  S )\
        .. deprecated:: 1.28.0

            Use :code:`len(reader.pages)` instead.
        zreader.getNumPageslen(reader.pages)r   r   rK   rC   rC   rD   getNumPages  s    
zPdfReader.getNumPagesc                 C   s   t dd |  S )r   zreader.numPagesr   r   rK   rC   rC   rD   numPages  s    
zPdfReader.numPages)
pageNumberr>   c                 C   s   t dd | |S )zc
        .. deprecated:: 1.28.0

            Use :code:`reader.pages[pageNumber]` instead.
        zreader.getPage(pageNumber)zreader.pages[pageNumber])r   	_get_page)rL   r   rC   rC   rD   getPage  s
     zPdfReader.getPage)page_numberr>   c                 C   s.   | j dkr|   | j dk	s$td| j | S )z
        Retrieve a page by number from this PDF file.

        :param int page_number: The page number to retrieve
            (pages begin at zero)
        :return: a :class:`PageObject<PyPDF2._page.PageObject>` instance.
        Nhint for mypy)rs   r   AssertionError)rL   r   rC   rC   rD   r     s    

zPdfReader._get_pagec                 C   s   t dd | jS )`
        .. deprecated:: 1.28.0

            Use :py:attr:`named_destinations` instead.
        namedDestinationsnamed_destinations)r   r   rK   rC   rC   rD   r     s    
zPdfReader.namedDestinationsc                 C   s   |   S )zv
        A read-only dictionary which maps names to
        :class:`Destinations<PyPDF2.generic.Destination>`
        )_get_named_destinationsrK   rC   rC   rD   r     s    zPdfReader.named_destinations)treerP   fileobjr>   c           
      C   s   t  }|t  |dkrXi }tt| jtj }t	j
|krTttt |t	j
 }ndS |dkrd|S | ||| |D ] }||krv| ||||  qqvd|krtt|d }|D ]}| }	| |	||| q|S )a  
        Extract field data if this PDF contains interactive form fields.

        The *tree* and *retval* parameters are for recursive use.

        :param fileobj: A file object (usually a text file) to write
            a report to on all interactive form fields found.
        :return: A dictionary where each key is a field name, and each
            value is a :class:`Field<PyPDF2.generic.Field>` object. By
            default, the mapping name is used for keys.
            ``None`` if form data could not be located.
        Nz/Fields)r!   Zattributes_dictr   r   r   r-   r   r   r   r   Z	ACRO_FORMr	   r7   _check_kids_build_fieldr)   rU   )
rL   r   rP   r   field_attributescatalogattrfieldsffieldrC   rC   rD   
get_fields  s*    
zPdfReader.get_fieldsc                 C   s   t dd | |||S )zU
        .. deprecated:: 1.28.0

            Use :meth:`get_fields` instead.
        	getFieldsr   )r   r   )rL   r   rP   r   rC   rC   rD   r     s    
zPdfReader.getFields)r   rP   r   r   r>   c                 C   s   |  ||| z|d }W n: tk
rT   z|d }W n tk
rN   Y Y d S X Y nX |rr| ||| |d t|||< d S )Nz/TM/T
)r   KeyError_write_fieldwriter/   )rL   r   rP   r   r   rM   rC   rC   rD   r   
  s    
zPdfReader._build_fieldc                 C   s0   t j|kr,|t j D ]}| | || qd S rI   )PAKIDSr   rU   )rL   r   rP   r   kidrC   rC   rD   r     s    
zPdfReader._check_kids)r   r   r   r>   c           	   	   C   s  t  }|t  }|D ]}|t jt jfkr.q|| }z|t jkr|ddddd}|| |kr||d |||   d  nv|t jkrz|| t j }W n" t	k
r   || t j
 }Y nX ||d | d  n||d t||  d  W q t	k
r
   Y qX qd S )NZButtonTextZChoice	Signature)z/Btn/Txz/Chz/Sig: r   )r!   
attributesr   ZKidsZAAZFTr   ZParentZTMr   Trh   )	rL   r   r   r   Zfield_attributes_tupler   	attr_nametypesnamerC   rC   rD   r   '  s8    

 
"zPdfReader._write_fieldc                    s&   |     dkri S  fdd D S )a6  
        Retrieve form fields from the document with textual data.

        The key is the name of the form field, the value is the content of the
        field.

        If the document contains multiple form fields with the same name, the
        second and following will get the suffix _2, _3, ...
        Nc                    s6   i | ].} |  d dkr | d  |  dqS )z/FTr   r   z/V)rN   ).0r   Z
formfieldsrC   rD   
<dictcomp>Z  s   
 z2PdfReader.get_form_text_fields.<locals>.<dictcomp>)r   rK   rC   r   rD   get_form_text_fieldsL  s    
zPdfReader.get_form_text_fieldsc                 C   s   t dd |  S )z_
        .. deprecated:: 1.28.0

            Use :meth:`get_form_text_fields` instead.
        getFormTextFieldsr   )r   r   rK   rC   rC   rD   r   `  s    
zPdfReader.getFormTextFields)r   rP   r>   c           
      C   s<  |dkrni }t t| jtj }tj|kr:t t|tj }n4tj|krnt t|tj }tj|krnt t|tj }|dkrz|S t	j
|krt t|t	j
 D ]}| | | qtj|kr8t t|tj }tdt|dD ]`}t t||  }||d   }t|trd|kr|d }| ||}	|	dk	r|	||< q|S )z
        Retrieve the named destinations present in the document.

        :return: a dictionary which maps names to
            :class:`Destinations<PyPDF2.generic.Destination>`.
        Nr      r   /D)r   r-   r   r   r   CAZDESTSr7   ZNAMESr   r   r)   r   rU   ranger   rh   rO   _build_destination)
rL   r   rP   r   namesr   irM   valuedestrC   rC   rD   r   i  s2    




z!PdfReader._get_named_destinationsc                 C   s   t dd | ||S )r   getNamedDestinationsr   )r   r   )rL   r   rP   rC   rC   rD   r     s    

zPdfReader.getNamedDestinationsc                 C   s   |   S )z
        Read-only property for outlines present in the document.

        :return: a nested list of :class:`Destinations<PyPDF2.generic.Destination>`.
        )_get_outlinesrK   rC   rC   rD   outlines  s    zPdfReader.outlines)noder   r>   c                 C   s   |d krg }t t| jtj }tj|krzt t|tj }W n tk
rT   | Y S X t|t	rd|S |d k	rd|krt t|d }| 
 | _|d kr|S | |}|r|| d|krg }| t t|d | |r|| d|krqt t|d }q|S )N/Firstz/Next)r   r-   r   r   r   COZOUTLINESr&   rO   r3   r   _namedDests_build_outlineappendr   )rL   r   r   r   linesoutlineZsub_outlinesrC   rC   rD   r     s6    






zPdfReader._get_outlinesc                 C   s   t dd | ||S )zV
        .. deprecated:: 1.28.0

            Use :py:attr:`outlines` instead.
        getOutlinesr   )r   r   )rL   r   r   rC   rC   rD   r     s    
zPdfReader.getOutlines)indirect_refr>   c                 C   sp   | j dkr dd t| jD | _ |dks2t|tr6dS t|trF|}n|j}| j dk	s^td| j |d}|S )zGenerate _page_id2numNc                 S   s   i | ]\}}|j j|qS rC   )r   idnum)r   r   xrC   rC   rD   r     s     z:PdfReader._get_page_number_by_indirect.<locals>.<dictcomp>r   )	rv   	enumeratepagesrO   r3   intr   r   rN   )rL   r   r   retrC   rC   rD   _get_page_number_by_indirect  s    

z&PdfReader._get_page_number_by_indirect)pager>   c                 C   s   |  |jS )a  
        Retrieve page number of a given PageObject

        :param PageObject page: The page to get page number. Should be
            an instance of :class:`PageObject<PyPDF2._page.PageObject>`
        :return: the page number or -1 if page not found
        )r   r   rL   r   rC   rC   rD   get_page_number  s    zPdfReader.get_page_numberc                 C   s   t dd | |S )zZ
        .. deprecated:: 1.28.0

            Use :meth:`get_page_number` instead.
        getPageNumberr   )r   r   r   rC   rC   rD   r     s    
zPdfReader.getPageNumber)destinationr>   c                 C   s   |  |jS )z
        Retrieve page number of a given Destination object.

        :param Destination destination: The destination to get page number.
        :return: the page number or -1 if page not found
        )r   r   rL   r   rC   rC   rD   get_destination_page_number  s    z%PdfReader.get_destination_page_numberc                 C   s   t dd | |S )zf
        .. deprecated:: 1.28.0

            Use :meth:`get_destination_page_number` instead.
        getDestinationPageNumberr   )r   r   r   rC   rC   rD   r     s
     z"PdfReader.getDestinationPageNumber)rV   arrayr>   c              	   C   s   d\}}t |td tfs:t |tr0t|dks:t |trTt }td}t|||S |dd \}}|dd  }zt|||f| W S  tk
r   t	
d| d| t | jr | jd j}|d krt n|}t||td Y S X d S )N)NNr   z/Fitr   zUnknown destination:  )rO   typer3   r)   r   rh   r6   r,   r&   rx   ry   r'   rl   r   r   )rL   rV   r   r   typtmpr   rC   rC   rD   r   "  s6    
  zPdfReader._build_destination)r   r>   c                 C   s  d\}}}z|d }W n, t k
rB   | jr:td|d}Y nX d|kr~tt|d }tt|tj }|dkr|tj }n*d|kr|d }t	|trd|kr|d }t	|t
r| ||}nt	|tr
z| || j| j}W n" t k
r   | |d }Y nX n@t	|td r(| ||}n"| jr>td	|| |d }|rd
|krxt
dd |d
 D |td
< d|kr|d |td< d|kr|d |td< |S )N)NNNz/Titlez(Outline Entry Missing /Title attribute:  z/Az/GoToz/Destr   zUnexpected destination z/Cc                 s   s   | ]}t |V  qd S rI   )r0   )r   crC   rC   rD   	<genexpr>q  s     z+PdfReader._build_outline.<locals>.<genexpr>z/Fr   )r   rl   r&   r   r-   r2   r"   SDrO   r)   r   rh   r   Z
dest_arrayr   )rL   r   r   rV   r   actionZaction_typerC   rC   rD   r   B  sH    





zPdfReader._build_outlinec                 C   s   t | j| jS )zWRead-only property that emulates a list of :py:class:`Page<PyPDF2._page.Page>` objects.)r   r   r   rK   rC   rC   rD   r   }  s    zPdfReader.pagesc                 C   s0   t t| jtj }tj|kr,t t|tj S dS )a  
        Get the page layout.

        :return: Page layout currently being used.

        .. list-table:: Valid ``layout`` values
           :widths: 50 200

           * - /NoLayout
             - Layout explicitly not specified
           * - /SinglePage
             - Show one page at a time
           * - /OneColumn
             - Show one column at a time
           * - /TwoColumnLeft
             - Show pages in two columns, odd-numbered pages on the left
           * - /TwoColumnRight
             - Show pages in two columns, odd-numbered pages on the right
           * - /TwoPageLeft
             - Show two pages at a time, odd-numbered pages on the left
           * - /TwoPageRight
             - Show two pages at a time, odd-numbered pages on the right
        N)r   r-   r   r   r   CDZPAGE_LAYOUTr2   )rL   r   rC   rC   rD   page_layout  s    
zPdfReader.page_layoutc                 C   s   t dd | jS )Y
        .. deprecated:: 1.28.0

            Use :py:attr:`page_layout` instead.
        getPageLayoutr  r   r  rK   rC   rC   rD   r    s    
zPdfReader.getPageLayoutc                 C   s   t dd | jS )r  
pageLayoutr  r  rK   rC   rC   rD   r    s    
zPdfReader.pageLayoutc                 C   s.   z| j tj d W S  tk
r(   Y dS X dS )aQ  
        Get the page mode.

        :return: Page mode currently being used.

        .. list-table:: Valid ``mode`` values
           :widths: 50 200

           * - /UseNone
             - Do not show outlines or thumbnails panels
           * - /UseOutlines
             - Show outlines (aka bookmarks) panel
           * - /UseThumbs
             - Show page thumbnails panel
           * - /FullScreen
             - Fullscreen view
           * - /UseOC
             - Show Optional Content Group (OCG) panel
           * - /UseAttachments
             - Show attachments panel
        z	/PageModeN)r   r   r   r   rK   rC   rC   rD   	page_mode  s    zPdfReader.page_modec                 C   s   t dd | jS )W
        .. deprecated:: 1.28.0

            Use :py:attr:`page_mode` instead.
        getPageModer  r   r  rK   rC   rC   rD   r	    s    
zPdfReader.getPageModec                 C   s   t dd | jS )r  pageModer  r
  rK   rC   rC   rD   r    s    
zPdfReader.pageMode)r   inheritr   r>   c                 C   s(  t tjt tjt tjt tjf}|d kr0i }|d krZ| jtj 	 }|d 	 }g | _
d}tj|krr|tj }|dkr|D ]}||kr~|| ||< q~|tj D ].}i }	t|tr||	d< | j|	 |f|	 qnP|dkr$t| D ]\}
}|
|kr|||
< qt| |}|| | j
| d S )Nr   r   z/Page)r2   PGZ	RESOURCESZMEDIABOXZCROPBOXZROTATEr   r   r   rU   rs   r   ZTYPEr   rO   r1   r   listitemsr   r   r   )rL   r   r  r   Zinheritable_page_attributesr   tr   r   ZaddtZattr_inr   Zpage_objrC   rC   rD   r     s<    






zPdfReader._flatten)indirect_referencer>   c                 C   s  | j |j \}}t|d|  }|d dks0t||d k s@ttt| }t|d D ] }t	| |
dd t|}t	| |
dd t|}t	| |
dd ||jkrq\| jr||krtd|
t|d | d t	| |
dd zt|| }	W nh tk
rv }
 zHtd	| d
|j d|j d|
 t | jr`td|
 t }	W 5 d }
~
X Y nX |	  S | jrtdt S )Nr   /Typez/ObjStmz/Nr   r   zObject is in wrong index.r   zInvalid stream (index z) within object r   r   zCan't read object stream: z%This is a fatal error in strict mode.)xref_objStmr   r1   rU   r   r   r   get_datar   r   r   r4   Zread_from_streamrl   r&   r   r8   r(   rx   ry   
generationr'   r3   )rL   r  ZstmnumidxZobj_stmstream_datar   Zobjnumoffsetr   excrC   rC   rD   _get_object_from_stream  sF    


z!PdfReader._get_object_from_streamc              
   C   s  |  |j|j}|d k	r|S |jdkr@|j| jkr@| |}n|j| jkr|j| j|j kr| j|ji |jdrt S | j|j |j }| j	
|d | | j	\}}||jkr| jr| jrtd|j d|j d| d| d	n n:||jkr,| jr,td|j d|j d| d| d	| jrD||jksDtt| j	| }| js| jd k	r| j sxtdtt|}| j||j|j}n.td	|j d|j d
t | jrtd| |j|j| |S )Nr   FzExpected object ID (r   z) does not match actual (z); xref table not zero-indexed.z).zFile has not been decryptedzObject z not defined.zCould not find object.)cache_get_indirect_objectr  r   r  r  xrefxref_free_entryrN   r3   rk   r   read_object_headerru   rl   r&   r   r8   r|   r}   Zis_decryptedr   r5   Zdecrypt_objectrx   ry   r'   cache_indirect_object)rL   r  rP   startr   r  rC   rC   rD   rU   F  sr     

   
    zPdfReader.get_object)indirectReferencer>   c                 C   s   t dd | |S )zU
        .. deprecated:: 1.28.0

            Use :meth:`get_object` instead.
        	getObjectrU   )r   rU   )rL   r!  rC   rC   rD   r"    s    
zPdfReader.getObject)rk   r>   c                 C   s   d}t | |t|O }|dd t|}|t|O }|dd t|}|t|O }|dd |d}t| |dd |r| jrtd| d| t	 t
|t
|fS )NFr   r      z.Superfluous whitespace found in object header r   )r   r   r   r   r{   r   rl   rx   ry   r'   r   )rL   rk   extrar   r  _objrC   rC   rD   r    s&    

zPdfReader.read_object_headerc                 C   s   t dd | |S )z]
        .. deprecated:: 1.28.0

            Use :meth:`read_object_header` instead.
        readObjectHeaderr  )r   r  )rL   rk   rC   rC   rD   r&    s    
zPdfReader.readObjectHeader)r  r   r>   c                 C   s   | j ||fS rI   )rt   rN   rL   r  r   rC   rC   rD   r    s    z#PdfReader.cache_get_indirect_objectc                 C   s   t dd | ||S )zd
        .. deprecated:: 1.28.0

            Use :meth:`cache_get_indirect_object` instead.
        cacheGetIndirectObjectr  )r   r  r'  rC   rC   rD   r(    s
     z PdfReader.cacheGetIndirectObject)r  r   r   r>   c                 C   sJ   ||f| j kr8d| d| }| jr.t|n
t| || j ||f< |S )NzOverwriting cache for r   )rt   rl   r&   rx   ry   )rL   r  r   r   msgrC   rC   rD   r    s    

zPdfReader.cache_indirect_objectc                 C   s   t dd | |||S )z`
        .. deprecated:: 1.28.0

            Use :meth:`cache_indirect_object` instead.
        cacheIndirectObjectr  )r   r  )rL   r  r   r   rC   rC   rD   r*    s    
zPdfReader.cacheIndirectObjectc           
   
   C   s  |  | | | | |}| ||}|dkrZ| jrF|rFtdntd| dt | 	||| | j
r| js| }| j D ]v\}}|dkrq|D ]^}||| d z| |\}}	W n tk
r   Y  qY nX ||| j
 kr| |  qqq||d d S )Nr   zBroken xref tablezincorrect startxref pointer()i  )_basic_validation_find_eof_marker_find_startxref_pos_get_xref_issuesrl   r&   rx   ry   r'   _read_xref_tables_and_trailersru   r   r  r  r   r  
ValueError
_zero_xref)
rL   rk   	startxrefxref_issue_nrr   genZ
xref_entryidpidZ_pgenrC   rC   rD   r{     s6    





 
zPdfReader.readc                 C   sl   | dtj | std| jrh| dtj |d}|dkrZtd|d d| dtj d S )Nr   zCannot read an empty file   s   %PDF-zPDF starts with 'utf8z', but '%PDF-' expected)	r   osSEEK_ENDr   r&   rl   SEEK_SETr{   r   )rL   rk   Zheader_byterC   rC   rD   r,    s    
zPdfReader._basic_validationc                 C   sF   |  d d }d}|d d dkrB|  |k r8tdt|}qd S )Ni   r   rq   r8  s   %%EOFzEOF marker not found)r   r&   r   )rL   rk   Zlast_mblinerC   rC   rD   r-    s    zPdfReader._find_eof_markerc                 C   s   t |}zt|}W nF tk
rZ   |ds6tdt|dd  }tdt Y n"X t |}|dd dkr|td|S )z5Find startxref entry - the location of the xref tables	   startxrefzstartxref not found	   Nz startxref on same line as offset)	r   r   r1  
startswithr&   striprx   ry   r'   )rL   rk   r=  r3  rC   rC   rD   r.    s    
zPdfReader._find_startxref_posc                 C   s  | d}|d d dkr"tdt| |dd d}ttt|| }|rn|dkrn|| _| jrnt	
d	t d
}t| |dd ttt|| }t| |dd d}||k r| d}|d dkr|dd | d}q|d dkr|dd |d d d\}}	|dd }
t|t|	 }}|| jkrRi | j|< i | j|< || j| krdn || j| |< |
dk| j| |< |d7 }|d7 }qt| |dd | d}|dkr|dd q:qq:d S )N   r#  s   refzxref table read errorr   r   Tr   zFXref table not zero-indexed. ID numbers for objects will be corrected.F   s   
is   0123456789t                f   s   traileri)r{   r&   r   r   r   r   r8   ru   rl   rx   ry   r'   splitr  r  )rL   rk   refZ	firsttimenumr=   Zcntr=  Zoffset_bZgeneration_bZentry_type_br  r  Z
trailertagrC   rC   rD   _read_standard_xref_table0  s\    







z#PdfReader._read_standard_xref_table)rk   r3  r4  r>   c                 C   s  i | _ i | _i | _t | _|d k	r||d |d}|dkrL| |}q|r~z| | W qW q t	k
rz   d}Y qX q|
 r| |}tjtjtjtjf}|D ]*}||kr|| jkr||| jt|< qd|krtt|d }qqq| ||}qd S )Nr   r      x/Prev)r  r  r  r-   r   r   r{   
_read_xref_rebuild_xref_table	Exceptionisdigit_read_pdf15_xref_streamr   r   r   r   r   Zraw_getr2   r   r   _read_xref_other_error)rL   rk   r3  r4  r   
xrefstreamZtrailer_keysrM   rC   rC   rD   r0  {  s2    


z(PdfReader._read_xref_tables_and_trailersc                 C   sx   |  | t| |dd ttttf t|| }| D ]\}}|| j	kr>|| j	|< q>d|krp|d }|S d S d S )Nr   r   rN  )
rL  r   r   r   r   rh   r   r8   r  r   )rL   rk   new_trailerrM   r   r3  rC   rC   rD   rO    s    

zPdfReader._read_xref)rk   r3  r>   c                 C   s   |dkr&| j rtdntd d S |dd |d}|d}|dkr^|d	| 8 }|S ||d d
}tdD ]"}|d rv||7 }d} qqv|r|S d| j	kr| j stdt
 z| | W d S  tk
r   tdY nX d S tdd S )Nr   z6/Prev=0 in the trailer (try opening with strict=False)zA/Prev=0 in the trailer - assuming there is no previous xref tableir   rB     xrefr   
   Fr8  Tz/Rootz"Invalid parent xref., rebuild xrefzcan not rebuild xrefz/Could not find xref table at specified location)rl   r&   rx   ry   r   r{   findr   rR  r   r'   rP  rQ  )rL   rk   r3  r   Zxref_locfoundZlookrC   rC   rD   rT    sB    


z PdfReader._read_xref_other_errorc                    s  | dd |\}}ttt|}|d dks:t||| tt|	 |
dd|
dg}ttttf |
d t d	kstjrt d	krtd
  tttttdf f d fdd}tttttdf f tdfdd}||| |S )Nr   r   r  z/XRefz/Indexr   z/Sizez/Wr#  zToo many entry sizes: .)r   r>   c                    s<    |  dkr(  |  }t| |  S | dkr4dS dS d S )Nr   r   )r{   rE   )r   r<   )entry_sizesr  rC   rD   	get_entry  s    z4PdfReader._read_pdf15_xref_stream.<locals>.get_entry)rK  r  r>   c                    s   |  j |g kp|  jkS rI   )r  rN   r  )rK  r  rK   rC   rD   used_before  s    z6PdfReader._read_pdf15_xref_stream.<locals>.used_before)r   r  r   r*   r8   r   r  r   r   r  rN   r   r   r   rl   r&   r   r   r
   bool_read_xref_subsections)rL   rk   r   r  rU  	idx_pairsr\  r]  rC   )r[  rL   r  rD   rS    s    &&z!PdfReader._read_pdf15_xref_streamc                 C   s~   |  |d d | d}|dkr&dS | d}|dkrzd}|dkr\| d}|dkr<dS q<|| d7 }| d	krzd
S dS )zBReturn an int which indicates an issue. 0 means there is no issue.r   r   s   
 	rA  rW  rq   s   0123456789 	r   s   objr#  )r   r{   lower)rk   r3  r=  rC   rC   rD   r/    s    


zPdfReader._get_xref_issuesc           	      C   s   i | _ |dd |d}td|D ]H}t|d}t|d}|| j kr\i | j |< |d| j | |< q(|dd td|D ]N}||dd tt	t
t
f t|| }t| D ]\}}|| j|< qqd S )Nr   r   s(   [\r\n \t][ \t]*(\d+)[ \t]+(\d+)[ \t]+objr   r   s$   [\r\n \t][ \t]*trailer[\r\n \t]*(<<))r  r   r{   refinditerr   groupr   r   r   r   r8   r  r  r   )	rL   rk   Zf_mr   r  rV  rM   r   rC   rC   rD   rP    s    


zPdfReader._rebuild_xref_table.)r`  r\  r]  r>   c                 C   s   d}|  |D ]\}}||ks"t|| }t||| D ]}|d}|dkr^|d}	|d}
q8|dkr|d}|d}|| jkri | j|< |||s|| j| |< q8|dkr|d}|d}d}|||s||f| j|< q8| jr8td| q8qd S )Nr   r   r   zUnknown xref type: )_pairsr   r   r  r  rl   r&   )rL   r`  r\  r]  Zlast_endr   r=   rK  Z	xref_typeZnext_free_objectZnext_generationZbyte_offsetr  Z
objstr_numZ	obstr_idxrC   rC   rD   r_  -  s0    




z PdfReader._read_xref_subsections)r  r>   c                    s*    fddt  j|  D  j|< d S )Nc                    s   i | ]\}}| j  |qS rC   )ru   )r   kvrK   rC   rD   r   S  s     z(PdfReader._zero_xref.<locals>.<dictcomp>)r  r  r  )rL   r  rC   rK   rD   r2  R  s    
zPdfReader._zero_xref)r   r>   c                 c   s:   d}|| ||d  fV  |d7 }|d t |krq6qd S )Nr   r   r   )r   )rL   r   r   rC   rC   rD   rf  W  s
    zPdfReader._pairsr   )rk   limit_offsetr>   c                 C   s   t ddd g }| dks(| |kr0td|d}| dk rNtd|d	d |d
krd}|d
kr|d}|d
kr|dd d}| dk rtd|d	d qf||rdndd qq|| q|  d|S )z.. deprecated:: 2.1.0read_next_end_linez4.0.0)Z
removed_inr   z!Could not read malformed PDF filer   r   zEOL marker not found)   
   Fr   Trq   )r   r   r&   r{   r   r   reversejoin)rL   rk   ri  Z
line_partsr   ZcrlfrC   rC   rD   rj  _  s4    


 zPdfReader.read_next_end_linec                 C   s   t d | ||S )z.. deprecated:: 1.28.0readNextEndLine)r   rj  )rL   rk   ri  rC   rC   rD   rp    s    zPdfReader.readNextEndLine)rm   r>   c                 C   s   | j std| j |S )ag  
        When using an encrypted / secured PDF file with the PDF Standard
        encryption handler, this function will allow the file to be decrypted.
        It checks the given password against the document's user password and
        owner password, and then stores the resulting decryption key if either
        password is correct.

        It does not matter which password was matched.  Both passwords provide
        the correct decryption key that will allow the document to be used with
        this library.

        :param str password: The password to match.
        :return: `PasswordType`.
        rr   )r}   r&   r   )rL   rm   rC   rC   rD   decrypt  s    zPdfReader.decrypt)permissions_coder>   c                 C   s   i }|d@ dk|d< |d@ dk|d< |d@ dk|d< |d@ dk|d	< |d
@ dk|d< |d@ dk|d< |d@ dk|d< |d@ dk|d< |S )NrA  r   printr?   modifyrC  copy    annotations   Zformsi   Zaccessabilityi   Zassemblei   Zprint_high_qualityrC   )rL   rr  permissionsrC   rC   rD   decode_permissions  s    
zPdfReader.decode_permissionsc                 C   s   t j| jkS )z
        Read-only boolean property showing whether this PDF file is encrypted.
        Note that this property, if true, will remain true even after the
        :meth:`decrypt()<PyPDF2.PdfReader.decrypt>` method is called.
        )r   r   r   rK   rC   rC   rD   r~     s    zPdfReader.is_encryptedc                 C   s   t dd | jS )Z
        .. deprecated:: 1.28.0

            Use :py:attr:`is_encrypted` instead.
        getIsEncryptedr~   r   r~   rK   rC   rC   rD   r|    s    
zPdfReader.getIsEncryptedc                 C   s   t dd | jS )r{  isEncryptedr~   r}  rK   rC   rC   rD   r~    s    
zPdfReader.isEncryptedc           
      C   s   d }i }t t| jtj }d|ks*|d s.d S t t|d }d|krt t|d }t|}|D ]D}|}t|}t	|t
r^t tt | }|r^t|j}	|	||< q^|S )Nz	/AcroFormz/XFA)r   r-   r   r   r   r7   r)   iternextrO   r1   r	   r.   rU   zlib
decompress_data)
rL   r   rP   r   r   r   r   tagr   esrC   rC   rD   xfa  s$    

zPdfReader.xfa)FN)NNN)NNN)NN)NN)NN)NN)NNN)r   )r   )ord   re   rf   rg   r   r   r   r^  rh   bytesrJ   ri   r   r	   rG   r   r   r   r;   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r7   r   r   r-   r   r   r   r   r   r   r   r9   r   r   r   r3   r1   r   r   r   r,   r   r   r   r4   r   r   r   r   r  r  r  r:   r  r	  r  r   r5   r  rU   r"  r   r
   r  r&  r  r(  r  r*  r{   r,  r-  r.  rL  r0  rO  rT  r*   r.   r+   rS  staticmethodr/  rP  r   r_  r2  r   rf  rj  rp  r   rq  rz  r~   r|  r~  r  rC   rC   rC   rD   rj      s    
4								   /   


  %  

/  

	    0    
	
 ;			   /6C
      %L  ! 0(% 	  "  		rj   c                       s&   e Zd Zeedd fddZ  ZS )PdfFileReaderN)argskwargsr>   c                    s8   t dd d|kr&t|dk r&d|d< t j|| d S )Nr  rj   rl   r   T)r   r   superrJ   )rL   r  r  	__class__rC   rD   rJ     s    
zPdfFileReader.__init__)rd   re   rf   r   rJ   __classcell__rC   rC   r  rD   r    s   r  )Wr:  rb  r@   rx   r  ior   pathlibr   typingr   r   r   r   r   r	   r
   r   r   r}   r   r   Z_pager   r   _utilsr   r   r   r   r   r   r   r   r   r   	constantsr   r   r   r  r   r   r   r    rS   r!   r"   r#   r  r$   r   r%   r   errorsr&   r'   r(   Zgenericr)   r*   r+   r,   r-   r.   r/   r0   r1   r2   r3   r4   r5   r6   r7   r8   r   r9   r:   xmpr;   r  r   rE   rF   rG   rj   r  rC   rC   rC   rD   <module>   sX   ,0H"	 v            