o
    >es4                 
   @   s<  d dl Z d dlZd dlZd dlZd dlmZ d dlmZmZ d dlm	Z	 d dl
mZmZmZmZmZmZmZmZmZmZmZ ddlmZmZ ddlmZmZ dd	lmZ dd
lm Z m!Z!m"Z"m#Z#m$Z$m%Z%m&Z&m'Z'm(Z(m)Z)m*Z* ddl+m,Z- ddl+m.Z/ ddl+m0Z0m1Z1m2Z2 ddl+m3Z4 ddl+m5Z6 ddl+m7Z8 ddl+m9Z: ddl+m;Z< ddl+m=Z> ddl?m@Z@mAZAmBZBmCZCmDZD ddlEmFZFmGZGmHZHmIZImJZJmKZKmLZLmMZMmNZNmOZOmPZPmQZQmRZRmSZSmTZTmUZUmVZVmWZWmXZX ddlYmZZZm[Z[ ddl\m]Z] de^de_dee_eedf f fddZ`G dd deKZaG d d! d!ZbG d"d# d#eeef ZcdS )$    N)datetime)BytesIOUnsupportedOperation)Path)AnyCallableDictIterableIteratorListMappingOptionalTupleUnioncast   )
EncryptionPasswordType)
PageObject_VirtualList)index2label)StrByteType
StreamTypeb_deprecate_with_replacementlogger_warningparse_iso8824_dateread_non_whitespaceread_previous_lineread_until_whitespaceskip_over_commentskip_over_whitespace)CatalogAttributes)CatalogDictionary)CheckboxRadioButtonAttributesGoToActionArgumentsUserAccessPermissions)Core)DocumentInformationAttributes)FieldDictionaryAttributes)PageAttributes)PagesAttributes)TrailerKeys)EmptyFileErrorFileNotDecryptedErrorPdfReadErrorPdfStreamErrorWrongPasswordError)ArrayObjectBooleanObjectContentStreamDecodedStreamObjectDestinationDictionaryObjectEncodedStreamObjectFieldFitFloatObjectIndirectObject
NameObject
NullObjectNumberObject	PdfObjectTextStringObject
TreeObjectViewerPreferencesread_object)OutlineTypePagemodeType)XmpInformationdsizereturn.c                 C   s4   |dkrt dd|  } | dd  } td| d S )N   zinvalid size in convert_to_ints           iz>qr   )r/   structunpack)rH   rI    rN   N/var/www/html/humari/django-venv/lib/python3.10/site-packages/pypdf/_reader.pyconvert_to_intp   s
   rP   c                   @   sd  e Zd ZdZd%ddZdedee fddZedee fd	d
Z	edee fddZ
edee fddZedee fddZedee fddZedee fddZedee fddZedee fddZedee fddZedee fddZedee fddZedee fdd Zedee fd!d"Zedee fd#d$ZdS )&DocumentInformationa  
    A class representing the basic document metadata provided in a PDF File.
    This class is accessible through
    :py:class:`PdfReader.metadata<pypdf.PdfReader.metadata>`.

    All text properties of the document metadata have
    *two* properties, eg. author and author_raw. The non-raw property will
    always return a ``TextStringObject``, making it ideal for a case where
    the metadata is being displayed. The raw property can sometimes return
    a ``ByteStringObject``, if pypdf was unable to decode the string's
    text encoding; this requires additional safety in the caller and
    therefore is not as commonly accessed.
    rJ   Nc                 C   s   t |  d S N)r7   __init__selfrN   rN   rO   rS      s   zDocumentInformation.__init__keyc                 C   s   |  |d }t|tr|S d S rR   )get
isinstancerA   )rU   rV   retvalrN   rN   rO   	_get_text   s   
zDocumentInformation._get_textc                 C   s,   |  tjr| tjp|  tj S dS )z
        Read-only property accessing the document's title.

        Returns a ``TextStringObject`` or ``None`` if the title is not
        specified.
        N)rW   DITITLErZ   
get_objectrT   rN   rN   rO   title   s
   

zDocumentInformation.titlec                 C      |  tjS )z>The "raw" version of title; can return a ``ByteStringObject``.)rW   r[   r\   rT   rN   rN   rO   	title_raw      zDocumentInformation.title_rawc                 C   r_   )z
        Read-only property accessing the document's author.

        Returns a ``TextStringObject`` or ``None`` if the author is not
        specified.
        )rZ   r[   AUTHORrT   rN   rN   rO   author      zDocumentInformation.authorc                 C   r_   )z?The "raw" version of author; can return a ``ByteStringObject``.)rW   r[   rb   rT   rN   rN   rO   
author_raw   ra   zDocumentInformation.author_rawc                 C   r_   )z
        Read-only property accessing the document's subject.

        Returns a ``TextStringObject`` or ``None`` if the subject is not
        specified.
        )rZ   r[   SUBJECTrT   rN   rN   rO   subject   rd   zDocumentInformation.subjectc                 C   r_   )z@The "raw" version of subject; can return a ``ByteStringObject``.)rW   r[   rf   rT   rN   rN   rO   subject_raw   ra   zDocumentInformation.subject_rawc                 C   r_   )ac  
        Read-only property accessing the document's creator.

        If the document was converted to PDF from another format, this is the
        name of the application (e.g. OpenOffice) that created the original
        document from which it was converted. Returns a ``TextStringObject`` or
        ``None`` if the creator is not specified.
        )rZ   r[   CREATORrT   rN   rN   rO   creator      
zDocumentInformation.creatorc                 C   r_   )z@The "raw" version of creator; can return a ``ByteStringObject``.)rW   r[   ri   rT   rN   rN   rO   creator_raw   ra   zDocumentInformation.creator_rawc                 C   r_   )aG  
        Read-only property accessing the document's producer.

        If the document was converted to PDF from another format, this is the
        name of the application (for example, OSX Quartz) that converted it to
        PDF. Returns a ``TextStringObject`` or ``None`` if the producer is not
        specified.
        )rZ   r[   PRODUCERrT   rN   rN   rO   producer   rk   zDocumentInformation.producerc                 C   r_   )zAThe "raw" version of producer; can return a ``ByteStringObject``.)rW   r[   rm   rT   rN   rN   rO   producer_raw   ra   z DocumentInformation.producer_rawc                 C      t | tjS )z:Read-only property accessing the document's creation date.)r   rZ   r[   CREATION_DATErT   rN   rN   rO   creation_date      z!DocumentInformation.creation_datec                 C   r_   )z
        The "raw" version of creation date; can return a ``ByteStringObject``.

        Typically in the format ``D:YYYYMMDDhhmmss[+Z-]hh'mm`` where the suffix
        is the offset from UTC.
        )rW   r[   rq   rT   rN   rN   rO   creation_date_raw   rd   z%DocumentInformation.creation_date_rawc                 C   rp   )z
        Read-only property accessing the document's modification date.

        The date and time the document was most recently modified.
        )r   rZ   r[   MOD_DATErT   rN   rN   rO   modification_date   s   z%DocumentInformation.modification_datec                 C   r_   )z
        The "raw" version of modification date; can return a
        ``ByteStringObject``.

        Typically in the format ``D:YYYYMMDDhhmmss[+Z-]hh'mm`` where the suffix
        is the offset from UTC.
        )rW   r[   ru   rT   rN   rN   rO   modification_date_raw   s   	z)DocumentInformation.modification_date_raw)rJ   N)__name__
__module____qualname____doc__rS   strr   rZ   propertyr^   r`   rc   re   rg   rh   rj   rl   rn   ro   r   rr   rt   rv   rw   rN   rN   rN   rO   rQ   x   s@    
			rQ   c                   @   s  e Zd ZdZedee fddZ		ddee	e
f ded	edeef ddfd
dZ		ddedee f dedee f deeef fddZedefddZedee fddZedee fddZdefddZdedefddZedeeef fddZ			ddee deeeef  dee deeeef  fd d!Zd"edefd#d$Z d%eeef deeef ded&eddf
d'd(Z!deeef dededdfd)d*Z"ded%ed&eddfd+d,Z#dd-edeeef fd.d/Z$		ddeedf dee deeef fd0d1Z%ede&fd2d3Z'	dd4ee d5ee de&fd6d7Z(edee) fd8d9Z*d:edee+e,f dee fd;d<Z-d=edee fd>d?Z.d@e/dee fdAdBZ0dCedDee1ee2e,de+ef   de/fdEdFZ3d4edee/ fdGdHZ4ede1e fdIdJZ5ede1e fdKdLZ6edee fdMdNZ7edee8 fdOdPZ9			ddQedeef dReeeef  d:ee, ddfdSdTZ:d:e,deee;ef fdUdVZ<dWedXedee; fdYdZZ=d:eee,f dee; fd[d\Z>de?de@eef fd]d^ZAd_ed`edee; fdadbZBd_ed`edcee; dee; fdddeZCde?ddfdfdgZDde?ddfdhdiZEde?ddfdjdkZFde?defdldmZGde?ddfdndoZHde?dpee dqeddfdrdsZIde?dee fdtduZJde?dpedee fdvdwZKde?deeLeMeNf fdxdyZOePde?dpedefdzd{ZQde?ddfd|d}ZRd~e1e deSegeee@edf f f deSeeee@edf f gef ddfddZTdDe1e dee@eef  fddZUd	eeef deVfddZWdedeeef fddZXedeeY fddZZedefddZ[edeeeef  fddZ\dedee fddZ]dedee fddZ^ede_ee1e f fddZ`de1e fddZadede1e fddZb	ddee deeeee1e f f fddZcdS )	PdfReadera  
    Initialize a PdfReader object.

    This operation can take some time, as the PDF stream's cross-reference
    tables are read into memory.

    Args:
        stream: A File object or an object that supports the standard read
            and seek methods similar to a File object. Could also be a
            string representing a path to a PDF file.
        strict: Determines whether user should be warned of all
            problems and also causes some correctable problems to be fatal.
            Defaults to ``False``.
        password: Decrypt PDF file at initialization. If the
            password is None, the file will not be decrypted.
            Defaults to ``None``
    rJ   c                 C   sD   t t| jd tjd}|du rdS | }t|ts t|}|S )zCReturns the existing ViewerPreferences as an overloaded dictionary./RootN)	r   r7   trailerrW   CDVIEWER_PREFERENCESr]   rX   rC   )rU   orN   rN   rO   viewer_preferences  s   
zPdfReader.viewer_preferencesFNstreamstrictpasswordc           	      C   s@  || _ d | _i | _d| _d | _t|drd|jvrtdt t	|t
tfr@t|d}t| }W d    n1 s;w   Y  | | || _d| _d | _| jrd| _| jtj}|rd|d  jnd}tt| jtj  }t||| _|d ur~|nd}| j|tjkr|d urt d	d| _d S |d urt!d
d S )Nr   modebzQPdfReader stream/file object is not in binary mode. It may not be read correctly.rbFT    zWrong passwordNot encrypted file)"r   flattened_pagesresolved_objects
xref_index_page_id2numhasattrr   r   rx   rX   r|   r   openr   readr   _override_encryption_encryptionis_encryptedr   rW   TKIDr]   original_bytesr   r7   ENCRYPTr   verifyr   NOT_DECRYPTEDr1   r/   )	rU   r   r   r   fhid_entry	id1_entryencrypt_entrypwdrN   rN   rO   rS   '  sF   

zPdfReader.__init__includeexcludec                    s^   | j d | j  }d|i}durfdd| D } dur- fdd| D }|S )z
        Integration into Jupyter Notebooks.

        This method returns a dictionary that maps a mime-type to it's
        representation.

        See https://ipython.readthedocs.io/en/stable/config/integrating.html
        r   zapplication/pdfNc                    s   i | ]\}}| v r||qS rN   rN   .0kv)r   rN   rO   
<dictcomp>p      z/PdfReader._repr_mimebundle_.<locals>.<dictcomp>c                    s   i | ]\}}| vr||qS rN   rN   r   )r   rN   rO   r   t  r   )r   seekr   items)rU   r   r   pdf_datadatarN   )r   r   rO   _repr_mimebundle_[  s   
zPdfReader._repr_mimebundle_c                 C   s>   | j  }| j dd | j ddd}| j |d |S )z
        The first 8 bytes of the file.

        This is typically something like ``'%PDF-1.6'`` and can be used to
        detect if the file is actually a PDF file and which version it is.
        r   rK   zutf-8backslashreplace)r   tellr   r   decode)rU   locpdf_file_versionrN   rN   rO   
pdf_headerx  s
   

zPdfReader.pdf_headerc                 C   sF   t j| jvrdS | jt j }t }t|tdrtd|| |S )a  
        Retrieve the PDF file's document information dictionary, if it exists.

        Note that some PDF files use metadata streams instead of docinfo
        dictionaries, and these metadata streams will not be accessed by this
        function.
        NzEtrailer not found or does not point to document information directory)r   INFOr   rQ   rX   typer/   update)rU   objrY   rN   rN   rO   metadata  s   	
zPdfReader.metadatac                 C   s&   zd| _ | jtj jW d| _ S d| _ w )z(XMP (Extensible Metadata Platform) data.TF)r   r   r   ROOTxmp_metadatarT   rN   rN   rO   r     s   zPdfReader.xmp_metadatac                 C   s6   | j r| jtj d d S | jdu r|   t| jS )a   
        Calculate the number of pages in this PDF file.

        Returns:
            The number of pages of the parsed PDF file

        Raises:
            PdfReadError: if file is encrypted and restrictions prevent
                this action.
        /Pages/CountN)r   r   r   r   r   _flattenlenrT   rN   rN   rO   _get_num_pages  s
   

zPdfReader._get_num_pagespage_numberc                 C   s.   | j du r	|   | j dusJ d| j | S )z
        Retrieve a page by number from this PDF file.

        Args:
            page_number: The page number to retrieve
                (pages begin at zero)

        Returns:
            A :class:`PageObject<pypdf._page.PageObject>` instance.
        Nhint for mypy)r   r   )rU   r   rN   rN   rO   	_get_page  s   

zPdfReader._get_pagec                 C      |   S )zu
        A read-only dictionary which maps names to
        :class:`Destinations<pypdf.generic.Destination>`
        )_get_named_destinationsrT   rN   rN   rO   named_destinations  s   zPdfReader.named_destinationstreerY   fileobjc           
      C   s   t  }|t  |du r,i }tt| jtj }t	j
|v r*ttt |t	j
 }ndS |du r2|S | ||| |D ]}||v rK| ||||  nq;d|v rhtt|d }|D ]}| }	| |	||| qY|S )aX  
        Extract field data if this PDF contains interactive form fields.

        The *tree* and *retval* parameters are for recursive use.

        Args:
            tree:
            retval:
            fileobj: A file object (usually a text file) to write
                a report to on all interactive form fields found.

        Returns:
            A dictionary where each key is a field name, and each
            value is a :class:`Field<pypdf.generic.Field>` object. By
            default, the mapping name is used for keys.
            ``None`` if form data could not be located.
        N/Fields)FAattributes_dictr   r$   r   r7   r   r   r   r   	ACRO_FORMr   rB   _check_kids_build_fieldr2   r]   )
rU   r   rY   r   field_attributescatalogattrfieldsffieldrN   rN   rO   
get_fields  s,   
zPdfReader.get_fieldsparentc                 C   sR   d|v rt t|d S d|v r"| t t|d d t t|d  S t t|d S )N/TM/Parent./T)r   r|   _get_qualified_field_namer7   )rU   r   rN   rN   rO   r     s   z#PdfReader._get_qualified_field_namer   r   c           
      C   s\  |  ||| z	tt|d }W n3 tyC   zd|v r)| tt|d d }nd}|tt|d 7 }W n ty@   Y Y d S w Y nw |rR| ||| |d t|||< || j	
 }|tjddkru|ttj || td< |tjdd	krd
|v rtt|d
 d  || td< d|| d vr|| td td d S d S |tjdd	kr&|tjdtjj@ dkr(g }t||| td< |tji D ]&}|
 }t|d
 d  D ]}	|	|vr||	 qt||| td< q|tjdtjj@ dkr*d|| d v r,|| d || d d= d S d S d S d S d S )Nr   r   r    r   
/Chz	/_States_/Btnz/AP/Nz/Offr   )r   r   r|   KeyErrorr   r7   _write_fieldwriter9   indirect_referencer]   rW   r   FTr=   Optr2   listkeysappendFfFfBitsRadioKidsNoToggleToOffindex)
rU   r   rY   r   r   rV   r   statesr   srN   rN   rO   r     sb   
0
 
zPdfReader._build_fieldc                 C   s4   t j|v r|t j D ]}| | || q
d S d S rR   )PAKIDSr   r]   )rU   r   rY   r   kidrN   rN   rO   r   K  s
   
zPdfReader._check_kidsc           	   	   C   s  t  }|t  }|D ]t}|t jt jfv rq|| }z\|t jkr>ddddd}|| |v r=|| d|||   d n8|t jkriz	|| t j }W n t	y\   || t j
 }Y nw || d| d n|| d||  d W q t	y   Y qw d S )NButtonTextChoice	Signature)r   /Txr   z/Sig: r   )r   
attributesr$   r   AAr   r   ParentTMr   T)	rU   r   r   r   field_attributes_tupler   	attr_nametypesnamerN   rN   rO   r   S  sB   


zPdfReader._write_fieldfull_qualified_namec                 C   s   dt dtttf dt fdd}|  }|du ri S i }| D ]$\}}|ddkrC|r4|d	||< q|d	||tt |d
 |< q|S )a  
        Retrieve form fields from the document with textual data.

        Args:
            full_qualified_name: to get full name

        Returns:
            A dictionary. The key is the name of the form field,
            the value is the content of the field.

            If the document contains multiple form fields with the same name, the
            second and following will get the suffix .2, .3, ...
        r   r   rJ   c                    s2    |vr S  d t t fdd|D d  S )Nr   c                    s   g | ]}|  d  rdqS )r   r   )
startswith)r   kkr   rN   rO   
<listcomp>  r   zGPdfReader.get_form_text_fields.<locals>.indexed_key.<locals>.<listcomp>   )r|   sum)r   r   rN   r  rO   indexed_key  s   z3PdfReader.get_form_text_fields.<locals>.indexed_keyNz/FTr   z/Vr   )r|   r   r   r   r   rW   r   )rU   r  r  
formfieldsffr   valuerN   rN   rO   get_form_text_fieldsx  s   zPdfReader.get_form_text_fieldsc                 C   s  |du r7i }t t| jtj }tj|v rt t|tj }ntj|v r7t t|tj }tj|v r7t t|tj }|du r=|S t	j
|v rWt t|t	j
 D ]
}| | | qJ|S tj|v rt t|tj }d}|t|k rt t||  }|d7 }t|tsqfz||  }W n
 ty   Y |S w |d7 }t|trd|v r|d }nqf| ||}	|	dur|	||< |t|k sl|S | D ]'\}
}| }t|trd|v r|d  }nq| |
|}	|	dur|	||
< q|S )z
        Retrieve the named destinations present in the document.

        Args:
            tree:
            retval:

        Returns:
            A dictionary which maps names to
            :class:`Destinations<pypdf.generic.Destination>`.
        Nr   r   /D)r   r7   r   r   r   CADESTSrB   NAMESr   r   r2   r   r]   r   r|   rX   
IndexError_build_destinationr   )rU   r   rY   r   namesr   irV   r  destk__v__valrN   rN   rO   r     sd   



"




z!PdfReader._get_named_destinationsc                 C   r   )z
        Read-only property for the outline present in the document.

        (i.e., a collection of 'outline items' which are also known as
        'bookmarks')
        )_get_outlinerT   rN   rN   rO   outline  s   zPdfReader.outlinenoder  c                 C   s   |d u r7g }t t| jtj }tj|v r2t t|tj }t|tr#|S |d ur2d|v r2t t|d }| 	 | _
|d u r=|S 	 | |}|rJ|| d|v rbg }| t t|d | |rb|| d|vri	 |S t t|d }q>)N/FirstTz/Next)r   r7   r   r   r   COOUTLINESrX   r>   r   _namedDests_build_outline_itemr   r  )rU   r  r  r   linesoutline_objsub_outlinerN   rN   rO   r    s4   





zPdfReader._get_outlinec                 C   s0   t t| jtj }tj|v rt d|tj S dS )u   
        Read-only property for the list of threads.

        See §8.3.2 from PDF 1.7 spec.

        It's an array of dictionaries with "/F" and "/I" properties or
        None if there are no articles.
        r2   N)r   r7   r   r   r   r   THREADS)rU   r   rN   rN   rO   threads  s   

zPdfReader.threadsr   c                 C   sp   | j du rdd t| jD | _ |du st|trdS t|tr#|}n|j}| j dus/J d| j |d}|S )z
        Generate _page_id2num.

        Args:
            indirect_reference:

        Returns:
            The page number or None
        Nc                 S   s   i | ]	\}}|j j|qS rN   )r   idnum)r   r  xrN   rN   rO   r   4  s    z:PdfReader._get_page_number_by_indirect.<locals>.<dictcomp>r   )r   	enumeratepagesrX   r>   intr)  rW   )rU   r   r)  retrN   rN   rO   _get_page_number_by_indirect'  s   

z&PdfReader._get_page_number_by_indirectpagec                 C      |  |jS )a  
        Retrieve page number of a given PageObject.

        Args:
            page: The page to get page number. Should be
                an instance of :class:`PageObject<pypdf._page.PageObject>`

        Returns:
            The page number or None if page is not found
        )r/  r   )rU   r0  rN   rN   rO   get_page_numberB  s   zPdfReader.get_page_numberdestinationc                 C   r1  )z
        Retrieve page number of a given Destination object.

        Args:
            destination: The destination to get page number.

        Returns:
            The page number or None if page is not found
        )r/  r0  )rU   r3  rN   rN   rO   get_destination_page_numberO  rk   z%PdfReader.get_destination_page_numberr^   arrayc                 C   s   d\}}t |ttfst |trt|dks|d u r%t }t||t S |dd \}}|dd  }zt||t||dW S  tym   t	d| d| t
 | jrT | jd j}|d u rat n|}t||t  Y S w )NNNr   r	  )fit_typefit_argszUnknown destination:  )rX   r>   r|   r2   r   r6   r:   fitr/   r   rx   r   r,  r   )rU   r^   r5  r0  typtmpr   rN   rN   rO   r  [  s*   	zPdfReader._build_destinationc                 C   s  d\}}}z	t d|d }W n ty#   | jrtd|d}Y nw d|v rAt t|d }t t|tj }|dkr@|tj }nd|v rV|d }t	|trVd	|v rV|d	 }t	|t
rb| ||}nEt	|trz| || j| j}W n3 ty   | |d }Y n%w |d u r| ||}n| jrtd
|td|dt | |d }|rd|v rt
dd |d D |td< d|v r|d |td< d|v r|d |td< t|dddk|td< ||_z|j|_W |S  ty   Y |S w )NNNNr|   z/Titlez(Outline Entry Missing /Title attribute: r   z/Az/GoToz/Destr  zUnexpected destination zRemoved unexpected destination z from destinationz/Cc                 s   s    | ]}t |V  qd S rR   )r;   )r   crN   rN   rO   	<genexpr>  s    z0PdfReader._build_outline_item.<locals>.<genexpr>/Fr   r   z
/%is_open%)r   r   r   r/   r7   r=   r%   SDrX   r2   r  r|   r"  
dest_arrayr   rx   r3   rW   r  r   AttributeError)rU   r  r  r^   outline_itemactionaction_typerN   rN   rO   r#  {  sn   





zPdfReader._build_outline_itemc                 C   s   t | j| jS )zbRead-only property that emulates a list of :py:class:`PageObject<pypdf._page.PageObject>` objects.)r   r   r   rT   rN   rN   rO   r,    s   zPdfReader.pagesc                    s    fddt t jD S )z
        A list of labels for the pages in this document.

        This property is read-only. The labels are in the order that the pages
        appear in the document.
        c                    s   g | ]}t  |qS rN   )page_index2page_label)r   r  rT   rN   rO   r    s    z)PdfReader.page_labels.<locals>.<listcomp>)ranger   r,  rT   rN   rT   rO   page_labels  s   zPdfReader.page_labelsc                 C   s0   t t| jtj }tj|v rt t|tj S dS )a  
        Get the page layout currently being used.

        .. list-table:: Valid ``layout`` values
           :widths: 50 200

           * - /NoLayout
             - Layout explicitly not specified
           * - /SinglePage
             - Show one page at a time
           * - /OneColumn
             - Show one column at a time
           * - /TwoColumnLeft
             - Show pages in two columns, odd-numbered pages on the left
           * - /TwoColumnRight
             - Show pages in two columns, odd-numbered pages on the right
           * - /TwoPageLeft
             - Show two pages at a time, odd-numbered pages on the left
           * - /TwoPageRight
             - Show two pages at a time, odd-numbered pages on the right
        N)r   r7   r   r   r   r   PAGE_LAYOUTr=   )rU   r   rN   rN   rO   page_layout  s   
zPdfReader.page_layoutc                 C   s(   z	| j tj d W S  ty   Y dS w )a2  
        Get the page mode currently being used.

        .. list-table:: Valid ``mode`` values
           :widths: 50 200

           * - /UseNone
             - Do not show outline or thumbnails panels
           * - /UseOutlines
             - Show outline (aka bookmarks) panel
           * - /UseThumbs
             - Show page thumbnails panel
           * - /FullScreen
             - Fullscreen view
           * - /UseOC
             - Show Optional Content Group (OCG) panel
           * - /UseAttachments
             - Show attachments panel
        z	/PageModeN)r   r   r   r   rT   rN   rN   rO   	page_mode  s
   zPdfReader.page_moder,  inheritc                 C   sJ  t tjt tjt tjt tjf}|d u ri }|d u r-| jtj 	 }|d 	 }g | _
tj|v r8|tj }n
tj|vr@d}nd}|dkrz|D ]}||v rT|| ||< qH|tj D ]}i }	t|trg||	d< |	 }
|
rw| j|
|fi |	 qZd S |dkrt| D ]\}}||vr|||< qt| |}|| | j
| d S d S )Nr   z/Pager   )r=   PG	RESOURCESMEDIABOXCROPBOXROTATEr   r   r   r]   r   r   TYPEr   rX   r<   r   r   r   r   r   r   )rU   r,  rN  r   inheritable_page_attributesr   tr   r0  addtr   attr_inr  page_objrN   rN   rO   r     sN   




zPdfReader._flattenc                 C   s  | j |j \}}t|d|  }tt|d dksJ ||d k s#J tt| }t	|d D ]}t
| |dd t|}t
| |dd t|}t
| |dd ||jkraq1| jrl||krltd|t|d | d t
| |dd z
t|| }	W |	  S  ty }
 z)td	| d
|j d|j d|
 t | jrtd|
 t }	W Y d }
~
|	  S d }
~
ww | jrtdt S )Nr   /Typez/ObjStmr   r   zObject is in wrong index.r  zInvalid stream (index z) within object r9  r   zCan't read object stream: z%This is a fatal error in strict mode.)xref_objStmr)  r<   r]   r   r|   r   r   get_datarI  r   r   r?   read_from_streamr   r/   r-  rD   r0   r   
generationrx   r>   )rU   r   stmnumidxobj_stmstream_datar  objnumoffsetr   excrN   rN   rO   _get_object_from_stream@  sX   


z!PdfReader._get_object_from_streamnumgenc                 C   s   t |||  S )a/  
        Used to ease development.

        This is equivalent to generic.IndirectObject(num,gen,self).get_object()

        Args:
            num: The object number of the indirect object.
            gen: The generation number of the indirect object.

        Returns:
            A PdfObject
        )r<   r]   )rU   rh  ri  rN   rN   rO   _get_indirect_objectv  s   zPdfReader._get_indirect_objectc           	   
   C   s2  t |trt|d| }| |j|j}|d ur|S |jdkr+|j| jv r+| |}n|j| jv rP|j| j|j v rP| j	
|ji 
|jdrLt S | j|j |j }| j|d z
| | j\}}W nw ty   t| jdr{t| j }n| j }| jdd | jd}| j|d td|j d|j d |}|d urtd|j d	|j d
t |dd | j|j |j< | j|dd  | | j\}}nd}Y nw ||jkr| jr| jrtd|j d|j d| d| d	n||jkr| jrtd|j d|j d| d| d	| jr&||jks&J t| j| }| jsO| j d urO| j ! s@t"dt#t$|}| j %||j|j}nt| jdr_t| j }n| j }| jdd | jd}| j|d td|j d|j d |}|d urtd|j d|j dt |j| jvri | j|j< |dd | j|j |j< | j|&dd  t'| j | jdd t| j| }| js| j d ur| j ! st"dt#t$|}| j %||j|j}ntd|j d|j dt | jrtd| (|j|j| |S )Nr   F	getbufferr[  z\sz\s+z\s+objz
Object ID ,z ref repairedr   zExpected object ID (r9  z) does not match actual (z); xref table not zero-indexed.z).zFile has not been decryptedzObject z foundz not defined.zCould not find object.))rX   r-  r<   cache_get_indirect_objectr_  r)  r\  rg  xrefxref_free_entryrW   r>   r   r   read_object_header	Exceptionr   bytesrk  r   r   researchencoder   rx   startr   r   r/   rD   r   r   is_decryptedr.   r   r@   decrypt_objectendr!   cache_indirect_object)	rU   r   rY   rv  r)  r_  bufpmrN   rN   rO   r]     s   












zPdfReader.get_objectc                 C   s   d}t | |t|O }|dd t|}|t|O }|dd t|}|t|O }|dd |d}t| |dd |rQ| jrQtd| d| t t	|t	|fS )NFr[  r      z.Superfluous whitespace found in object header r9  )
r    r!   r   r   r   r   r   r   rx   r-  )rU   r   extrar)  r_  _objrN   rN   rO   rp    s&   

zPdfReader.read_object_headerr_  r)  c                 C   s   | j ||fS rR   )r   rW   )rU   r_  r)  rN   rN   rO   rm  #  rs   z#PdfReader.cache_get_indirect_objectr   c                 C   s^   ||f| j v rd| d| }| jrt|t|t || j ||f< |d ur-t||| |_|S )NzOverwriting cache for r9  )r   r   r/   r   rx   r<   r   )rU   r_  r)  r   msgrN   rN   rO   rz  (  s   
zPdfReader.cache_indirect_objectc              
   C   s.  |  | | | | |}| ||}|dkr+| jr"|r"tdtd| dt | ||| | j	r| js|
 }| j D ]I\}}|dkrJqAt| }|D ]7}||| d z	| |\}	}
W n
 tyo   Y  nw |	|| j	 kr| j| | | j| |	< | j| |= qRqA||d d S d S d S )Nr   zBroken xref tablezincorrect startxref pointer()  )_basic_validation_find_eof_marker_find_startxref_pos_get_xref_issuesr   r/   r   rx   _read_xref_tables_and_trailersr   r   rn  r   sortedr   r   rp  
ValueError)rU   r   	startxrefxref_issue_nrr   ri  
xref_entryxref_kidpid_pgenrN   rN   rO   r   5  s<   



zPdfReader.readc                 C   s   | dtj z|d}W n ty   tdw |dkr"td|dkr<| jr4td|	d d	t
d
| t | dtj dS )z/Ensure file is not empty. Read at most 5 bytes.r      zcannot read headerr   zCannot read an empty files   %PDF-zPDF starts with 'utf8z', but '%PDF-' expectedzinvalid pdf header: N)r   osSEEK_SETr   UnicodeDecodeErrorr   r-   r   r/   r   r   rx   SEEK_END)rU   r   header_byterN   rN   rO   r  [  s   zPdfReader._basic_validationc                 C   s\   d}d}|dd dkr,|  |k r| jrtdtdt t|}|dd dksdS dS )a  
        Jump to the %%EOF marker.

        According to the specs, the %%EOF marker should be at the very end of
        the file. Hence for standard-compliant PDF documents this function will
        read only the last part (DEFAULT_BUFFER_SIZE).
        rK   r   Nr  s   %%EOFzEOF marker not found)r   r   r/   r   rx   r   )rU   r   HEADER_SIZElinerN   rN   rO   r  n  s   
zPdfReader._find_eof_markerc                 C   s~   t |}zt|}W n" ty,   |dstdt|dd  }tdt Y |S w t |}|dd dkr=td|S )z
        Find startxref entry - the location of the xref table.

        Args:
            stream:

        Returns:
            The bytes offset
        s	   startxrefzstartxref not found	   Nz startxref on same line as offset)r   r-  r  r  r/   stripr   rx   )rU   r   r  r  rN   rN   rO   r    s   

zPdfReader._find_startxref_posc                 C   s  | d}|dkrtdt| |dd d}	 ttt|| }|r3|dkr3|| _| jr3t	dt
 d	}t| |dd ttt|| }t| |dd d}||k rO| d
}|d dv rt|dd | d
}|d dv sc|d dv r|dd z|d d d\}}	|dd }
t|t|	}}W nZ ty   t|drt| }n| }|dd | d}|| t| d |}|d u rt	d| dt
 d}d}nt	d| dt
 t|d}| }Y nw || jvri | j|< i | j|< || j| v rn3|| j| |< z|
dk| j| |< W n
 ty,   Y nw z|
dk| jd |< W n
 tyB   Y nw |d7 }|d7 }||k sXt| |dd | d}|dkrj|dd nd S q)Nr~  s   refzxref table read errorr[  r   Tr   zFXref table not zero-indexed. ID numbers for objects will be corrected.F      
is   0123456789t             rk  z\s+(\d+)\s+objzentry z( in Xref table invalid; object not foundr  z' in Xref table invalid but object found   f   s   traileri)r   r/   r   r   r   r-  rD   r   r   r   rx   splitrq  r   rr  rk  r   rs  rt  ru  grouprv  rn  ro  )rU   r   ref
first_timerh  rI   cntr  offset_bgeneration_bentry_type_bre  r_  r{  r|  r   trailer_tagrN   rN   rO   _read_standard_xref_table  s   











J

z#PdfReader._read_standard_xref_tabler  r  c           
   
   C   s  i | _ i | _i | _t | _|d ur||d |d}|dv r%|d}|dkr/| |}n|rEz| | W d S  t	yD   d}Y nw |
 rz| |}W n* t	yz } ztj| jv rntd|j t W Y d }~d S td|j d }~ww tjtjtjtjtjf}|D ]}||v r|| jvr||| jt|< qd|v r| }	|tt|d d d | | ||	d d|v rtt|d }nd S | ||}|d usd S d S )	Nr   r   r     xz!Previous trailer can not be read ztrailer can not be read /XRefStm/Prev)rn  ro  r\  r7   r   r   r   
_read_xref_rebuild_xref_tablerq  isdigit_read_pdf15_xref_streamr   r   r   argsrx   r/   r   r   r   SIZEraw_getr=   r   r   r-  _read_xref_other_error)
rU   r   r  r  r*  
xrefstreametrailer_keysrV   r|  rN   rN   rO   r    s\   



	
z(PdfReader._read_xref_tables_and_trailersc                 C   s   |  | t| |dd ttttf t|| }| D ]\}}|| j	vr-|| j	|< qd|v re|
 }|tt|d d d z| | W n ty^   td|d  dt Y nw ||d d|v ro|d }|S d S )Nr[  r   r  r   zXRef object at z, can not be read, some object may be missingr  )r  r   r   r   r   r|   r   rD   r   r   r   r-  r  rq  r   rx   )rU   r   new_trailerrV   r  r|  r  rN   rN   rO   r  7  s0   


zPdfReader._read_xrefc                 C   s   |dkr| j rtdtdt d S |dd |d}|d}|dkr.|d	| 8 }|S ||d td
D ]}|d rI||7 }|  S q8d| j	v rk| j sktdt z| 
| W d S  tyj   tdw td)Nr   z6/Prev=0 in the trailer (try opening with strict=False)zA/Prev=0 in the trailer - assuming there is no previous xref tableir   r     xrefr[  
      r   z"Invalid parent xref., rebuild xrefzcan not rebuild xrefz/Could not find xref table at specified location)r   r/   r   rx   r   r   findrI  r  r   r  rq  )rU   r   r  r<  xref_loclookrN   rN   rO   r  P  s>   



z PdfReader._read_xref_other_errorc                    s   | dd |\}}ttt|}tt|d dks J ||| tt|	 |
dd|
dg}ttttf |
d t d	ksNJ jr^t d	kr^td
  dtdttttdf f f fdd}dtdttttdf f dtffdd}||| |S )Nr[  r   rZ  z/XRefz/Indexr   z/Sizez/Wr~  zToo many entry sizes: r  rJ   .c                    s8    |  dkr  |  }t| |  S | dkrdS dS )Nr   r   )r   rP   )r  rH   )entry_sizesrc  rN   rO   	get_entry  s   z4PdfReader._read_pdf15_xref_stream.<locals>.get_entryrh  r_  c                    s   |  j |g v p|  jv S rR   )rn  rW   r\  )rh  r_  rT   rN   rO   used_before  s   z6PdfReader._read_pdf15_xref_stream.<locals>.used_before)r   rp  r   r4   rD   r|   rz  r   r   r]  rW   r   r   r   r   r/   r-  r   r   bool_read_xref_subsections)rU   r   r)  r_  r  	idx_pairsr  r  rN   )r  rU   rc  rO   r  y  s   (*z!PdfReader._read_pdf15_xref_streamc                 C   s   |  |d d | d}|dkr| d}|dvrdS | d}|dkrId}|dv r:| d}|dkr6d	S |dv s+|| d	7 }| d
krIdS dS )z
        Return an int which indicates an issue. 0 means there is no issue.

        Args:
            stream:
            startxref:

        Returns:
            0 means no issue, other values represent specific issues.
        r   r      js   
 	   r  r   s   0123456789 	r	  s   objr~  )r   r   lower)r   r  r  rN   rN   rO   r    s$   



zPdfReader._get_xref_issuesc           	      C   s   i | _ |dd |d}td|D ]$}t|d}t|d}|| j vr.i | j |< |d| j | |< q|dd td|D ]'}||dd tt	t
t
f t|| }t| D ]	\}}|| j|< qbqEd S )Nr   r[  s(   [\r\n \t][ \t]*(\d+)[ \t]+(\d+)[ \t]+objr   r	  s$   [\r\n \t][ \t]*trailer[\r\n \t]*(<<))rn  r   r   rs  finditerr-  r  rv  r   r   r   rD   r   r   r   )	rU   r   f_r}  r)  r_  r  rV   r  rN   rN   rO   r    s"   


zPdfReader._rebuild_xref_tabler  r  .r  c                 C   s   |  |D ]g\}}t||| D ][}|d}|dkr#|d}|d}	q|dkrF|d}
|d}|| jvr9i | j|< |||sE|
| j| |< q|dkra|d}|d}d}|||s`||f| j|< q| jrktd| qqd S )Nr   r   r	  zUnknown xref type: )_pairsrI  rn  r\  r   r/   )rU   r  r  r  rv  rI   rh  	xref_typenext_free_objectnext_generationbyte_offsetr_  
objstr_num	obstr_idxrN   rN   rO   r    s4   




z PdfReader._read_xref_subsectionsc                 c   s<    d}	 || ||d  fV  |d7 }|d t |krd S q)Nr   Tr   r	  )r   )rU   r5  r  rN   rN   rO   r    s   zPdfReader._pairsc                 C   s   | j std| j |S )a  
        When using an encrypted / secured PDF file with the PDF Standard
        encryption handler, this function will allow the file to be decrypted.
        It checks the given password against the document's user password and
        owner password, and then stores the resulting decryption key if either
        password is correct.

        It does not matter which password was matched.  Both passwords provide
        the correct decryption key that will allow the document to be used with
        this library.

        Args:
            password: The password to match.

        Returns:
            An indicator if the document was decrypted and weather it was the
            owner password or the user password.
        r   )r   r/   r   )rU   r   rN   rN   rO   decrypt  s   zPdfReader.decryptpermissions_codec              	      sJ   t dddd tjtjtjtjtjtjtjtj	d} fdd|
 D S )z>Take the permissions as an integer, return the allowed access.decode_permissionsuser_access_permissionsz5.0.0)old_namenew_name
removed_in)printmodifycopyannotationsformsaccessabilityassembleprint_high_qualityc                    s   i | ]\}}| |@ d kqS r   rN   )r   rV   flagr  rN   rO   r   &  s    z0PdfReader.decode_permissions.<locals>.<dictcomp>)r   r&   PRINTMODIFYEXTRACTADD_OR_MODIFYFILL_FORM_FIELDSEXTRACT_TEXT_AND_GRAPHICSASSEMBLE_DOCPRINT_TO_REPRESENTATIONr   )rU   r  permissions_mappingrN   r  rO   r    s   
zPdfReader.decode_permissionsc                 C   s   | j du rdS t| j jS )zWGet the user access permissions for encrypted documents. Returns None if not encrypted.N)r   r&   PrT   rN   rN   rO   r  +  s   
z!PdfReader.user_access_permissionsc                 C   s   t j| jv S )z
        Read-only boolean property showing whether this PDF file is encrypted.

        Note that this property, if true, will remain true even after the
        :meth:`decrypt()<pypdf.PdfReader.decrypt>` method is called.
        )r   r   r   rT   rN   rN   rO   r   2  rd   zPdfReader.is_encryptedc           
      C   s   d }i }t t| jtj }d|vs|d sd S t t|d }d|v rTt t|d }t|}|D ]$}|}t|}t	|t
rSt tt | }|rStt|j}	|	||< q/|S )N	/AcroFormz/XFA)r   r7   r   r   r   rB   r2   iternextrX   r<   r   r8   r]   zlib
decompressr   _data)
rU   r   rY   r   r   r  r   tagr   esrN   rN   rO   xfa<  s&   
zPdfReader.xfar  c                 C   s   t t| jtj }d|vst|d tsdS t t|td }d|vr%dS t }t||td< |td |td< | dt	dd | j
D d	 | t }||j ||td< t t|d D ]}| }d
|v rutd|j dt |j|td
< qa|S )z
        Add a top level form that groups all form fields below it.

        Args:
            name: text string of the "/T" Attribute of the created object

        Returns:
            The created object. ``None`` means no object was created.
        r  Nr   r   z/Kidsr   c                 S   s   g | ]
\}}|d kr|qS r  rN   )r   gr  rN   rN   rO   r  n  s    z.PdfReader.add_form_topname.<locals>.<listcomp>r   r   zTop Level Form Field z have a non-expected parent)r   r7   r   r   r   rX   r=   rA   rz  maxr   r2   r   r   r]   r   rx   )rU   r  r   acroforminterimarrr   r   rN   rN   rO   add_form_topnameT  s8   

zPdfReader.add_form_topnamec                 C   s~   t t| jtj }d|vst|d tsdS t t|td }d|vr%dS t tt t|td d  }t	||td< |S )z
        Rename top level form field that all form fields below it.

        Args:
            name: text string of the "/T" field of the created object

        Returns:
            The modified object. ``None`` means no object was modified.
        r  Nr   r   r   )
r   r7   r   r   r   rX   r=   r2   r]   rA   )rU   r  r   r  r  rN   rN   rO   rename_form_topname~  s   

zPdfReader.rename_form_topnamec                    s   t  fdd  D S )Nc                    s   i | ]}| j |fqS rN   )_get_attachment_list)r   r  rT   rN   rO   r     s    
z)PdfReader.attachments.<locals>.<dictcomp>)LazyDict_list_attachmentsrT   rN   rT   rO   attachments  s
   
zPdfReader.attachmentsc                 C   s`   t t| jd }zt tt tt t|d d d }W n ty&   g  Y S w dd |D }|S )zv
        Retrieves the list of filenames of file attachments.

        Returns:
            list of filenames
        r   /Names/EmbeddedFilesc                 S   s   g | ]	}t |tr|qS rN   )rX   r|   )r   r   rN   rN   rO   r    s    z/PdfReader._list_attachments.<locals>.<listcomp>)r   r7   r   r2   r   )rU   r   	filenamesattachments_namesrN   rN   rO   r    s    zPdfReader._list_attachmentsc                 C   s"   |  || }t|tr|S |gS rR   )_get_attachmentsrX   r   )rU   r  outrN   rN   rO   r    s   
zPdfReader._get_attachment_listfilenamec           
      C   s   t t| jd }zt tt tt t|d d d }W n ty&   i  Y S w i }tt|D ]D}|| }t|trs|durC||krCq/|}||d  	 }|d d 
 }	||v rot|| tsg|| g||< || |	 q/|	||< q/|S )a  
        Retrieves all or selected file attachments of the PDF as a dictionary of file names
        and the file data as a bytestring.

        Args:
            filename: If filename is None, then a dictionary of all attachments
                will be returned, where the key is the filename and the value
                is the content. Otherwise, a dictionary with just a single key
                - the filename - and its content will be returned.

        Returns:
            dictionary of filename -> Union[bytestring or List[ByteString]]
            if the filename exists multiple times a List of the different version will be provided
        r   r  r  Nr   z/EFr@  )r   r7   r   r2   r   rI  r   rX   r|   r]   r]  r   r   )
rU   r  r   r  r  r  r   r  f_dictf_datarN   rN   rO   r
    s<   
zPdfReader._get_attachments)FNr6  r=  )FrR   )drx   ry   rz   r{   r}   r   rC   r   r   r   r   r  r|   rr  rS   r	   r   r   r   r   rQ   r   rG   r   r-  r   r   r   r   rB   r   r7   r   r   r   r   r  r   rE   r  r  r2   r(  r>   r<   r/  r2  r6   r4  r   r?   r  r#  r,  rJ  rL  rF   rM  r   r@   rg  rj  r]   r   r   rp  rm  rz  r   r  r  r  r  r  r  r  r4   r8   r5   r  staticmethodr  r  r   r  r  r   r  r  r&   r  r   r  r   r  r   r  r  r  r
  rN   rN   rN   rO   r~     s   

6


2


6

%(


F

)

 K	
5
6

 

&m
1
)
(
""	*r~   c                   @   sb   e Zd ZdededdfddZdedefdd	Zdee fd
dZde	fddZ
defddZdS )r  r  kwrJ   Nc                 O   s   t |i || _d S rR   )dict	_raw_dict)rU   r  r  rN   rN   rO   rS        zLazyDict.__init__rV   c                 C   s   | j |\}}||S rR   )r  __getitem__)rU   rV   funcargrN   rN   rO   r    s   zLazyDict.__getitem__c                 C   
   t | jS rR   )r  r  rT   rN   rN   rO   __iter__     
zLazyDict.__iter__c                 C   r  rR   )r   r  rT   rN   rN   rO   __len__  r  zLazyDict.__len__c                 C   s   dt |   dS )NzLazyDict(keys=r  )r   r   rT   rN   rN   rO   __str__  r  zLazyDict.__str__)rx   ry   rz   r   rS   r|   r  r
   r  r-  r  r  rN   rN   rN   rO   r    s    r  )dr  rs  rL   r  r   ior   r   pathlibr   typingr   r   r   r	   r
   r   r   r   r   r   r   r   r   r   _pager   r   _page_labelsr   rH  _utilsr   r   r   r   r   r   r   r   r   r    r!   	constantsr"   r  r#   r   r$   r%   r&   r'   r   r(   r[   r)   r   r*   rO  r+   r   r,   r   errorsr-   r.   r/   r0   r1   genericr2   r3   r4   r5   r6   r7   r8   r9   r:   r;   r<   r=   r>   r?   r@   rA   rB   rC   rD   r  rE   rF   xmprG   rr  r-  rP   rQ   r~   r  rN   rN   rN   rO   <module>   sV   44T&              t