o
    fH9                     @   sT  d dl Z d dlZd dlZd dlZd dlZd dlmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dl m!Z! d dl m"Z" d dl m#Z# d dl$m%Z% dZ&edduZ'e'rd dl(m)Z) edduZ*e*rd dl+Z,dZ-e .dZ/dZ0h dZ1de2fde2fd e2fd!e3fd"e4fd#e3fd$Z5e#d%Z6e#d&Z7G d'd( d(Z8G d)d* d*e9Z:G d+d, d,e;Z<G d-d. d.e9Z=G d/d0 d0e9Z>G d1d2 d2e9Z?d3d4 Z@edgd5d6ZAd7d8 ZBd9d: ZCd;d< ZDd=d> ZEed?d@ ZFdhdBdCZG	D	 	 didEdFZH	D		D	 	 	GdjdHdIZIdJdK ZJdLdM ZKdNdO ZLeBdkdPdQZMeBdRdS ZNddDd e8jOd fdTdUZP		D	 	V	 dldWdXZQ		D	 	 dmdYdZZRddDd e8jOd fd[d\ZSdgd]d^ZTddDd e8jOd dfd_d`ZUdadDd e8jOd fdbdcZVddde ZWeXdfkreYeW  dS dS )n    N)contextmanager)
QUOTE_NONE)ENOENT)wraps)iglob)BytesIO)environ)extsep)linesep)remove)normcase)normpath)realpath)find_loader)NamedTemporaryFile)sleep)InvalidVersion)parse)Version)Image	tesseractnumpy)ndarraypandaszutf-8z	^[a-z_]+$RGB>
   BMPGIFPBMPGMPNGPPMJPEGTIFFWEBPJPEG2000page_numorientationrotateorientation_confscriptscript_conf)zPage numberzOrientation in degreesRotatezOrientation confidenceScriptzScript confidencez3.05z4.1.0c                   @   s   e Zd ZdZdZdZdZdS )Outputbytesz
data.framedictstringN)__name__
__module____qualname__BYTES	DATAFRAMEDICTSTRING r8   r8   M/var/www/html/dev/env/lib/python3.10/site-packages/pytesseract/pytesseract.pyr-   E   s
    r-   c                          e Zd Z fddZ  ZS )PandasNotSupportedc                       t  d d S )NzMissing pandas packagesuper__init__self	__class__r8   r9   r?   M   s   zPandasNotSupported.__init__r1   r2   r3   r?   __classcell__r8   r8   rB   r9   r;   L       r;   c                   @   s   e Zd Zdd ZdS )TesseractErrorc                 C   s   || _ || _||f| _d S N)statusmessageargs)rA   rI   rJ   r8   r8   r9   r?   R   s   zTesseractError.__init__N)r1   r2   r3   r?   r8   r8   r8   r9   rG   Q   s    rG   c                       r:   )TesseractNotFoundErrorc                    s   t  t d d S )NzQ is not installed or it's not in your PATH. See README file for more information.)r>   r?   tesseract_cmdr@   rB   r8   r9   r?   Y   s   zTesseractNotFoundError.__init__rD   r8   r8   rB   r9   rL   X   rF   rL   c                       r:   )TSVNotSupportedc                    r<   )Nz4TSV output not supported. Tesseract >= 3.05 requiredr=   r@   rB   r8   r9   r?   a      zTSVNotSupported.__init__rD   r8   r8   rB   r9   rN   `   rF   rN   c                       r:   )ALTONotSupportedc                    r<   )Nz6ALTO output not supported. Tesseract >= 4.1.0 requiredr=   r@   rB   r8   r9   r?   h   rO   zALTONotSupported.__init__rD   r8   r8   rB   r9   rP   g   rF   rP   c                 C   s   |    z;z| d W n ty   td Y n ty!   Y nw W |   || _d S W |   || _d S W |   || _d S |   || _w )N   )	terminatewait	TypeErrorr   	Exceptionkill
returncode)processcoder8   r8   r9   rV   n   s&   


rV   c                 c   s    zL|s|   d V  W | j  | j  | j  d S z| j |d\}}|V  W n tjy;   t| d tdw W | j  | j  | j  d S | j  | j  | j  w )NrQ   )timeoutzTesseract process timeout)	communicatestdinclosestdoutstderr
subprocessTimeoutExpiredrV   RuntimeError)procseconds_error_stringr8   r8   r9   timeout_manager{   s,   
	






rh   c                    s    t   fdd_S )Nc                     s    j u r | i |_ j S rH   )_result)rK   kwargsfuncwrapperr8   r9   rm      s   
zrun_once.<locals>.wrapper)r   ri   )rl   r8   rk   r9   run_once   s   rn   c                 C   s"   d dd | t D  S )N c                 s   s    | ]}|V  qd S rH   r8   .0liner8   r8   r9   	<genexpr>   s    
zget_errors.<locals>.<genexpr>)joindecodeDEFAULT_ENCODING
splitlinesstrip)rg   r8   r8   r9   
get_errors   s
   
ry   c                 C   s\   t | r|  dn| D ] }zt| W q ty+ } z|jtkr! W Y d}~qd}~ww dS )z5Tries to remove temp files by filename wildcard path.*N)r   r   OSErrorerrnor   )	temp_namefilenameer8   r8   r9   cleanup   s   
r   c                 C   s   t rt| trt| } t| tjstd| jsdn| j}|tvr&tdd|  v r@t	t
| jd}|| d| d |} || _| |fS )NzUnsupported image objectr   zUnsupported image format/typeA)   r   r   )r   r   )numpy_installed
isinstancer   r   	fromarrayrT   formatSUPPORTED_FORMATSgetbandsnewRGB_MODEsizepaste
getchannel)image	extension
backgroundr8   r8   r9   prepare   s   
r   c                 c   s    zdt dddF}t| tr)|jttt| fV  	 W d    W t|j d S t| \} }|j dt	 | }| j
|| jd |j|fV  W d    n1 sQw   Y  W t|j d S W t|j d S t|j w )Ntess_F)prefixdelete_input)r   )r   r   strnamer   r   r   r   r   r	   saver   )r   fr   input_file_namer8   r8   r9   r      s    
			r   Tc                 C   sh   t jt jd td}tt dr$t  |d< |d  jt jO  _t j|d _| r-t j|d< |S t j	|d< |S )N)r]   r`   startupinfoenvSTARTUPINFOr   r_   )
ra   PIPEr   hasattrr   dwFlagsSTARTF_USESHOWWINDOWSW_HIDEwShowWindowDEVNULL)include_stdoutrj   r8   r8   r9   subprocess_args   s   


r    c              
   C   s   g }t jds|dkr|ddt|f7 }|t| |f7 }|d ur&|d|f7 }|r/|t|7 }|r:|dvr:|| ztj	|fi t
 }W n ty[ }	 z	|	jtkrT t d }	~	ww t||}
|jrmt|jt|
W d    d S 1 sxw   Y  d S )Nwin32r   nicez-n-l>   boxosdtsvxml)sysplatform
startswithr   rM   shlexsplitappendra   Popenr   r{   r|   r   rL   rh   rW   rG   ry   )input_filenameoutput_filename_baser   langconfigr   rZ   cmd_argsrd   r   rg   r8   r8   r9   run_tesseract   s.   	

"r   Fc              	   C   s   t | ]\}}|||||||d}	tdi |	 |	d  t | }
t|
d*}|r<| W  d    W  d    S | tW  d    W  d    S 1 sTw   Y  W d    d S 1 sdw   Y  d S )N)r   r   r   r   r   r   rZ   r   rbr8   )r   r   r	   openreadru   rv   )r   r   r   r   r   rZ   return_bytesr}   r   rj   r~   output_filer8   r8   r9   run_and_get_output  s*   

"r   c              
      s   i } fdd|   dD }t|dk r|S |d}t|}t|d |k r0|d d |dk r8||7 }t|D ]<\}}t ||< |D ]0}	t|	|krPqG||krlz
tt|	| }
W n t	yk   |	| }
Y nw |	| }
|| |
 qGq<|S )Nc                    s   g | ]}|  qS r8   r   )rq   rowcell_delimiterr8   r9   
<listcomp>*  s    z file_to_dict.<locals>.<listcomp>
   r   r[   r   )
rx   r   lenpopr   	enumeratelistintfloat
ValueError)r   r   str_col_idxresultrowsheaderlengthiheadr   valr8   r   r9   file_to_dict(  s2   

r   c                 C   s@   |t u r|  S |tu rzt|  W dS  ty   Y dS w dS )NTF)r   isdigitr   r   )r   _typer8   r8   r9   is_validK  s   r   c                 C   s   dd dd |  dD D S )Nc                 S   sX   i | ](}t |d krt|d t|d  d rt|d  d t|d  d |d qS )r   rQ   r   )r   r   OSD_KEYS)rq   kvr8   r8   r9   
<dictcomp>Z  s
    &$zosd_to_dict.<locals>.<dictcomp>c                 s   s    | ]}| d V  qdS ): Nr   rp   r8   r8   r9   rs   \  s    zosd_to_dict.<locals>.<genexpr>r   r   )r   r8   r8   r9   osd_to_dictY  s   r   c                 C   s   t dg}| r|t| 7 }ztj|tjtjd}W n
 ty#   t w |j	dvr,t g }|j
rK|j
ttD ]}| }t|rJ|| q:|S )Nz--list-langs)r_   r`   )r   rQ   )rM   r   r   ra   runr   STDOUTr{   rL   rW   r_   ru   rv   r
   rx   LANG_PATTERNmatchr   )r   r   r   	languagesrr   r   r8   r8   r9   get_languagesa  s,   



r   c               	   C   s   zt jtdgt jtt jd} W n
 ty   t w | t	}|
tjdd d^}}|d^}}zt|}|tks@J W |S  ttfyS   td| dw )	z9
    Returns Version object of the Tesseract version
    z	--version)r`   r   r]   
   Nro   -zInvalid tesseract version: "")ra   check_outputrM   r   r   r   r{   rL   ru   rv   lstripr0   	printable	partitionr   TESSERACT_MIN_VERSIONAssertionErrorr   
SystemExit)outputraw_versionstr_versionrf   versionr8   r8   r9   get_tesseract_version~  s(   

r   c                    sD   | d||||g t j fddt j fddt j fddi|  S )zS
    Returns the result of a Tesseract OCR run on the provided image to string
    txtc                         t  dg  S NTr   r8   rK   r8   r9   <lambda>      z!image_to_string.<locals>.<lambda>c                      s   dt   iS )Ntextr   r8   r   r8   r9   r         c                         t   S rH   r   r8   r   r8   r9   r         )r-   r4   r6   r7   r   r   r   r   output_typerZ   r8   r   r9   image_to_string  s   r  pdfc                 C   s0   |dvrt d| | |||||dg}t| S )zU
    Returns the result of a Tesseract OCR run on the provided image to pdf/hocr
    >   r  hocrzUnsupported extension: T)r   r   )r   r   r   r   r   rZ   rK   r8   r8   r9   image_to_pdf_or_hocr  s   r  c                 C   s8   t  tk rt d|  }| d||||dg}t| S )zU
    Returns the result of a Tesseract OCR run on the provided image to ALTO XML
    z-c tessedit_create_alto=1 r   T)r   TESSERACT_ALTO_VERSIONrP   rx   r   )r   r   r   r   rZ   rK   r8   r8   r9   image_to_alto_xml  s
   
r  c                    sR   |   d}| d||||g tj fddtj fddtj fddi|  S )zR
    Returns string containing recognized characters and their box boundaries
    z batch.nochop makeboxr   c                      r   r   r   r8   r   r8   r9   r     r   z image_to_boxes.<locals>.<lambda>c                      s   t dt   ddS )Nz char left bottom right top page
ro   r   r   r   r8   r   r8   r9   r     s
    c                      r   rH   r   r8   r   r8   r9   r     r   rx   r-   r4   r6   r7   r  r8   r   r9   image_to_boxes  s   r  c              	   C   sT   t st tdd}z|| W n ttfy   Y nw tjtt	|  fi |S )N	)quotingsep)
pandas_installedr;   r   updaterT   r   pdread_csvr   r   )rK   r   rj   r8   r8   r9   get_pandas_output  s   
r  c              
      sr   t  tk rt d|  }| d||||g tj fddtj fddtj fddtj fddi|  S )zt
    Returns string containing box boundaries, confidences,
    and other information. Requires Tesseract 3.05+
    z-c tessedit_create_tsv=1 r   c                      r   r   r   r8   r   r8   r9   r     r   zimage_to_data.<locals>.<lambda>c                      s   t  dg S r   )r  r8   rK   pandas_configr8   r9   r     s    c                      s   t t  ddS )Nr  r[   r	  r8   r   r8   r9   r     s    c                      r   rH   r   r8   r   r8   r9   r     r   )	r   r   rN   rx   r-   r4   r5   r6   r7   )r   r   r   r   r  rZ   r  r8   r  r9   image_to_data  s   
r  r   c                    sR   d|   }| d||||g tj fddtj fddtj fddi|  S )zN
    Returns string containing the orientation and script detection (OSD)
    z--psm 0 r   c                      r   r   r   r8   r   r8   r9   r   )  r   zimage_to_osd.<locals>.<lambda>c                      s   t t  S rH   )r   r   r8   r   r8   r9   r   *  r   c                      r   rH   r   r8   r   r8   r9   r   +  r   r
  r  r8   r   r9   image_to_osd  s   r  c               
   C   s,  t tjdkrtjd d } }n#t tjdkr*tjd dkr*tjd tjd } }n	tdtjd dS z"t| }tt||d W d    W d S 1 sNw   Y  W d S  tyt } ztt	| d	tjd W Y d }~dS d }~w t
y } ztt|j d
| tjd W Y d }~dS d }~ww )Nr   rQ      r      z(Usage: pytesseract [-l lang] input_file
)file)r   r   r   )r   r   argvprintr`   r   r   r  rL   r   r{   typer1   )r~   r   imgr   r8   r8   r9   main/  s&   &r  __main__rH   )T)r   r   r   )r   Nr   r   r   F)r   )Nr   r   r  r   )Nr   r   r   )Zrer   r0   ra   r   
contextlibr   csvr   r|   r   	functoolsr   globr   ior   osr   r	   r
   r   os.pathr   r   r   pkgutilr   tempfiler   timer   packaging.versionr   r   r   PILr   rM   r   r   r   r  r   r  rv   compiler   r   r   r   r   r   r   r   r  r-   EnvironmentErrorr;   rc   rG   rL   rN   rP   rV   rh   rn   ry   r   r   r   r   r   r   r   r   r   r   r   r7   r  r  r  r  r  r  r  r  r1   exitr8   r8   r8   r9   <module>   s   
	



(
#






!

