o
    >e9!                     @   s<   d dl mZmZ ddlmZ ddlmZ G dd deZdS )    )ListUnion   )CharSetProber)ProbingStatec                       s  e Zd ZdZdZdZd' fddZd' fdd	Zede	fd
dZ
ede	fddZdefddZdefddZdefddZdefddZdefddZdefddZdee ddfddZdee ddfddZd eeef defd!d"Zedefd#d$Zdefd%d&Z  ZS )(UTF1632Proberad  
    This class simply looks for occurrences of zero bytes, and infers
    whether the file is UTF16 or UTF32 (low-endian or big-endian)
    For instance, files looking like (       [nonzero] )+
    have a good probability to be UTF32BE.  Files looking like (   [nonzero] )+
    may be guessed to be UTF16BE, and inversely for little-endian varieties.
       gGz?returnNc                    sj   t    d| _dgd | _dgd | _tj| _g d| _d| _	d| _
d| _d| _d| _d| _|   d S )Nr      r   r   r   r   F)super__init__positionzeros_at_modnonzeros_at_modr   	DETECTING_statequadinvalid_utf16beinvalid_utf16leinvalid_utf32beinvalid_utf32le'first_half_surrogate_pair_detected_16be'first_half_surrogate_pair_detected_16leresetself	__class__ V/var/www/html/humari/django-venv/lib/python3.10/site-packages/chardet/utf1632prober.pyr   )   s   

zUTF1632Prober.__init__c                    sb   t    d| _dgd | _dgd | _tj| _d| _d| _	d| _
d| _d| _d| _g d| _d S )Nr   r
   Fr   )r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r    r   8   s   
zUTF1632Prober.resetc                 C   s4   |   rdS |  rdS |  rdS |  rdS dS )Nzutf-32bezutf-32lezutf-16bezutf-16lezutf-16)is_likely_utf32beis_likely_utf32leis_likely_utf16beis_likely_utf16ler   r   r   r    charset_nameF   s   zUTF1632Prober.charset_namec                 C   s   dS )N r   r   r   r   r    languageS   s   zUTF1632Prober.languagec                 C      t d| jd S )N      ?g      @maxr   r   r   r   r    approx_32bit_charsW      z UTF1632Prober.approx_32bit_charsc                 C   r(   )Nr)   g       @r*   r   r   r   r    approx_16bit_charsZ   r-   z UTF1632Prober.approx_16bit_charsc                 C   sj   |   }|| jko4| jd | | jko4| jd | | jko4| jd | | jko4| jd | | jko4| j S Nr   r         )r,   MIN_CHARS_FOR_DETECTIONr   EXPECTED_RATIOr   r   r   approx_charsr   r   r    r!   ]      
zUTF1632Prober.is_likely_utf32bec                 C   sj   |   }|| jko4| jd | | jko4| jd | | jko4| jd | | jko4| jd | | jko4| j S r/   )r,   r2   r   r3   r   r   r4   r   r   r    r"   g   r6   zUTF1632Prober.is_likely_utf32lec                 C   V   |   }|| jko*| jd | jd  | | jko*| jd | jd  | | jko*| j S )Nr   r1   r   r0   )r.   r2   r   r3   r   r   r4   r   r   r    r#   q      
zUTF1632Prober.is_likely_utf16bec                 C   r7   )Nr   r0   r   r1   )r.   r2   r   r3   r   r   r4   r   r   r    r$   {   r8   zUTF1632Prober.is_likely_utf16ler   c                 C   s   |d dks&|d dks&|d dkr)|d dkr)d|d   kr$dkr)n nd| _ |d dksP|d dksP|d dkrU|d dkrWd|d   krMdkrYn d	S d| _d	S d	S d	S d	S )
z
        Validate if the quad of bytes is valid UTF-32.

        UTF-32 is valid in the range 0x00000000 - 0x0010FFFF
        excluding 0x0000D800 - 0x0000DFFF

        https://en.wikipedia.org/wiki/UTF-32
        r   r         r0      Tr1   N)r   r   )r   r   r   r   r    validate_utf32_characters   s   
46
z'UTF1632Prober.validate_utf32_characterspairc                 C   s   | j s'd|d   krdkrn nd| _ n'd|d   kr!dkr&n nd| _nd|d   kr3dkr9n nd| _ nd| _| jshd|d   krKdkrRn nd| _d	S d|d   kr^dkrfn d	S d| _d	S d	S d|d   krtdkr{n nd| _d	S d| _d	S )
a9  
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        r:   r      T   r;   Fr   N)r   r   r   r   )r   r=   r   r   r    validate_utf16_characters   s$   




z'UTF1632Prober.validate_utf16_charactersbyte_strc                 C   s   |D ]H}| j d }|| j|< |dkr,| | j | | jdd  | | jdd  |dkr:| j|  d7  < n	| j|  d7  < |  j d7  _ q| jS )Nr
   r1   r   r0   r   )r   r   r<   r@   r   r   state)r   rA   cmod4r   r   r    feed   s   

zUTF1632Prober.feedc                 C   sJ   | j tjtjhv r| j S |  dkrtj| _ | j S | jdkr"tj| _ | j S )Ng?i   )r   r   NOT_MEFOUND_ITget_confidencer   r   r   r   r    rB      s   
zUTF1632Prober.statec                 C   s(   |   s|  s|  s|  rdS dS )Ng333333?g        )r$   r#   r"   r!   r   r   r   r    rH      s   zUTF1632Prober.get_confidence)r	   N) __name__
__module____qualname____doc__r2   r3   r   r   propertystrr%   r'   floatr,   r.   boolr!   r"   r#   r$   r   intr<   r@   r   bytes	bytearrayr   rE   rB   rH   __classcell__r   r   r   r    r      s,    	



 r   N)typingr   r   charsetproberr   enumsr   r   r   r   r   r    <module>   s   