???<!-- GIF89;a -->
123123123123
.....................................................................................................................................???<!-- GIF89;a -->
123123123123
.....................................................................................................................................3
 fAE                 @   s   d Z ddlZddlZddlZddlmZ dgZejdZejdZ	ejdZ
ejdZejd	Zejd
ZejdZejdZejdZejdejZejd
ZejdZG dd dejZdS )zA parser for HTML and XHTML.    N)unescape
HTMLParserz[&<]z
&[a-zA-Z#]z%&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]z)&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]z	<[a-zA-Z]>z--\s*>z+([a-zA-Z][^\t\n\r\f />\x00]*)(?:\s|/(?!>))*z]((?<=[\'"\s/])[^\s/>][^\s/=>]*)(\s*=+\s*(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?(?:\s|/(?!>))*aF  
  <[a-zA-Z][^\t\n\r\f />\x00]*       # tag name
  (?:[\s/]*                          # optional whitespace before attribute name
    (?:(?<=['"\s/])[^\s/>][^\s/=>]*  # attribute name
      (?:\s*=+\s*                    # value indicator
        (?:'[^']*'                   # LITA-enclosed value
          |"[^"]*"                   # LIT-enclosed value
          |(?!['"])[^>\s]*           # bare value
         )
         (?:\s*,)*                   # possibly followed by a comma
       )?(?:\s|/(?!>))*
     )*
   )?
  \s*                                # trailing whitespace
z#</\s*([a-zA-Z][-.a-zA-Z0-9:_]*)\s*>c               @   s   e Zd ZdZd:ZddddZdd	 Zd
d Zdd ZdZ	dd Z
dd Zdd Zdd Zdd Zd;ddZdd Zdd Zd d! Zd"d# Zd$d% Zd&d' Zd(d) Zd*d+ Zd,d- Zd.d/ Zd0d1 Zd2d3 Zd4d5 Zd6d7 Zd8d9 ZdS )<r   aE  Find tags and other markup and call handler functions.

    Usage:
        p = HTMLParser()
        p.feed(data)
        ...
        p.close()

    Start tags are handled by calling self.handle_starttag() or
    self.handle_startendtag(); end tags by self.handle_endtag().  The
    data between tags is passed from the parser to the derived class
    by calling self.handle_data() with the data as argument (the data
    may be split up in arbitrary chunks).  If convert_charrefs is
    True the character references are converted automatically to the
    corresponding Unicode character (and self.handle_data() is no
    longer split in chunks), otherwise they are passed by calling
    self.handle_entityref() or self.handle_charref() with the string
    containing respectively the named or numeric reference as the
    argument.
    scriptstyleT)convert_charrefsc            C   s   || _ | j  dS )zInitialize and reset this instance.

        If convert_charrefs is True (the default), all character references
        are automatically converted to the corresponding Unicode characters.
        N)r   reset)selfr    r
   0/opt/alt/python36/lib64/python3.6/html/parser.py__init__W   s    zHTMLParser.__init__c             C   s(   d| _ d| _t| _d| _tjj|  dS )z1Reset this instance.  Loses all unprocessed data. z???N)rawdatalasttaginteresting_normalinteresting
cdata_elem_markupbase
ParserBaser   )r	   r
   r
   r   r   `   s
    zHTMLParser.resetc             C   s   | j | | _ | jd dS )zFeed data to the parser.

        Call this as often as you want, with as little or as much text
        as you want (may include '\n').
        r   N)r   goahead)r	   datar
   r
   r   feedh   s    zHTMLParser.feedc             C   s   | j d dS )zHandle any buffered data.   N)r   )r	   r
   r
   r   closeq   s    zHTMLParser.closeNc             C   s   | j S )z)Return full source of start tag: '<...>'.)_HTMLParser__starttag_text)r	   r
   r
   r   get_starttag_textw   s    zHTMLParser.get_starttag_textc             C   s$   |j  | _tjd| j tj| _d S )Nz</\s*%s\s*>)lowerr   recompileIr   )r	   elemr
   r
   r   set_cdata_mode{   s    
zHTMLParser.set_cdata_modec             C   s   t | _d | _d S )N)r   r   r   )r	   r
   r
   r   clear_cdata_mode   s    zHTMLParser.clear_cdata_modec             C   sZ  | j }d}t|}x||k r| jr|| j r||jd|}|dk r|jdt||d }|dkrvtjdj	|| rvP |}n(| j
j	||}|r|j }n| jrP |}||k r| jr| j r| jt|||  n| j|||  | j||}||krP |j}|d|rLtj||r&| j|}	n|d|r>| j|}	nl|d|rV| j|}	nT|d|rn| j|}	n<|d	|r| j|}	n$|d
 |k r| jd |d
 }	nP |	dk r>|sP |jd|d
 }	|	dk r|jd|d
 }	|	dk r|d
 }	n|	d
7 }	| jr,| j r,| jt|||	  n| j|||	  | j||	}q|d|rtj||}|r|j dd }
| j|
 |j }	|d|	d
 s|	d
 }	| j||	}qn:d||d  kr| j|||d   | j||d }P q|d|rtj||}|rP|jd
}
| j|
 |j }	|d|	d
 sB|	d
 }	| j||	}qtj||}|r|r|j ||d  kr|j }	|	|kr|}	| j||d
 }P n,|d
 |k r| jd | j||d
 }nP qdstdqW |rH||k rH| j rH| jr*| j r*| jt|||  n| j|||  | j||}||d  | _ d S )Nr   <&"   z[\s;]z</z<!--z<?z<!r   r   z&#   ;zinteresting.search() lied)r   lenr   r   findrfindmaxr   r   searchr   starthandle_datar   Z	updatepos
startswithstarttagopenmatchparse_starttagparse_endtagparse_commentparse_piparse_html_declarationcharrefgrouphandle_charrefend	entityrefhandle_entityref
incompleteAssertionError)r	   r;   r   injZampposr2   r0   knamer
   r
   r   r      s    
 










zHTMLParser.goaheadc             C   s   | j }|||d  dks"td|||d  dkr@| j|S |||d  dkr^| j|S |||d  j d	kr|jd
|d }|dkrdS | j||d |  |d S | j|S d S )Nr&   z<!z+unexpected call to parse_html_declaration()   z<!--   z<![	   z	<!doctyper   r   r(   r(   )r   r?   r5   Zparse_marked_sectionr   r*   handle_declparse_bogus_comment)r	   r@   r   gtposr
   r
   r   r7      s    

z!HTMLParser.parse_html_declarationr   c             C   s`   | j }|||d  dks"td|jd|d }|dkr>d	S |rX| j||d |  |d S )
Nr&   <!</z"unexpected call to parse_comment()r   r   )rK   rL   r(   r(   )r   r?   r*   handle_comment)r	   r@   Zreportr   posr
   r
   r   rI     s    zHTMLParser.parse_bogus_commentc             C   sd   | j }|||d  dks"tdtj||d }|s:dS |j }| j||d |  |j }|S )Nr&   z<?zunexpected call to parse_pi()r   r(   )r   r?   picloser-   r.   	handle_pir;   )r	   r@   r   r2   rB   r
   r
   r   r6   !  s    zHTMLParser.parse_pic             C   s  d | _ | j|}|dk r|S | j}||| | _ g }tj||d }|sPtd|j }|jdj  | _	}x||k r0t
j||}|sP |jddd\}	}
}|
sd }n^|d d d  ko|dd  kn  p|d d d  ko|dd  kn  r|dd }|rt|}|j|	j |f |j }qnW ||| j }|dkr| j \}}d
| j kr|| j jd
 }t| j | j jd
 }n|t| j  }| j|||  |S |jd	r| j|| n"| j|| || jkr| j| |S )Nr   r   z#unexpected call to parse_starttag()r&   rF   '"r   />
r(   r(   r(   )r   rS   )r   check_for_whole_start_tagr   tagfind_tolerantr2   r?   r;   r9   r   r   attrfind_tolerantr   appendstripZgetposcountr)   r+   r/   endswithhandle_startendtaghandle_starttagCDATA_CONTENT_ELEMENTSr!   )r	   r@   endposr   attrsr2   rC   tagmZattrnamerestZ	attrvaluer;   linenooffsetr
   r
   r   r3   -  sR    
(*

zHTMLParser.parse_starttagc             C   s   | j }tj||}|r|j }|||d  }|dkr>|d S |dkr~|jd|rZ|d S |jd|rjd	S ||krv|S |d S |dkrd
S |dkrdS ||kr|S |d S tdd S )Nr   r   /z/>r&   r   z6abcdefghijklmnopqrstuvwxyz=/ABCDEFGHIJKLMNOPQRSTUVWXYZzwe should not get here!r(   r(   r(   )r   locatestarttagend_tolerantr2   r;   r0   r?   )r	   r@   r   rb   rB   nextr
   r
   r   rU   `  s.    z$HTMLParser.check_for_whole_start_tagc             C   s2  | j }|||d  dks"tdtj||d }|s:dS |j }tj||}|s| jd k	rr| j|||  |S t	j||d }|s|||d  dkr|d S | j
|S |jdj }|jd|j }| j| |d S |jdj }| jd k	r|| jkr| j|||  |S | j|j  | j  |S )	Nr&   z</zunexpected call to parse_endtagr   rF   z</>r   r(   )r   r?   	endendtagr-   r;   
endtagfindr2   r   r/   rV   rI   r9   r   r*   handle_endtagr"   )r	   r@   r   r2   rJ   Z	namematchZtagnamer    r
   r
   r   r4     s8    


zHTMLParser.parse_endtagc             C   s   | j || | j| d S )N)r]   rk   )r	   ra   r`   r
   r
   r   r\     s    zHTMLParser.handle_startendtagc             C   s   d S )Nr
   )r	   ra   r`   r
   r
   r   r]     s    zHTMLParser.handle_starttagc             C   s   d S )Nr
   )r	   ra   r
   r
   r   rk     s    zHTMLParser.handle_endtagc             C   s   d S )Nr
   )r	   rD   r
   r
   r   r:     s    zHTMLParser.handle_charrefc             C   s   d S )Nr
   )r	   rD   r
   r
   r   r=     s    zHTMLParser.handle_entityrefc             C   s   d S )Nr
   )r	   r   r
   r
   r   r/     s    zHTMLParser.handle_datac             C   s   d S )Nr
   )r	   r   r
   r
   r   rM     s    zHTMLParser.handle_commentc             C   s   d S )Nr
   )r	   Zdeclr
   r
   r   rH     s    zHTMLParser.handle_declc             C   s   d S )Nr
   )r	   r   r
   r
   r   rP     s    zHTMLParser.handle_pic             C   s   d S )Nr
   )r	   r   r
   r
   r   unknown_decl  s    zHTMLParser.unknown_declc             C   s   t jdtdd t|S )NzZThe unescape method is deprecated and will be removed in 3.5, use html.unescape() instead.r&   )
stacklevel)warningswarnDeprecationWarningr   )r	   sr
   r
   r   r     s    
zHTMLParser.unescape)r   r   )r   )__name__
__module____qualname____doc__r^   r   r   r   r   r   r   r!   r"   r   r7   rI   r6   r3   rU   r4   r\   r]   rk   r:   r=   r/   rM   rH   rP   rl   r   r
   r
   r
   r   r   ?   s8   		z
3"()ru   r   rn   r   Zhtmlr   __all__r   r   r>   r<   r8   r1   rO   ZcommentcloserV   rW   VERBOSErg   ri   rj   r   r   r
   r
   r
   r   <module>   s(   











