Python之HTMLParser
"""A parser for HTML and XHTML."""
# This file is based on sgmllib.py, but the API is slightly different.
# XXX There should be a way to distinguish between PCDATA (parsed
# character data -- the normal case), RCDATA (replaceable character
# data -- only char and entity references and end tags are special)
# and CDATA (character data -- only end tags are special).
import _markupbase
import re
# Regular expressions used for parsing.
# Every pattern is written as a raw string so that regex escapes such as
# \s and \Z reach the re module untouched instead of being treated as
# (invalid) Python string escapes.

# The two characters that interrupt a run of ordinary text.
interesting_normal = re.compile(r'[&<]')
# "</", or a "<" that is the very last character of the input.
interesting_cdata = re.compile(r'<(/|\Z)')

# "&" followed by a letter or "#".
incomplete = re.compile(r'&[a-zA-Z#]')

# Named entity reference: group 1 is the name.  The match also consumes one
# trailing character that is not a letter or digit.
entityref = re.compile(r'&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]')
# Decimal or hexadecimal character reference, plus one trailing character
# that is not a hexadecimal digit.
charref = re.compile(r'&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]')

# "<" immediately followed by a letter.
starttagopen = re.compile(r'<[a-zA-Z]')
piclose = re.compile(r'>')
# "--", optional whitespace, then ">".
commentclose = re.compile(r'--\s*>')
# A tag name: a letter followed by letters, digits, "-", ".", ":" or "_".
tagfind = re.compile(r'[a-zA-Z][-.a-zA-Z0-9:_]*')
# One attribute: group 1 is the name, group 2 the optional "= value" part,
# group 3 the value itself (single-quoted, double-quoted, or bare).
attrfind = re.compile(
    r'\s*([a-zA-Z_][-.:a-zA-Z_0-9]*)(\s*=\s*'
    r'(\'[^\']*\'|"[^"]*"|[-a-zA-Z0-9./,:;+*%?!&$\(\)_#=~@]*))?')

# Matches from "<" through the last attribute and any trailing whitespace;
# the closing ">" itself is not part of the pattern.
locatestarttagend = re.compile(r"""
  <[a-zA-Z][-.a-zA-Z0-9:_]*          # tag name
  (?:\s+                             # whitespace before attribute name
    (?:[a-zA-Z_][-.:a-zA-Z0-9_]*     # attribute name
      (?:\s*=\s*                     # value indicator
        (?:'[^']*'                   # LITA-enclosed value
          |\"[^\"]*\"                # LIT-enclosed value
          |[^'\">\s]+                # bare value
         )
       )?
     )
   )*
  \s*                                # trailing whitespace
""", re.VERBOSE)
endendtag = re.compile(r'>')
# A complete end tag; group 1 is the tag name.
endtagfind = re.compile(r'</\s*([a-zA-Z][-.a-zA-Z0-9:_]*)\s*>')
class HTMLParseError(Exception):
    """Exception raised for all parse errors.

    Attributes:
        msg: Description of the error.
        lineno: Element 0 of the ``position`` argument (``None`` by default).
        offset: Element 1 of the ``position`` argument (``None`` by default).
    """

    def __init__(self, msg, position=(None, None)):
        """Record the message and the two components of *position*.

        Args:
            msg: Error description; must be truthy.
            position: Indexable pair whose element 0 is stored as ``lineno``
                and element 1 as ``offset``.
        """
        # Debug-time sanity check only: it disappears when Python runs with -O.
        assert msg
        self.msg = msg
        self.lineno = position[0]
        self.offset = position[1]

    def __str__(self):
        """Return the error message as the exception's string form."""
        # Without an explicit return, str(exc) fails with TypeError because
        # __str__ must return a string.
        # NOTE(review): the rest of this method is cut off in this copy of the
        # file; the full version may also append lineno/offset to the
        # message -- confirm against the complete source before relying on
        # the exact text.
        return self.msg
相关文档：
在上一篇中我们已经使用c语言实现了一个最简单的扩展模块，这一篇中将在其基础上进行功能的丰富。首先来考虑如何从外部的Python向C模块传递进参数，foo_bar2展示了如何向C模块传递整数，浮点数，字符串三个参数，其中"ids"指明了传入参数的数据类型。PyArg_ParseTuple负责对args进行解析，若解析失败则返回0.代码#include&n ......
eval(str [,globals [,locals ]])函数将字符串str当成有效Python表达式来求值，并返回计算结果。
同样地, exec语句将字符串str当成有效Python代码来执行.提供给exec的代码的名称空间和exec语句的名称空间相同.
最后，execfile(filename [,globals [,locals ]])函数可以用来执行一个文件,看下面的例子:
>>> ev ......
本文总结了在Python中主要的几种处理XML的方法：
Element Tree
SAX
Dom
使用第三方类库如Amara 2.x， libxml2dom 等
Element Tree
Element Tree是Python 2.5引入的使用简单，快捷的处理方法， 也是Python标准类库推荐的处理XML的方法。
它使用树形节点的形式来读写XML。Element Tree是轻量级的DOM， 所以 ......
找了半天没找着，终于在英文站点上找到，还有感谢群里的石头和球迷
>>> s = datetime.datetime(2009,1,1)
>>> time.mktime(s.timetuple())
1230739200.0
别外付一个python对时间的一些函数，很好用的
我们先导入必须用到的一个module
>>> import time
设置一个时间的格式，下面会用到
& ......
使用 Python 分离中文与英文的混合字串
LiYanrui
posted @ 大约 1 年前
in 程序设计
with tags
python
, 614 阅读
这个问题是做 MkIV 预处理程序
时搞定的，就是把一个混合了中英文混合字串分离为英文与中文的子字串，譬如，将 ”我的 English 学的不好
&ld ......