易截截图软件、单文件、免安装、纯绿色、仅160KB

Python之HTMLParser

"""A parser for HTML and XHTML."""
# This file is based on sgmllib.py, but the API is slightly different.
# XXX There should be a way to distinguish between PCDATA (parsed
# character data -- the normal case), RCDATA (replaceable character
# data -- only char and entity references and end tags are special)
# and CDATA (character data -- only end tags are special).
import _markupbase
import re
# Regular expressions used for parsing

# Matches either character that can begin markup or a reference: '&' or '<'.
interesting_normal = re.compile('[&<]')
# Matches '<' only when followed by '/' or by the end of the string.
interesting_cdata = re.compile(r'<(/|\Z)')
# Matches '&' followed by a letter or '#'.
incomplete = re.compile('&[a-zA-Z#]')

# Named reference; group 1 is the name.  The match also consumes the single
# non-alphanumeric character that follows the name.
entityref = re.compile('&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]')
# Numeric reference, decimal or hex ('x'/'X' prefix).  The match also
# consumes the single non-hex-digit character that follows it.
charref = re.compile('&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]')

# '<' immediately followed by an ASCII letter.
starttagopen = re.compile('<[a-zA-Z]')
piclose = re.compile('>')
# '--', optional whitespace, then '>'.
commentclose = re.compile(r'--\s*>')
# A tag name: a letter followed by letters, digits, '-', '.', ':' or '_'.
tagfind = re.compile('[a-zA-Z][-.a-zA-Z0-9:_]*')
# One attribute: group 1 is the name, group 2 the optional '= value' part,
# group 3 the value itself (single-quoted, double-quoted, or a bare run of
# the listed characters; quotes are included in the group).
attrfind = re.compile(
    r'\s*([a-zA-Z_][-.:a-zA-Z_0-9]*)(\s*=\s*'
    r'(\'[^\']*\'|"[^"]*"|[-a-zA-Z0-9./,:;+*%?!&$\(\)_#=~@]*))?')
# Matches from '<' through the tag name, all attributes and trailing
# whitespace; it stops before the closing '>' (or '/>').  The pattern text
# is kept exactly as found so that `.pattern` is unchanged.
locatestarttagend = re.compile(r"""
<[a-zA-Z][-.a-zA-Z0-9:_]* # tag name
(?:\s+ # whitespace before attribute name
(?:[a-zA-Z_][-.:a-zA-Z0-9_]* # attribute name
(?:\s*=\s* # value indicator
(?:'[^']*' # LITA-enclosed value
|\"[^\"]*\" # LIT-enclosed value
|[^'\">\s]+ # bare value
)
)?
)
)*
\s* # trailing whitespace
""", re.VERBOSE)
endendtag = re.compile('>')
# Complete end tag; group 1 is the tag name.  Written as a raw string so that
# '\s' reaches the regex engine without relying on Python passing an invalid
# string escape through (which triggers a warning on newer versions).
endtagfind = re.compile(r'</\s*([a-zA-Z][-.a-zA-Z0-9:_]*)\s*>')
class HTMLParseError(Exception):
"""Exception raised for all parse errors."""
def __init__(self, msg, position=(None, None)):
assert msg
self.msg = msg
self.lineno = position[0]
self.offset = position[1]
def __str__(self):
result = self.msg


相关文档：

关于Python正则表达式的区分大小写的问题

最近在用Python处理一些数据，数据需要存储到MySQL数据库中，采用MySQLdb来进行数据库的操作，但是被一个问题困扰了很久。在打开数据库的时候MySQLdb.connect(self.host, self.user, self.password, self.database, port=self.port)出异常，而且异常出现的位置非常奇怪。
出现在converters.py 164行
from decimal import ......

python 命令行参数


本篇将介绍python中sys, getopt模块处理命令行参数
如果想对python脚本传参数，python中对应的argc, argv(c语言的命令行参数)是什么呢？
需要模块：sys
参数个数：len(sys.argv)
脚本名：    sys.argv[0]
参数1：     sys.argv[1]
参数2：     sys.argv[2]
test.py
1
import ......

python时间转为时间戳

找了半天没找着，终于在英文站点上找到，还有感谢群里的石头和球迷
>>> s = datetime.datetime(2009,1,1)
>>> time.mktime(s.timetuple())
1230739200.0
别外付一个python对时间的一些函数，很好用的
我们先导入必须用到的一个module
>>> import time
设置一个时间的格式，下面会用到
& ......

ZZ python处理中文


以下为我在处理网页编码问题看的文章，现在写爬虫自己处理网页编码问题确实比较麻烦，网页也没有规定
一定要在META头信息里带编码信息，而很多人写的探测编码就是获取META，这是不可取的，而有的人则是分析HTTP返回的数据包里边的
charset，如（a.headers.getparam('charset')），而HTTP包没有规定一定要带charset，所以 ......
© 2009 ej38.com All Rights Reserved. 关于E健网联系我们 | 站点地图 | 赣ICP备09004571号