Python之HTMLParser
"""A parser for HTML and XHTML."""
# This file is based on sgmllib.py, but the API is slightly different.
# XXX There should be a way to distinguish between PCDATA (parsed
# character data -- the normal case), RCDATA (replaceable character
# data -- only char and entity references and end tags are special)
# and CDATA (character data -- only end tags are special).
import _markupbase
import re
# Regular expressions used for parsing.
#
# All patterns are compiled once at import time.  Every pattern literal that
# contains a backslash is written as a raw string so that sequences such as
# ``\s`` reach the regex engine untouched instead of triggering Python's
# "invalid escape sequence" warning.

# '&' or '<': the two characters that are special in ordinary text.
interesting_normal = re.compile('[&<]')
# '<' followed by '/' or by the end of the string.
interesting_cdata = re.compile(r'<(/|\Z)')
# '&' followed by a letter or '#': the beginning of a reference.
incomplete = re.compile('&[a-zA-Z#]')

# '&name' followed by one non-alphanumeric character; group 1 is the name.
entityref = re.compile('&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]')
# '&#' + decimal digits, or '&#x' / '&#X' + hex digits, followed by one
# character that is not a hex digit.
charref = re.compile('&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]')

# '<' immediately followed by a letter.
starttagopen = re.compile('<[a-zA-Z]')
# A single '>'.
piclose = re.compile('>')
# '--', optional whitespace, then '>'.
commentclose = re.compile(r'--\s*>')
# A tag name: a letter followed by letters, digits, '-', '.', ':' or '_'.
tagfind = re.compile('[a-zA-Z][-.a-zA-Z0-9:_]*')
# One attribute: group 1 is the name, group 2 the optional '= value' part
# (including the '=' and surrounding whitespace), group 3 the value itself
# (single-quoted, double-quoted, or bare; quotes are kept in the group).
attrfind = re.compile(
    r'\s*([a-zA-Z_][-.:a-zA-Z_0-9]*)(\s*=\s*'
    r'(\'[^\']*\'|"[^"]*"|[-a-zA-Z0-9./,:;+*%?!&$\(\)_#=~@]*))?')

# '<name' followed by any number of attributes and trailing whitespace.  The
# closing '>' is not part of the pattern, so a match ends just before it.
locatestarttagend = re.compile(r"""
<[a-zA-Z][-.a-zA-Z0-9:_]* # tag name
(?:\s+ # whitespace before attribute name
(?:[a-zA-Z_][-.:a-zA-Z0-9_]* # attribute name
(?:\s*=\s* # value indicator
(?:'[^']*' # LITA-enclosed value
|\"[^\"]*\" # LIT-enclosed value
|[^'\">\s]+ # bare value
)
)?
)
)*
\s* # trailing whitespace
""", re.VERBOSE)
# A single '>'.
endendtag = re.compile('>')
# A complete end tag '</name>' with optional whitespace around the name;
# group 1 is the name.  Raw string: the pattern contains '\s'.
endtagfind = re.compile(r'</\s*([a-zA-Z][-.a-zA-Z0-9:_]*)\s*>')
class HTMLParseError(Exception):
"""Exception raised for all parse errors."""
def __init__(self, msg, position=(None, None)):
assert msg
self.msg = msg
self.lineno = position[0]
self.offset = position[1]
def __str__(self):
result = self.msg
相关文档:
最近在用Python处理一些数据,数据需要存储到MySQL数据库中,采用MySQLdb来进行数据库的操作,但是被一个问题困扰了很久。在打开数据库的时候MySQLdb.connect(self.host, self.user, self.password, self.database, port=self.port)出异常,而且异常出现的位置非常奇怪。
出现在converters.py 164行
from decimal import ......
exec语句用来执行储存在字符串或文件中的Python语句。例如,我们可以在运行时生成一个包含Python代码的字符串,然后使用exec语句执行这些语句。下面是一个简单的例子。
>>> exec 'print "Hello World"'
Hello World
eval语句用来计算存储在字符串中的有效Python表达式。下面是一个简单的例子。
>>> ......
Python字符串操作
python如何判断一个字符串只包含数字字符
python 字符串比较
下面列出了常用的python实现的字符串操作
1.复制字符串
#strcpy(sStr1,sStr2)
sStr1 = 'strcpy'
sStr2 = sStr1
sStr1 = 'strcpy2'
print sStr2
2.连接字符串
#strcat(sStr1,sStr2)
sStr1 = 'strcat'
sStr2 = 'appen ......
运行一句python命令
对vc设置路径
include:D:\PYTHON31\INCLUDE
lib:D:\PYTHON31\LIBS
#include "stdafx.h"
#include "python.h"
int main(int argc, char* argv[])
{
Py_Initialize() ;
PyRun_SimpleString("print('Hello')");
//PyRun_SimpleString("print(dir())");
Py_Finalize(); ......
# -*- coding: cp936 -*-
from email.MIMEText import MIMEText
from email.MIMEMultipart import MIMEMultipart
import smtplib
#创建一个带附件的实例
msg = MIMEMultipart()
#构造附件
att = MIMEText(open('e:\\test.txt').read(), 'base64', 'gb2312')
att["Content-Type"] = 'application/ ......