html unicode编码转换方法
对于"&# 24038;&# 36793;"这种&#开始的字符,应该为html unicode编码类型,解码方法如下:
# Sample input: HTML numeric character references for U+5DE6 and U+8FB9.
# The references must not contain spaces, otherwise the pattern below never
# matches and nothing is decoded.
s = "&#24038;&#36793;"

import re

try:
    _unichr = unichr  # Python 2
except NameError:
    _unichr = chr     # Python 3: chr() already returns a text character

# group(1): optional hex marker ('x' or 'X'); group(2): the digits.
_ = re.compile('&#([xX])?([0-9a-fA-F]+);')


def to_str(s, charset='utf-8'):
    """Replace HTML numeric character references in ``s`` with real characters.

    Both decimal (``&#24038;``) and hexadecimal (``&#x5de6;`` / ``&#X5DE6;``)
    references are recognised.

    Parameters:
        s: the text to process. A byte string (``bytes``, i.e. ``str`` on
            Python 2) is returned as a byte string in which every reference
            is replaced by the character encoded with ``charset``. A text
            string is returned as text with the references replaced by the
            characters themselves.
        charset: encoding used for the replacement characters when ``s`` is
            a byte string. Defaults to ``'utf-8'``.

    Returns:
        A string of the same type as ``s``. References that cannot be
        decoded (hex letters in a decimal reference, a code point outside
        the Unicode range, a character ``charset`` cannot represent) are
        left untouched instead of raising.

    Raises:
        LookupError: if ``s`` is a byte string containing a decodable
            reference and ``charset`` is not a known encoding.
    """
    as_bytes = isinstance(s, bytes)  # also True for Python 2 `str`

    def _replace(result):
        base = 16 if result.group(1) else 10
        try:
            char = _unichr(int(result.group(2), base))
            if as_bytes:
                # Carry the encoded bytes through the text-mode substitution
                # as Latin-1 code points; they are mapped back 1:1 below.
                return char.encode(charset).decode('latin-1')
            return char
        except (ValueError, OverflowError):
            # Malformed or unrepresentable reference: keep it as written.
            return result.group(0)

    if as_bytes:
        # Latin-1 maps every byte to exactly one code point and back, so the
        # surrounding bytes pass through unchanged whatever their encoding.
        return _.sub(_replace, s.decode('latin-1')).encode('latin-1')
    return _.sub(_replace, s)


print(to_str(s))
相关文档:
<select name="CluefromType" id="CluefromType" style="width: 182px" onchange="return CluefromOtherTypeSelected();">
<option value="Êг¡×ß·Ã">
Êг¡×ß·Ã
< ......
使用TWebBrowser组件保存网页为html和mht文件 收藏
一、保存为HTML文件
uses ActiveX;
...
procedure WB_SaveAs_HTML(WB : TWebBrowser; const FileName : string) ;
var
PersistStream: IPersistStreamInit;
Stream: IStream;
FileStream: TFileStream;
begin
if not Assigned(WB. ......
System.Net.WebClient wc = new System.Net.WebClient();
Byte[] pageData = wc.DownloadData("httP://www");
string s = System.Text.Encoding.Default.GetString(pageData); ......
前几天做项目。需要用到一个WinForm的HTML的编辑和显示控件。.NET自己并没有提供这方面的控件。去Google百度了一下。没有找到合适的.NET控件。无奈去英文Google了一下。果然发现了一款名为:.NET Win HTML Editor Control 3.2的控件。下载配置环境试用。发现免费版提供全功能试用。唯一不好的地方就是在编辑区有一个注册的 ......
HTML 是 Web 统一语言,这些容纳在尖括号里的简单标签,构成了如今的 Web。1991 年,Tim Berners-Lee 编写了一份叫做 “HTML 标签”的文档,里面包含了大约20个用来标记网页的 HTML 标签。他直接借用 SGML 的标记格式,也就是后来我们看到的 HTML 标记的格式。本文讲述了 HTML 这门 Web 标记语言的发展简史。
......