搜索之路 c#从html中提取文本
直接封装成一个类的,用起来还挺方便的
using System;
using System.Data;
using System.Configuration;
using System.Web;
using System.Web.Security;
using System.Web.UI;
using System.Web.UI.WebControls;
using System.Web.UI.WebControls.WebParts;
using System.Web.UI.HtmlControls;
using System.Text.RegularExpressions;
/// <summary>
/// HtmlExtract 抽取html里面的文本信息
/// </summary>
public class HtmlExtract
{
#region private attributes
private string _strHtml;
#endregion
#region public mehtods
public HtmlExtract(string inStrHtml)
{ _strHtml = inStrHtml;}
public string ExtractText()
{
string result = _strHtml;
result = RemoveComment(result);
result = RemoveScript(result);
result = RemoveStyle(result);
result = RemoveTags(result);
return result.Trim();
}
#endregion
#region private methods
private string RemoveComment(string input)
{
string result = input;
//remove comment
result = Regex.Replace(result, @"<!--[^-]*-->", string.Empty, RegexOptions.IgnoreCase);
return result;
}
相关文档:
其实就是创建普通文件, CreateFile() 和WriteFile. 然后用ShellExcuteEx()来打开就行了(会自动调用IE).
HANDLE handle;
handle=CreateFile(L"\\windows\\google.html",GENERIC_WRITE,0,NULL,CREATE_ALWAYS,FILE_ATTRIBUTE_NORMAL,NULL);
if(INVALID_HANDLE_VALUE!= handle )
{
DWORD Num;
::WriteF ......
HTML有10个表格相关标签。下面是一个带有简介的列表,但是首先,文档要被正确的定义在HTML 4.01/XHTML 1或HTML 5下面:
<caption> 定义表格标题(4, 5)
<col> 为表格的列定义属性(4, 5)
<colgroup> 定义表格列的分组(4, 5)
<table> 定义表格(4, 5)
<tbody> 定义表格主体(4, 5)
< ......
引用命名空间:using System.Xml
1.检查所要操作的xml文件是否存在:
System.IO.File.Exists(文件路径及名称);
2.得到xml文件:
(1)在asp.net中可以这样得到:
XmlDocument xmlDoc = new XmlDocument();
//导入xml文档
xmlDoc.Load( Server.MapPath("xmlTesting.xml"));
//导入字符串
/ ......
set ANSI_NULLS ON
set QUOTED_IDENTIFIER ON
Go
----截取字符串,并出去html
create FUNCTION [dbo].[CutString] (@str varchar(1000),@length int)
RETURNS varchar(1000) AS
BEGIN
declare @mainstr varchar(1000)
declare @substr varchar(1000)
if(@str is not null or @st ......
flex嵌入到html:
用swfobject,下载的.js地址:http://www.adobe.com/devnet/activecontent/articles/devletter.html
在你的 HTML 页面头部<head>区嵌入这个脚本文件:<script type="text/javascript" src="swfobject.js"></script>
在你的 HTML 中写一个用来放 Flash 的容器,比如<div> ......