利用正则表达式实现去除所有HTML标签代码
发布时间:2020-12-14 01:43:17 所属栏目:百科 来源:网络整理
导读:protected string str = "<table><tr><td>sdasasdsdd</td></tr></table><br><p>sds</p><img id='img1' src='http://www.zhixing123.cn/uploads/allimg/110330/1104201G0-0.gif' width='100' height='50' alt=''>aaassss<br><img src='http://www.zhixing123.cn/uploads/allimg/11033…
protected string str = "<table><tr><td>sdasasdsdd</td></tr></table><br><p>sds</p><img id='img1' src='http://www.zhixing123.cn/uploads/allimg/110330/1104201G0-0.gif' width='100' height='50' alt=''>aaassss<br><img src='http://www.zhixing123.cn/uploads/allimg/110330/1104201G0-0.gif' width='100' height='50' alt=''> 说是道 ";

// Strips HTML tags from the sample string `str` on page load, keeping only
// <img>, <br>, <p> and </p>.
protected void Page_Load(object sender, EventArgs e)
{
    // Alternative patterns, kept for reference:
    //   @"<[^>]*>"                      removes every tag
    //   @"<script[^>]*?>.*?</script>"   removes scripts together with their contents
    //   @"<img[^>]*>"                   removes images only
    //   @"<(?!br).*?>"                  removes every tag except br
    //   @"<table[^>]*?>.*?</table>"     removes tables together with their contents

    // Remove every tag except img, br, p and /p.
    // The \b after the alternation makes the lookahead compare whole tag names,
    // so <pre>, <param>, ... are no longer mistaken for <p>. [^>]* (instead of
    // .*?) lets a tag whose attributes wrap onto another line be matched as well.
    string regexstr = @"<(?!(?:img|br|/?p)\b)[^>]*>";
    str = Regex.Replace(str, regexstr, string.Empty, RegexOptions.IgnoreCase);
}

asp中正则表达式去除HTML标记(窃自eWebEditor)
2009年12月31日 星期四 下午 12:40

' Replaces every match of the regular expression `re` in `content` with
' `replacement` and returns the resulting string.
'   re          - VBScript RegExp pattern
'   replacement - replacement text; may reference groups as $1, $2, ...
'   content     - text to process
'   ignoreCase  - True for case-insensitive matching
Function ReplacePattern(re, replacement, content, ignoreCase)
    Dim myRegExp
    Set myRegExp = New RegExp
    myRegExp.Global = True
    myRegExp.IgnoreCase = ignoreCase
    myRegExp.Pattern = re
    ReplacePattern = myRegExp.Replace(content, replacement)
    Set myRegExp = Nothing
End Function

' Deletes every (case-sensitive) match of the pattern `re` from `content`
' and returns the result.
Function ExecReg(re, content)
    ExecReg = ReplacePattern(re, "", content, False)
End Function

' Returns `html` with script hooks, the listed layout/formatting tags,
' class/style attributes, XML declarations, namespaced tags and embedded
' objects removed. Text between tags is kept, with its original letter case.
Function DecodeFilter(html)
    Dim cleaned, pattern, scriptAndTagPatterns, trailingPatterns

    ' Work on a copy: VBScript passes arguments ByRef by default, so assigning
    ' to `html` would silently modify the caller's variable.
    cleaned = html

    scriptAndTagPatterns = Array( _
        "</?script\b[^>]*>", _
        "(javascript|jscript|vbscript|vbs):", _
        "\bon(mouse|exit|error|click|key)", _
        "&#", _
        "</?(?:table|tr|th|td|a|p|img|div|ul|li|tbody|h[1-6]|b|strong)\b[^>]*>")

    trailingPatterns = Array( _
        "<\?xml[^>]*>", _
        "</?[a-z]+:[^>]*>", _
        "</?font\b[^>]*>", _
        "</?marquee\b[^>]*>", _
        "</?(?:object|param|embed)\b[^>]*>")

    ' Client-side scripting (script tags, script URL schemes, event-handler
    ' prefixes, numeric-entity prefixes), then the structural/formatting tags.
    ' Only the <script> tags themselves are removed; the text between them stays.
    ' \b keeps "</?b" from also eating <br>/<body>, "</?p" from eating <pre>, etc.
    For Each pattern In scriptAndTagPatterns
        cleaned = ReplacePattern(pattern, "", cleaned, True)
    Next

    ' Drop class="..." and style="..." attributes but keep the tag that carried
    ' them: $1 re-inserts everything matched before the attribute.
    cleaned = ReplacePattern("(<[^>]+?)\s+class\s*=\s*(?:""[^""]*""|'[^']*'|[^\s>]*)", "$1", cleaned, True)
    cleaned = ReplacePattern("(<[^>]+?)\s+style\s*=\s*(?:""[^""]*""|'[^']*'|[^\s>]*)", "$1", cleaned, True)

    ' XML declarations (the ? must be escaped, otherwise it makes "<" optional),
    ' namespaced tags such as <o:p>, <font>, <marquee>, <object>/<param>/<embed>.
    For Each pattern In trailingPatterns
        cleaned = ReplacePattern(pattern, "", cleaned, True)
    Next

    DecodeFilter = cleaned
End Function

' Returns `strText` with every <...> tag removed.
Function RemoveHTML(strText)
    Dim RegEx
    Set RegEx = New RegExp
    RegEx.Pattern = "<[^>]*>"
    RegEx.Global = True
    RemoveHTML = RegEx.Replace(strText, "")
End Function

' Returns `str` with every <...> tag and every space character removed.
Function nohtml(str)
    Dim re, text

    ' Copy first so the caller's (ByRef) variable is left untouched.
    text = str

    Set re = New RegExp
    re.IgnoreCase = True
    re.Global = True
    ' The published patterns "(&;.[^&;]*&;)" / "(&;/[^&;]*&;)" are what is left
    ' of "<"/">" after an entity-stripping pass and match no HTML tag; one
    ' pattern covers both opening and closing tags.
    re.Pattern = "<[^>]*>"
    text = re.Replace(text, " ")
    Set re = Nothing

    text = Replace(text, " ", "")
    ' NOTE(review): the published code had replace(str,"") here, which is not a
    ' valid call (the search text was lost). Removing "&nbsp;" is the presumed
    ' intent - TODO confirm against the original source.
    text = Replace(text, "&nbsp;", "")

    nohtml = text
End Function

注:java中 "html内容".replaceAll("<[^>]*>","")
(编辑:李大同)
【声明】本站内容均来自网络,其相关言论仅代表作者个人观点,不代表本站立场。若无意侵犯到您的权利,请及时与联系站长删除相关内容! |