利用正则表达式提取html中的Email地址
发布时间:2020-12-14 02:18:46 所属栏目:百科 来源:网络整理
导读:Demo源码如下: Demo下载地址: http://download.csdn.net/detail/zxcvbnm32123/5830571 using System;using System.Collections.Generic;using System.Linq;using System.Text;using System.IO;using System.Text.RegularExpressions;namespace _12提取html
|
Demo源码如下: Demo下载地址:http://download.csdn.net/detail/zxcvbnm32123/5830571 using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.IO;
using System.Text.RegularExpressions;
namespace _12提取html中的所有的Email地址
{
    /// <summary>
    /// Console demo that extracts every e-mail address found in the file
    /// "1.htm" with a regular expression and counts how many addresses belong
    /// to each of a fixed set of common mail providers.
    /// </summary>
    class Program
    {
        // Capture groups:
        //   1 - user name (letters, digits, '_', '.', '-')
        //   2 - first domain label, e.g. "163" in "user@163.com"
        //   3 - a dot-separated suffix such as ".com"; when the suffix repeats
        //       (".com.cn"), Groups[3].Value holds the last repetition.
        // The hyphen is placed last inside the character class so that it is a
        // literal and not a range operator, and the dot before the suffix is
        // escaped so that it only matches a real '.'.
        private const string EmailPattern =
            @"([a-zA-Z0-9_.-]+)@([a-zA-Z0-9]+)(\.[a-zA-Z0-9]+)+";

        /// <summary>
        /// Reads "1.htm" from the current directory, prints every e-mail
        /// address it contains together with its captured groups, then prints
        /// per-provider statistics and waits for a key press.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            string html = File.ReadAllText("1.htm");

            // Extract all e-mail addresses; the parentheses in the pattern
            // define the groups that are read below.
            MatchCollection mc = Regex.Matches(html, EmailPattern);

            // Usage counters for the common mail providers:
            // 163, 126, gmail, qq, sohu, sina, yahoo, hotmail.
            int count_163 = 0;
            int count_126 = 0;
            int count_gmail = 0;
            int count_qq = 0;
            int count_sohu = 0;
            int count_sina = 0;
            int count_yahoo = 0;
            int count_hotmail = 0;

            foreach (Match match in mc)
            {
                // match.Value and match.Groups[0].Value hold the same text:
                // the complete matched address. Real capture groups therefore
                // start at index 1.
                Console.WriteLine(match.Value);           // 0: full address
                Console.WriteLine(match.Groups[1].Value); // 1: user name
                Console.WriteLine(match.Groups[2].Value); // 2: domain name
                Console.WriteLine(match.Groups[3].Value); // 3: suffix

                // Lower-case with the invariant culture so the comparison with
                // the ASCII labels below does not depend on the current
                // culture (e.g. Turkish casing of 'I').
                switch (match.Groups[2].Value.ToLowerInvariant())
                {
                    case "163":
                        count_163++;
                        break;
                    case "126":
                        count_126++;
                        break;
                    case "gmail":
                        count_gmail++;
                        break;
                    case "qq":
                        count_qq++;
                        break;
                    case "sohu":
                        count_sohu++;
                        break;
                    case "sina":
                        count_sina++;
                        break;
                    case "yahoo":
                        count_yahoo++;
                        break;
                    case "hotmail":
                        count_hotmail++;
                        break;
                }
            }

            Console.WriteLine("=============统计信息============");
            Console.WriteLine("邮箱总数:{0}", mc.Count);
            Console.WriteLine("网易163邮箱用户数:{0}", count_163);
            Console.WriteLine("网易126邮箱用户数:{0}", count_126);
            Console.WriteLine("gmail邮箱用户数:{0}", count_gmail);
            Console.WriteLine("qq邮箱用户数:{0}", count_qq);
            Console.WriteLine("sohu邮箱用户数:{0}", count_sohu);
            Console.WriteLine("sina邮箱用户数:{0}", count_sina);
            Console.WriteLine("yahoo邮箱用户数:{0}", count_yahoo);
            Console.WriteLine("hotmail邮箱用户数:{0}", count_hotmail);

            // Keep the console window open until a key is pressed.
            Console.ReadKey();
        }
    }
}
“1.htm”截图如下:
通过调试控制,查看获取的全部网页字符串,截图如下:
输出结果如下:
Demo下载地址:http://download.csdn.net/detail/zxcvbnm32123/5830571 (编辑:李大同) 【声明】本站内容均来自网络,其相关言论仅代表作者个人观点,不代表本站立场。若无意中侵犯到您的权利,请及时联系站长删除相关内容! |
