正则表达式简介与常用表达 - linux编程基础

"^(19|20)\d{2}-(0[1-9]|1[0-2])-(0[1-9]|[12]\d|3[01])$")

6) 判断是否是合法的url地址，http://www.test.com/a.htm?id=3&name=aaa(问号后面的可有可无)

// Validates a URL of the form scheme://host[/path][?key=value(&key=value)*],
// e.g. http://www.test.com/a.htm?id=3&name=aaa.
// The (/[\w.-]*)* group accepts zero or more path segments; without it the
// pattern cannot consume "/a.htm" and rejects the example URL above.
// The trailing (\?...)? group keeps the query string optional.
Regex.IsMatch(s, @"^\w+://\w+(\.\w+)+(/[\w.-]*)*(\?\w+=\w+(&\w+=\w+)*)?$")

II) Match & Matches

1) 从文件路径中提取出文件名(包含后缀)

// Extract the file name (including its extension) from a '/'-separated path.
string filePath = "C:/funny/make/1.txt";

// The leading ".+" is greedy, so it consumes everything up to the LAST '/';
// group 1 is therefore only the final path segment.
Match fileNameMatch = Regex.Match(filePath, @".+/(.+)");

if (fileNameMatch.Success)
{
    string fileName = fileNameMatch.Groups[1].Value;
    Console.WriteLine(fileName);
}

2) 从“June   26 ,   1951”中提取出月份June来

// Extract the month name from a date written as "<month> <day> , <year>".
// The separators must be real whitespace: the pattern requires \s+ directly
// after the month, so a literal with '?' placeholders in place of the spaces
// never matches and nothing is printed.
string s = "June   26 ,   1951";

// Group 1 captures the leading word (the month); \s+\d+ consumes the day and
// .+\d+ skips the comma and spacing up to the year.
Match m = Regex.Match(s, @"(\w+)\s+\d+.+\d+");

if (m.Success)
{
    Console.WriteLine(m.Groups[1].Value);
}

Console.ReadKey();

3) 从Email中提取出用户名和域名，比如从test@163.com中提取出test和163.com。

// Split an e-mail address into its user name and domain.
string s = "test@163.com";

// Group 1 is everything before the '@' and group 2 everything after it.
// Because the first ".+" is greedy, an input with several '@' characters is
// split at the last one.
Match m = Regex.Match(s, @"^(.+)@(.+)$");

// Only read the groups when the input actually matched, like the other
// Match examples; on a failed match the groups would just be empty strings.
if (m.Success)
{
    string name = m.Groups[1].Value;
    string field = m.Groups[2].Value;

    Console.WriteLine("用户名是{0}，域名是{1}", name, field);
}

4) 从一段文本中提取所有的数字

// Extract every number that appears in a piece of text.
string s = "大家好，我是Hebe，我22岁了，身高180，我们团队有3个女女！";

// \d+ matches each maximal run of consecutive digits, so every number in the
// text becomes one Match in the collection.
MatchCollection mc = Regex.Matches(s, @"\d+");

foreach (Match m in mc)
{
    Console.WriteLine(m.Value);
}

// Keep the console window open until a key is pressed.
Console.ReadKey();

5) 从字符串中提取所有人名

// Pull out every name introduced by the phrase "我是" ("I am").
string introduction = "大家好。我们是牛人。我是Jason。我是Jay。呜呜。fffff";

// Group 1 captures the word characters that directly follow "我是"; the
// full stop "。" is not a word character, so it ends each capture.
MatchCollection nameMatches = Regex.Matches(introduction, @"我是(\w+)");

for (int i = 0; i < nameMatches.Count; i++)
{
    Group nameGroup = nameMatches[i].Groups[1];
    Console.WriteLine(nameGroup.Value);
}

// Keep the console window open until a key is pressed.
Console.ReadKey();

6) 从一个网站页面提取所有Email地址

// Download a web page and list every e-mail address found in it.
StringBuilder sb = new StringBuilder();

// Address of the local test site; the original author's note says to make
// sure the port (8080) is included if the request fails.
string path = "http://localhost:8080/";

// WebClient is IDisposable, so it is released as soon as the download is done.
using (WebClient wc = new WebClient())
{
    // Set the text encoding before downloading (the original author flags
    // encoding as a common source of trouble here).
    wc.Encoding = Encoding.UTF8;

    // Fetch the page body as a string.
    string content = wc.DownloadString(path);

    // Each domain label is "\.\w+" (a dot followed by ONE OR MORE word
    // characters). With "\.\w" only one character per label is kept, so
    // test@163.com would be reported as test@163.c.
    MatchCollection mc = Regex.Matches(content, @"\w+@\w+(\.\w+)+");

    // Regex.Matches only yields successful matches, so no Success check is needed.
    foreach (Match m in mc)
    {
        sb.AppendLine(m.Value);
    }
}

Console.WriteLine(sb.ToString());

// Keep the console window open until a key is pressed.
Console.ReadKey();

7) 从网站抓取所有的图片地址，下载到硬盘(网站所在目录用Cassini搭建一个)

//默认网站所在的目录

// Download every image referenced by the page at `address` to drive E:.

// Root address of the local test site.
string address = "http://localhost:8080/";

// WebClient is IDisposable, so it is released once all downloads finish.
using (WebClient wc = new WebClient())
{
    wc.Encoding = Encoding.Default;

    // Fetch the page markup as a string.
    string str = wc.DownloadString(address);

    // Pattern derived from inspecting the page markup: group 1 is the
    // site-relative image path, group 2 is just the file name.
    MatchCollection mc = Regex.Matches(str, @"src=""(hotgirls/(\d{2}_\d{2}\.jpg))""");

    Uri baseUri = new Uri(address);

    foreach (Match m in mc)
    {
        // Resolve the relative image path against the site root. Uri is the
        // right tool for URLs; Path.Combine is meant for file-system paths.
        Uri picAddr = new Uri(baseUri, m.Groups[1].Value);

        // Save under the root of drive E:, keeping the image's original name.
        // The trailing backslash matters: Path.Combine(@"E:", name) yields the
        // drive-relative path "E:name" (relative to the current directory on
        // E:), not "E:\name".
        wc.DownloadFile(picAddr, Path.Combine(@"E:\", m.Groups[2].Value));
    }
}

8) 抓取所有超链接

// Download a web page and list every hyperlink (<a ...>text</a>) found in it.
StringBuilder sb = new StringBuilder();

// Root address of the local test site.
string address = "http://localhost:8080/";

// WebClient is IDisposable, so it is released as soon as the download is done.
using (WebClient wc = new WebClient())
{
    wc.Encoding = Encoding.UTF8;

    // Fetch the page markup as a string.
    string str = wc.DownloadString(address);

    // Anchor pattern: "<a", whitespace, the attributes ([^>]+), ">", the link
    // text ([^<]+), then the closing "</a>".
    // To express "any character except X", the ^ must be placed inside
    // square brackets, e.g. [^>] and [^<].
    // NOTE(review): the published pattern had its "<a…[^>" prefix and "</a>"
    // suffix stripped as HTML tags, leaving "]+>[^<]+"; this is the
    // reconstruction — confirm against the original article.
    MatchCollection mc = Regex.Matches(str, @"<a\s[^>]+>[^<]+</a>");

    foreach (Match m in mc)
    {
        sb.AppendLine(m.Value);
    }
}

Console.WriteLine(sb.ToString());

// Keep the console window open until a key is pressed.
Console.ReadKey();

III) Replace

1) 删除所有连续的a，其实就是将连续的a替换为空字符串。

// Delete every run of consecutive 'a' characters by replacing each run
// with the empty string.
string source = "你aaa好aa哈哈a你";

// a+ matches one or more consecutive 'a' characters as a single match.
Regex runOfA = new Regex(@"a+");
string cleaned = runOfA.Replace(source, "");

Console.WriteLine(cleaned);

// Keep the console window open until a key is pressed.
Console.ReadKey();

2) 将“我的生日是05/21/2010耶”转换为“我的生日是2010年05月21日耶”

// Rewrite an MM/dd/yyyy date embedded in a sentence as yyyy年MM月dd日.
string birthdayText = "我的生日是05/21/2010耶";

// Groups: 1 = month, 2 = day, 3 = year. The replacement string refers to
// them as $1, $2 and $3 and reorders them.
Regex usDate = new Regex(@"(\d{2})/(\d{2})/(\d{4})");
string rewritten = usDate.Replace(birthdayText, "$3年$1月$2日");

Console.WriteLine(rewritten);

// Keep the console window open until a key is pressed.
Console.ReadKey();

3) 将 hello 'welcome to' beautiful'China' 输出为 hello 【welcome to】 beautiful【China】

// Replace each single-quoted phrase with the same phrase wrapped in 【】.
string s = " hello 'welcome to' beautiful'China' ";

// ".+?" is the NON-greedy (lazy) form: a '?' placed after a quantifier such
// as '+' makes it match as little as possible. Each match therefore ends at
// the nearest closing quote; a greedy ".+" would run from the first quote to
// the last one and swallow both phrases in a single match.
s = Regex.Replace(s, @"'(.+?)'", "【$1】");

Console.WriteLine(s);

// Keep the console window open until a key is pressed.
Console.ReadKey();

4) 将 http://www.test.com 替换为 <a href="http://www.test.com">http://www.test.com</a>

string s = "http://www

正则表达式简介与常用表达(二)