现在的做法是先匹配整个a标签,然后遍历匹配结果,输出item.Groups["title"].Value;
希望能修改正则表达式,使其直接匹配title的值,然后输出item.Value即可
/// <summary>
/// Downloads a forum topic page, prints the <c>title</c> attribute value of every
/// <c>&lt;a&gt;</c> tag matched by the pattern (each followed by a blank line),
/// then waits for a key press before exiting.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    string resultHtml = getHtml("http://bbs.csdn.net/topics/391047173");

    // Matches an <a ...> opening tag and captures the title attribute value in the
    // named group "title". Group 1 remembers the opening quote (if any) so the
    // backreference \1 requires the same character to close the value.
    // NOTE(review): the pattern in the original file was cut off after "(?";
    // everything from "<title>" onward is a reconstruction based on the
    // Groups["title"] lookup below — confirm against the intended pattern.
    string regexHrefTitleInA2 = @"(?is)<a[^>]+?title=(['""]?)(?<title>[^'""]*)\1[^>]*>";

    var matches = Regex.Matches(resultHtml, regexHrefTitleInA2);
    foreach (Match item in matches)
    {
        // Every Match held by a MatchCollection is a successful match, so no
        // Success check is needed here.
        Console.WriteLine(item.Groups["title"].Value);
        Console.WriteLine();
    }

    // Keep the console window open until the user presses a key.
    Console.Read();
}
/// <summary>
/// Fetches the content of the given URL and returns it as text.
/// </summary>
/// <param name="url">Address of the page to download.</param>
/// <returns>The response body decoded into a string.</returns>
private static string getHtml(string url)
{
    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);

    // Dispose the response, its stream and the reader even when reading throws,
    // so the underlying connection is always released.
    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
    using (Stream stream = response.GetResponseStream())
    {
        // The server may omit the charset or report one this runtime does not
        // know; fall back to UTF-8 instead of throwing from GetEncoding.
        Encoding encoding = Encoding.UTF8;
        string charset = response.CharacterSet;
        if (!string.IsNullOrEmpty(charset))
        {
            try
            {
                // Some servers wrap the charset name in quotes; strip them first.
                encoding = Encoding.GetEncoding(charset.Trim().Trim('"', '\''));
            }
            catch (ArgumentException)
            {
                // Unknown or malformed charset name: keep the UTF-8 default.
            }
        }

        using (StreamReader sr = new StreamReader(stream, encoding))
        {
            return sr.ReadToEnd();
        }
    }
}