目前的做法是先用正则匹配出所有 a 标签,再遍历匹配结果逐个输出 title。
能否不用遍历,一次性输出所有 title 的值?
能否直接取到 a 标签的 title 属性值?
/// <summary>
/// Downloads a fixed forum topic page and prints the <c>title</c> attribute of
/// every anchor (<c>&lt;a&gt;</c>) element matched in it, each followed by a blank line.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    // Alternative pattern for extracting the href attribute of an anchor:
    //string regexHref = @"(?is)<a((?!href=)[\s\S])*href=['""]?(?<href>[^'""]*)[^<]*</a>";

    // Captures the title attribute ("title" group) and the inner text ("text" group)
    // of an anchor. The title character class excludes quotes, whitespace and '>',
    // so titles containing whitespace or quote characters are not matched.
    const string anchorTitlePattern = @"(?is)<a[^>]+?title=(['""]?)(?<title>[^'""\s>]+)\1[^>]*>(?<text>(?:(?!</?a\b).)*)</a>";

    string pageSource = getHtml("http://bbs.csdn.net/topics/391047173");

    // Walk the successive matches one at a time; the chain ends at the first
    // unsuccessful match.
    Match anchor = Regex.Match(pageSource, anchorTitlePattern);
    while (anchor.Success)
    {
        Console.WriteLine(anchor.Groups["title"].Value);
        Console.WriteLine();
        anchor = anchor.NextMatch();
    }

    // Keep the console window open until input is available.
    Console.Read();
}
/// <summary>
/// Downloads the content of the given URL and returns it as a string.
/// </summary>
/// <param name="url">Address of the page to download.</param>
/// <returns>
/// The response body decoded with the character set reported by the response,
/// or with UTF-8 when the response does not report one.
/// </returns>
private static string getHtml(string url)
{
    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);

    // Dispose the response and its stream even when reading fails, so the
    // underlying connection is released.
    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
    using (Stream stream = response.GetResponseStream())
    {
        // Encoding.GetEncoding throws on a null or empty name, which happens when
        // the server sends no charset; fall back to UTF-8 in that case.
        string charset = response.CharacterSet;
        Encoding encoding = string.IsNullOrEmpty(charset)
            ? Encoding.UTF8
            : Encoding.GetEncoding(charset);

        using (StreamReader sr = new StreamReader(stream, encoding))
        {
            return sr.ReadToEnd();
        }
    }
}