|
// Download a page, list the URLs referenced by its a/link href, img/script src
// and form action attributes, then echo the raw page.
string strUrl = "http://www.google.cn";
WebRequest request = WebRequest.Create(strUrl);
// If required by the server, set the credentials.
request.Credentials = CredentialCache.DefaultCredentials;

string responseFromServer;
// The using blocks release the response, stream and reader even if reading throws.
using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
using (Stream dataStream = response.GetResponseStream())
// NOTE(review): Encoding.Default is the machine's default encoding, not necessarily
// the charset the server declared - confirm against response.CharacterSet.
using (StreamReader reader = new StreamReader(dataStream, System.Text.Encoding.Default))
{
    responseFromServer = reader.ReadToEnd();
}

// Why the pattern is shaped this way:
//  * "\s" right after the tag name keeps <abbr>, <address>, ... from matching as <a>.
//  * "[^>]*?" is lazy and cannot cross '>', so the match stays inside one tag and
//    stops at the first attribute instead of running to the last one on the line
//    (the greedy ".*" skipped most links on single-line HTML).
//  * The "\s" before the attribute name keeps e.g. data-href from matching as href.
//  * '>' is excluded from the value so an unquoted value (href=/about>About</a>)
//    ends at the tag close instead of swallowing the following text.
string pattern = @"<(?:(?:a|link)\s(?:[^>]*?\s)?href|(?:img|script)\s(?:[^>]*?\s)?src|form\s(?:[^>]*?\s)?action)\s*=\s*[""']?(?<link>[^'""\s>]*)";
Regex reg = new Regex(pattern, RegexOptions.IgnoreCase);
foreach (Match m in reg.Matches(responseFromServer))
{
    // HTML-encode the extracted URL so characters such as '&' or '<' are shown
    // literally rather than being interpreted as markup by the browser.
    Response.Write(System.Web.HttpUtility.HtmlEncode(m.Groups["link"].Value) + "<br>");
}

// Echo the downloaded page itself (written unencoded on purpose, as in the original).
Response.Write(responseFromServer);
======================================================================
其中的正则表达式能解析出网页中带引号的地址,但为什么不带引号的出不来?
请大家帮我看一下吧,先谢谢了。 |
|