用 C# 写爬虫时,如果遇到需要用户登录才能访问的网页,爬取数据时就需要设置 cookie。代码如下:
/// <summary>
/// Sends an HTTP GET request with the supplied cookie attached and returns the
/// response body decoded as UTF-8.
/// </summary>
/// <param name="getUrl">URL of the page to fetch.</param>
/// <param name="cookie">Cookie value handed to <c>getCookie</c> to build the request's cookie container.</param>
/// <returns>The full response body with leading and trailing whitespace trimmed.</returns>
/// <remarks>
/// Exceptions raised while creating or executing the request are not handled here;
/// they propagate to the caller unchanged.
/// </remarks>
public static string GetHtmlByCookie(string getUrl, string cookie)
{
    HttpWebRequest request = null;
    try
    {
        request = (HttpWebRequest)WebRequest.Create(getUrl);

        // Use the caller's cookie value; the previous code passed the literal text "cookie".
        // NOTE(review): getCookie is defined outside this snippet and is assumed to
        // turn the raw cookie string into a CookieContainer - confirm against its definition.
        request.CookieContainer = getCookie(cookie);
        request.Accept = "*/*";
        request.UserAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1;SV1)";
        request.AllowAutoRedirect = true;

        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        using (StreamReader reader = new StreamReader(response.GetResponseStream(), Encoding.UTF8))
        {
            return reader.ReadToEnd().Trim();
        }
    }
    finally
    {
        // No catch block: a catch that only did "throw e;" would reset the stack trace.
        // The request is aborted on every exit path, as in the original code.
        if (request != null)
        {
            request.Abort();
        }
    }
}