using System;
using System.IO;
using System.Net;
using System.Text;
using System.Windows.Forms;
using System.IO;
using System.Threading;
namespace urlsave
{
public partial class Form1 : Form
{
/// <summary>
/// Creates the form: runs the designer-generated initialization, asks for the
/// window to be centered on the screen, and prepares (without starting) the
/// STA thread that will run <see cref="choosefile"/>.
/// </summary>
public Form1()
{
    InitializeComponent();

    this.StartPosition = FormStartPosition.CenterScreen;

    // The thread is only created here; it is started elsewhere.
    Thread pickerThread = new Thread(choosefile);
    pickerThread.SetApartmentState(ApartmentState.STA);
    this.choosefileth = pickerThread;
}
// Folder path selected in choosefile(); button2_Click uses it as the root folder to scan.
string choosefilename;
/// <summary>
/// Downloads the page whose address is typed into textBox1, saves it to
/// D:\urlresult.html via <see cref="readurl"/>, and reports the outcome in a
/// message box.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button1_Click(object sender, EventArgs e)
{
    const string outputPath = "D:\\urlresult.html";

    try
    {
        readurl(this.textBox1.Text, outputPath);
        MessageBox.Show("读写完成,保存在:" + outputPath + "!");
    }
    catch (Exception ex)
    {
        // The failure is not necessarily a bad URL (it can also be a network
        // error or an unwritable target), so show the underlying reason too
        // instead of discarding it.
        MessageBox.Show("保存失败!请检查URL" + Environment.NewLine + ex.Message);
    }
}
// Counter of pages fetched by findsub(); its value is displayed in label4.
int i = 0;
/// <summary>
/// Downloads the resource at <paramref name="url"/> and writes it to
/// <paramref name="resfile"/>, decoding and re-encoding the text as either
/// GB2312 or UTF-8 depending on the charset the page declares.
/// </summary>
/// <remarks>
/// The response body is buffered in memory first. Charset sniffing and the
/// final decode then each read the buffered copy from the start, so no part
/// of the page is lost to a sniffing reader's internal buffer.
/// </remarks>
/// <param name="url">Absolute HTTP or HTTPS address to download.</param>
/// <param name="resfile">Path of the file to create (or overwrite).</param>
/// <exception cref="NotSupportedException">
/// <paramref name="url"/> does not resolve to an HTTP(S) request.
/// </exception>
void readurl(string url, string resfile)
{
    HttpWebRequest httpRequest = WebRequest.Create(url) as HttpWebRequest;
    if (httpRequest == null)
    {
        // e.g. file:// or ftp:// addresses; fail with a clear error rather
        // than a NullReferenceException.
        throw new NotSupportedException("Only HTTP/HTTPS URLs are supported: " + url);
    }
    httpRequest.Timeout = 20000;

    byte[] payload;
    using (WebResponse response = httpRequest.GetResponse())
    using (Stream responseStream = response.GetResponseStream())
    using (MemoryStream buffer = new MemoryStream())
    {
        byte[] chunk = new byte[8192];
        int count;
        while ((count = responseStream.Read(chunk, 0, chunk.Length)) > 0)
        {
            buffer.Write(chunk, 0, count);
        }
        payload = buffer.ToArray();
    }

    Encoding encoding = DetectPageEncoding(payload);

    // The output file is only created once the download has succeeded.
    using (StreamReader reader = new StreamReader(new MemoryStream(payload), encoding))
    using (StreamWriter writer = new StreamWriter(resfile, false, encoding))
    {
        writer.Write(reader.ReadToEnd());
    }
}

/// <summary>
/// Chooses the text encoding for a downloaded page: GB2312 when the first
/// "charset=" declaration names a value starting with "gb" (gb2312, GBK, ...)
/// or when no declaration is found, UTF-8 for any other declared charset.
/// </summary>
/// <param name="payload">Raw bytes of the downloaded page.</param>
/// <returns>The encoding to decode and re-encode the page with.</returns>
private static Encoding DetectPageEncoding(byte[] payload)
{
    const string marker = "charset=";
    Encoding gb2312 = Encoding.GetEncoding("gb2312");

    using (StreamReader probe = new StreamReader(new MemoryStream(payload), gb2312))
    {
        string line;
        while ((line = probe.ReadLine()) != null)
        {
            int at = line.IndexOf(marker, StringComparison.OrdinalIgnoreCase);
            if (at >= 0)
            {
                // Look only at the declared value, not the whole line, so
                // unrelated text such as "rgb(" cannot trigger the GB branch.
                string declared = line.Substring(at + marker.Length).TrimStart('"', '\'', ' ');
                return declared.StartsWith("gb", StringComparison.OrdinalIgnoreCase)
                    ? gb2312
                    : Encoding.UTF8;
            }
        }
    }

    return gb2312;
}
/// <summary>
/// Recursively walks <paramref name="nextFolders"/>. For every file whose
/// second line has the form "URL=address", downloads that address with
/// <see cref="readurl"/> into a file named after the original plus ".html"
/// in the same folder, and updates the progress label.
/// </summary>
/// <remarks>
/// Files that fail while being read or downloaded are added to listBox1.
/// Files whose second line is missing or is not a URL entry are skipped.
/// </remarks>
/// <param name="nextFolders">Folder to scan, including its sub-folders.</param>
void findsub(DirectoryInfo nextFolders)
{
    foreach (DirectoryInfo nextFolder in nextFolders.GetDirectories())
    {
        findsub(nextFolder);
    }

    foreach (FileInfo NextFile in nextFolders.GetFiles())
    {
        try
        {
            string secondLine;
            // Close the file as soon as the two lines are read so no handle
            // stays open while the page is being downloaded.
            using (StreamReader sr = new StreamReader(NextFile.FullName))
            {
                sr.ReadLine();
                secondLine = sr.ReadLine();
            }

            // A null (fewer than two lines) or short line is simply "not a
            // URL entry"; it must not be reported as a failed download.
            if (secondLine == null || !secondLine.StartsWith("URL=", StringComparison.Ordinal))
            {
                continue;
            }

            string url = secondLine.Substring(4);
            string file = Path.Combine(nextFolders.FullName, NextFile.Name + ".html");
            readurl(url, file);

            // Count first, then display, so the label shows the number of
            // pages actually completed.
            i++;
            this.label4.Text = "已完成" + i.ToString() + "个网页抓取!";
            this.label4.Update();
        }
        catch (Exception)
        {
            // Best effort: remember the file that failed and carry on.
            this.listBox1.Items.Add(NextFile.FullName);
        }
    }
}
/// <summary>
/// Scans the previously chosen folder with <see cref="findsub"/> and shows a
/// completion message. If no existing folder has been chosen yet, asks the
/// user to pick one instead of failing.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button2_Click(object sender, EventArgs e)
{
    // choosefilename stays null until a folder has been picked; constructing
    // a DirectoryInfo from null/empty would throw.
    if (string.IsNullOrEmpty(choosefilename) || !Directory.Exists(choosefilename))
    {
        MessageBox.Show("请先选择文件夹!");
        return;
    }

    findsub(new DirectoryInfo(choosefilename));
    MessageBox.Show("导出成功!");
}
// STA thread that runs choosefile(): created in the constructor, started by
// button3_Click, and replaced with a fresh instance at the end of choosefile().
public Thread choosefileth;
/// <summary>
/// Thread body for the folder picker: shows a folder browser dialog, stores
/// the selected path in choosefilename when the user confirms, then prepares
/// a fresh STA thread in choosefileth for the next invocation.
/// </summary>
public void choosefile()
{
    // The dialog is a component holding native resources; dispose it when done.
    using (FolderBrowserDialog dialog = new FolderBrowserDialog())
    {
        if (dialog.ShowDialog(this) == System.Windows.Forms.DialogResult.OK)
        {
            choosefilename = dialog.SelectedPath;
        }
    }

    Application.ExitThread();

    // A Thread object can be started only once, so a new one is needed for
    // the next run. Configure it fully before publishing it in the field so
    // another thread can never start it before the apartment state is set.
    Thread next = new Thread(new ThreadStart(choosefile));
    next.SetApartmentState(ApartmentState.STA);
    choosefileth = next;
}
/// <summary>
/// Starts the folder-picker thread held in choosefileth. Does nothing when
/// that thread has already been started (the picker is still open).
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button3_Click(object sender, EventArgs e)
{
    // NOTE(review): this switches off cross-thread call checking for the whole
    // process; it is kept because choosefile() passes this form as the dialog
    // owner from another thread.
    Control.CheckForIllegalCrossThreadCalls = false;

    Thread picker = choosefileth;

    // Thread.Start throws ThreadStateException on a thread that was already
    // started, so ignore clicks until choosefile() has installed a new thread.
    if ((picker.ThreadState & System.Threading.ThreadState.Unstarted) == 0)
    {
        return;
    }

    picker.Start();
}
/// <summary>
/// Load handler: clears the text of label4.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void Form1_Load(object sender, EventArgs e)
{
    label4.Text = null;
}
/// <summary>
/// Shows the help dialog modally and releases it once it is closed.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button4_Click(object sender, EventArgs e)
{
    // A dialog shown with ShowDialog() is not disposed automatically when it
    // closes, so dispose it explicitly.
    using (help helpdlg = new help())
    {
        helpdlg.ShowDialog();
    }
}
}
}