/// <summary>
/// Worker loop that polls the manhuadao "latest updates" page and mirrors the
/// listed comics (metadata, cover image and chapter list) into the database.
/// </summary>
/// <remarks>
/// Each pass fetches <c>manhuadao_up_url</c>, extracts the "latest-list" section and
/// walks the comic links in it: every link while <c>m_today</c> is true, otherwise only
/// the first one. A comic that already exists in <c>qTcms_Comics</c> only gets its
/// missing chapters added; a new comic is inserted together with its cover and all
/// chapters. The loop sleeps <c>sheep</c> milliseconds between passes and ends when
/// <c>Stop</c> becomes true, after which the UI label/button are updated via Invoke.
///
/// NOTE(review): every SQL statement below is built by string concatenation from
/// scraped page content. Single quotes are stripped from the page first, but the
/// statements should be parameterized once sql_class offers a way to do so.
/// NOTE(review): the SqlDataReader instances returned by sql_class.getread are never
/// closed here; whether that leaks connections depends on sql_class - confirm.
/// </remarks>
private void manhuadao_up()
{
    T_num++;
    while (Stop == false)
    {
        // Fetch the updates page and cut out the "latest-list" section.
        String html_code = getHtml(manhuadao_up_url, 1);
        Regex r = new Regex(@"<div class=""latest-list"">.*?</div>", RegexOptions.IgnoreCase);
        Match m = r.Match(html_code);
        if (!m.Success)
        {
            // The page could not be fetched or its layout changed. Indexing Matches[0]
            // would throw here, so wait and retry on the next pass instead.
            Thread.Sleep(sheep);
            continue;
        }
        html_code = m.Value;

        String list = regur2(html_code, @"<a href=""/book/(?<key>.*?)/"" title=");
        String[] list_arr = list.Split(new char[] { ',' }, StringSplitOptions.RemoveEmptyEntries);

        // Full list on the first ("today") pass, otherwise only the newest entry;
        // never more entries than were actually found.
        Int32 ii = m_today ? list_arr.Length : Math.Min(1, list_arr.Length);

        for (Int32 k = 0; k < ii; k++)
        {
            String str = list_arr[k];
            String url = manhuadao_url + "/book/" + str + "/"; // full URL of the comic page
            String list_html_code = getHtml(url, 1);
            if (list_html_code == "err")
            {
                // getHtml signals a failed download with "err"; skip this comic
                // rather than parsing the sentinel as if it were page markup.
                continue;
            }
            list_html_code = Regex.Replace(list_html_code, "'", "", RegexOptions.IgnoreCase); // strip single quotes

            // ---- scrape the comic's metadata ----
            String title = regur2(list_html_code, @"<h1>(?<key>.*?)</h1>");
            String zuozhe = regur2(list_html_code, @"<strong>漫画作者:</strong><a.*?>(?<key>.*?)?</a>"); // author

            // Serialization status: 1 while the "连载中" marker is present, else 0.
            Int32 lianzai = 1;
            String lz = regur2(list_html_code, @"<span class=""text"">更新至:(?<key>.*?)</span>");
            if (regur1(list_html_code, @"<span class=""red"">连载中</span>") == "err")
            {
                lianzai = 0;
                lz = "完结";
            }

            // Map the category name to its numeric id ("14" when unrecognised).
            String leibie = regur2(list_html_code, @"<strong>漫画剧情:</strong><a.*?>(?<key>.*?)</a>");
            String lb = "14";
            switch (leibie)
            {
                case "热血": lb = "1"; break;
                case "格斗": lb = "2"; break;
                case "科幻": lb = "3"; break;
                case "竞技": lb = "4"; break;
                case "搞笑": lb = "5"; break;
                case "推理": lb = "6"; break;
                case "恐怖": lb = "7"; break;
                case "耽美": lb = "8"; break;
                case "爱情": lb = "9"; break;
                case "职场": lb = "10"; break;
                case "社会": lb = "12"; break;
                case "历史": lb = "13"; break;
                default: break;
            }

            // Map the region name to its numeric id ("4" when unrecognised).
            String qy = "4";
            String qy1 = regur2(list_html_code, @"<strong>漫画地区:</strong><a.*?>(?<key>.*?)</a>");
            switch (qy1)
            {
                case "日本漫画": qy = "0"; break;
                case "港台漫画": qy = "1"; break;
                case "欧美漫画": qy = "2"; break;
                case "大陆漫画": qy = "3"; break;
                default: break;
            }

            // Introduction text with all tags removed.
            String jieshao = regur1(list_html_code, @"<div id=""intro-cut"">[\s\S]*?</div>");
            jieshao = Regex.Replace(jieshao, @"<.*?>", "", RegexOptions.IgnoreCase);

            // Cover image URL. For a self-closing <img ... /> the pattern captures a
            // trailing `" /`; for a plain <img ...> it does not. Only trim the tail when
            // it is really there - Remove(-1) throws ArgumentOutOfRangeException.
            String m_img = regur2(list_html_code, "<p class=\"hcover\"><img src=\"(?<key>.*?)\"?>");
            Int32 img_tail = m_img.LastIndexOf("\" /", StringComparison.Ordinal);
            if (img_tail >= 0)
            {
                m_img = m_img.Remove(img_tail);
            }

            String m_letter = GetPYChar(title);

            // Chapter list: comma-separated entries of the form "<chapterId>#<name>",
            // sorted ascending by the numeric chapter id.
            String list_code = regur1(list_html_code, @"<div class=""chapter-list cf mt10""\s?(id=""chpater-list-1"")?>.*?</div>");
            String list2 = regur3(list_code, "<a href=\"/book/" + str + "/(?<key>.*?)\" title=.*?<span>(?<key2>.*?)<i>");
            String[] list2_arr = list2.Split(new char[] { ',' }, StringSplitOptions.RemoveEmptyEntries);
            list2_arr = list2_arr.OrderBy(int_list => int.Parse(int_list.Split('#')[0])).ToArray();
            Int32 hs = list2_arr.Length; // number of chapters on the page

            title = ToHtmlEntity(title);
            zuozhe = ToHtmlEntity(zuozhe);
            jieshao = ToHtmlEntity(jieshao);

            // ---- compare with what is already stored ----
            Int32 m_total = 0; // chapter count already stored
            Int32 m_no = 0;
            sql_class conn = new sql_class();
            SqlDataReader read = conn.getread("SELECT [m_total],[id] FROM [qTcms_Comics] where m_title like '" + title + "'");
            if (read.Read())
            {
                // Already stored: add only the chapters that are missing.
                m_total = Convert.ToInt32(read[0]);
                String bd_id = read[1].ToString(); // id of the stored comic
                // NOTE(review): m_no is read but not used in this method; kept so the
                // sequence of database calls stays as it was.
                SqlDataReader sqlread = conn.getread("SELECT top 1 m_no FROM qTcms_ComicsUrl where m_id = " + bd_id + " order by id desc");
                if (sqlread.Read())
                {
                    m_no = Convert.ToInt32(sqlread[0]);
                }
                if (list2_arr.Length > m_total)
                {
                    for (Int32 i_i = m_total; i_i < list2_arr.Length; i_i++)
                    {
                        Get_list2(list2_arr[i_i], (i_i + 1).ToString(), bd_id, str);
                    }
                    // Refresh the summary columns:
                    //   m_lianzai - status text, m_type5 - status flag (0/1),
                    //   m_total - chapter count, m_last - id of the newest chapter row,
                    //   m_date2 - update time.
                    SqlDataReader m_last = conn.getread("SELECT top 1 id FROM qTcms_ComicsUrl where m_id = " + bd_id + " order by id desc");
                    m_last.Read();
                    String sql = "UPDATE [qTcms_Comics] SET [m_lianzai] = '" + lz + "',[m_type5] =" + lianzai + ",[m_total] = " + hs + ",m_date2='" + DateTime.Now + "',m_last=" + m_last[0] + " WHERE id =" + bd_id;
                    conn.getcom(sql);
                    New_s = true;
                }
            }
            else
            {
                // Not stored yet: insert the comic, its cover and every chapter.
                SqlDataReader sqlread1 = conn.getread("select IDENT_CURRENT('qTcms_Comics')as id");
                sqlread1.Read();
                Int32 id = Convert.ToInt32(sqlread1[0]);
                String b_id = (id + 1).ToString(); // id the new row is expected to get

                // The cover is stored under a three-level folder taken from md5(id).
                String md5str = md5(b_id);
                String m1 = md5str.Substring(0, 2);
                String m2 = md5str.Substring(2, 2);
                String m3 = md5str.Substring(4, 2);
                makefile(uploadPic + m1 + "\\" + m2 + "\\" + m3 + "\\");
                String path = uploadPic + m1 + "\\" + m2 + "\\" + m3 + "\\" + b_id + ".jpg";
                String rk_path = "http://pic.manhua.maoren8.net/" + m1 + "/" + m2 + "/" + m3 + "/" + b_id + ".jpg";
                Save_img(m_img, url, path, b_id); // download the cover image

                String sql = "INSERT INTO qTcms_Comics (m_id,m_look,m_title,m_Director,m_lianzai,m_content,m_pic,m_letter,m_date,m_type5,m_type1,m_type2,m_total,m_zhengli,m_zhengli2) VALUES('" + str + "',1,'" + title + "','" + zuozhe + "','" + lz + "','" + jieshao + "','" + rk_path + "','" + m_letter + "','" + DateTime.Now + "'," + lianzai + "," + lb + "," + qy + "," + hs + ",1,1)";
                conn.getcom(sql);

                Int32 a = 0;
                foreach (String b in list2_arr)
                {
                    a++;
                    Get_list2(b, a.ToString(), b_id, str);
                }

                SqlDataReader m_last = conn.getread("SELECT top 1 id FROM qTcms_ComicsUrl where m_id = " + b_id + " order by id desc");
                m_last.Read();
                String sql1 = "UPDATE [qTcms_Comics] SET [m_lianzai] = '" + lz + "',[m_type5] =" + lianzai + ",[m_total] = " + hs + ",m_date2='" + DateTime.Now + "',m_last=" + m_last[0] + " WHERE id =" + b_id;
                conn.getcom(sql1);
                New_s = true;
            }
        }

        // The full "today" pass is done; later passes only look at the newest entry.
        m_today = false;
        Thread.Sleep(sheep);
    }

    T_num--;
    this.Invoke(new System.Action<object>(delegate { this.label5.Text = "停止更新漫画岛"; }), 1);
    if (T_num == 0)
    {
        this.Invoke(new System.Action<object>(delegate { this.button1.Enabled = true; }), 1);
    }
}
// NOTE(review): this brace block has no method header in this file and repeats,
// statement for statement, the body of manhuadao_up above - presumably a paste
// artifact. Confirm, then remove it or restore its header.
// What the code does: loops until Stop is set, scrapes the updates page, and for each
// listed comic either adds missing chapters (comic already in qTcms_Comics) or inserts
// the comic, its cover and all chapters.
{
T_num++;
while (Stop == false)
{
String html_code = getHtml(manhuadao_up_url, 1);//fetch the source of the updates page
String rex = @"<div class=""latest-list"">.*?</div>";
Regex r = new Regex(rex, RegexOptions.IgnoreCase);
Match m = r.Matches(html_code)[0];
html_code = m.Value;//keep only the matched "latest-list" markup (today's updates)
String list = regur2(html_code, @"<a href=""/book/(?<key>.*?)/"" title=");
String[] list_arr = list.Split(new char[] { ',' }, StringSplitOptions.RemoveEmptyEntries); //split the comma-separated links into an array
Int32 ii = 1;
if (m_today == true)
{
ii = list_arr.Count();
}
for (Int32 k = 0; k < ii; k++)
{
String str = list_arr[k].ToString();
String url = manhuadao_url + "/book/" + str + "/"; //full URL of the comic page
String list_html_code = getHtml(url, 1);//raw page source
list_html_code = Regex.Replace(list_html_code, "'", "", RegexOptions.IgnoreCase);//strip single quotes
//scrape the fields
String title = regur2(list_html_code, @"<h1>(?<key>.*?)</h1>");//comic title
String zuozhe = regur2(list_html_code, @"<strong>漫画作者:</strong><a.*?>(?<key>.*?)?</a>");//author; markup looks like <strong>漫画作者:</strong><a.*?>.*?</a>
Int32 lianzai = 1;
String lz = regur2(list_html_code, @"<span class=""text"">更新至:(?<key>.*?)</span>");//serialization status text
if (regur1(list_html_code, @"<span class=""red"">连载中</span>") == "err")
{
lianzai = 0;
lz = "完结";
}
String leibie = regur2(list_html_code, @"<strong>漫画剧情:</strong><a.*?>(?<key>.*?)</a>");//category name
String lb = "14";//category id, stays "14" when the name is not recognised
switch (leibie)
{
case "热血": lb = "1"; break;
case "格斗": lb = "2"; break;
case "科幻": lb = "3"; break;
case "竞技": lb = "4"; break;
case "搞笑": lb = "5"; break;
case "推理": lb = "6"; break;
case "恐怖": lb = "7"; break;
case "耽美": lb = "8"; break;
case "爱情": lb = "9"; break;
case "职场": lb = "10"; break;
case "社会": lb = "12"; break;
case "历史": lb = "13"; break;
default: break;
}
String qy = "4";
String qy1 = regur2(list_html_code, @"<strong>漫画地区:</strong><a.*?>(?<key>.*?)</a>");
switch (qy1)
{
case "日本漫画": qy = "0"; break;
case "港台漫画": qy = "1"; break;
case "欧美漫画": qy = "2"; break;
case "大陆漫画": qy = "3"; break;
default: break;
}
String jieshao = regur1(list_html_code, @"<div id=""intro-cut"">[\s\S]*?</div>");
jieshao = Regex.Replace(jieshao, @"<.*?>", "", RegexOptions.IgnoreCase);//comic introduction with tags stripped; sample chapter markup: title="第69话 败者" class="status0" target="_blank"><span>69话<i>17p</i></span>
String m_img = regur2(list_html_code, "<p class=\"hcover\"><img src=\"(?<key>.*?)\"?>");
// NOTE(review): LastIndexOf returns -1 when `" /` is absent, and Remove(-1) then throws ArgumentOutOfRangeException.
m_img = m_img.Remove(m_img.LastIndexOf("\" /"));
String m_letter = GetPYChar(title); //initial letter (per the original comment)
String list_code = regur1(list_html_code, @"<div class=""chapter-list cf mt10""\s?(id=""chpater-list-1"")?>.*?</div>");
//String list2 = regur3(list_code, "<a href=\"/book/" + str + "/(?<key>.*?)\" title=\"(?<key2>.*?)\"");
String list2 = regur3(list_code, "<a href=\"/book/" + str + "/(?<key>.*?)\" title=.*?<span>(?<key2>.*?)<i>");
String[] list2_arr = list2.Split(new char[] { ',' }, StringSplitOptions.RemoveEmptyEntries);
// Array.Reverse(list2_arr);//collect in reverse order
list2_arr = list2_arr.OrderBy(int_list => int.Parse(int_list.Split('#')[0])).ToArray();
Int32 hs = list2_arr.Count();//number of chapters on the page
title = ToHtmlEntity(title);
zuozhe = ToHtmlEntity(zuozhe);
jieshao = ToHtmlEntity(jieshao);
/////compare with the database//////////////////////////////////////////////////////////////////////////////////////////////////////
Int32 m_total = 0;//chapter count already stored
Int32 m_no = 0;
sql_class conn = new sql_class();
SqlDataReader read = conn.getread("SELECT [m_total],[id] FROM [qTcms_Comics] where m_title like '" + title + "'"); //does this comic already exist?
if (read.Read())
{
//already stored: compare and update
m_total = Convert.ToInt32(read[0]);
String bd_id = read[1].ToString();//id of the stored comic
SqlDataReader sqlread = conn.getread("SELECT top 1 m_no FROM qTcms_ComicsUrl where m_id = " + bd_id + " order by id desc");
if (sqlread.Read())
{
m_no = Convert.ToInt32(sqlread[0]);
} if (list2_arr.Count() > m_total) //update only when the page lists more chapters than are stored
{
for (Int32 i_i = m_total; i_i < list2_arr.Count(); i_i++)
{
Get_list2(list2_arr[i_i], (i_i + 1).ToString(), bd_id, str);
}
//columns refreshed below
//m_lianzai serialization status text
//m_type5 status flag used for filtering: 0,1
//m_total total number of chapters
//m_last id of the newest chapter row in the url table
//m_date2 update time -- NOTE(review): the following declaration is inside this comment, so m_last is undeclared below: SqlDataReader m_last = conn.getread("SELECT top 1 id FROM qTcms_ComicsUrl where m_id = " + bd_id + " order by id desc");
m_last.Read();
String sql = "UPDATE [qTcms_Comics] SET [m_lianzai] = '" + lz + "',[m_type5] =" + lianzai + ",[m_total] = " + hs + ",m_date2='" + DateTime.Now + "',m_last=" + m_last[0] + " WHERE id =" + bd_id;
conn.getcom(sql);
New_s = true;
}
}
else //not stored yet: add as a new entry
{
SqlDataReader sqlread1 = conn.getread("select IDENT_CURRENT('qTcms_Comics')as id");
sqlread1.Read();
Int32 id = Convert.ToInt32(sqlread1[0]);
String b_id = (id + 1).ToString();
String md5str = md5(b_id);//md5 of the id, used for the folder names
String m1 = md5str.Substring(0, 2);
String m2 = md5str.Substring(2, 2);
String m3 = md5str.Substring(4, 2);
makefile(uploadPic + m1 + "\\" + m2 + "\\" + m3 + "\\");//create the directory
String path = uploadPic + m1 + "\\" + m2 + "\\" + m3 + "\\" + b_id + ".jpg";
String rk_path = "http://pic.manhua.maoren8.net/" + m1 + "/" + m2 + "/" + m3 + "/" + b_id + ".jpg";
Save_img(m_img, url, path, b_id);//download the cover image
//try
//{
String sql = "INSERT INTO qTcms_Comics (m_id,m_look,m_title,m_Director,m_lianzai,m_content,m_pic,m_letter,m_date,m_type5,m_type1,m_type2,m_total,m_zhengli,m_zhengli2) VALUES('" + str + "',1,'" + title + "','" + zuozhe + "','" + lz + "','" + jieshao + "','" + rk_path + "','" + m_letter + "','" + DateTime.Now + "'," + lianzai + "," + lb + "," + qy + "," + hs + ",1,1)";
conn.getcom(sql);
//}
//catch
//{
// //insert failed, possibly because of illegal characters
//}
Int32 a = 0;
foreach (String b in list2_arr)
{
a++;
Get_list2(b, a.ToString(), b_id, str);
}
SqlDataReader m_last = conn.getread("SELECT top 1 id FROM qTcms_ComicsUrl where m_id = " + b_id + " order by id desc");
m_last.Read();
String sql1 = "UPDATE [qTcms_Comics] SET [m_lianzai] = '" + lz + "',[m_type5] =" + lianzai + ",[m_total] = " + hs + ",m_date2='" + DateTime.Now + "',m_last=" + m_last[0] + " WHERE id =" + b_id;
conn.getcom(sql1);
New_s = true;
}
}
//today's update pass is finished
m_today = false;
Thread.Sleep(sheep);
}
T_num--;
this.Invoke(new System.Action<object>(delegate { this.label5.Text = "停止更新漫画岛"; }), 1);
if (T_num == 0)
{
this.Invoke(new System.Action<object>(delegate { this.button1.Enabled = true; }), 1);
}
}
解决方案 »
- 通过串口利用MPI协议与西门子PLC S7-300系统通讯的实现方式(C#)
- 如何让这textbox 的值得注意1000
- datatable select() error求教
- 急救!ISAPI请救数超过100,网站打开的时候非常慢!
- 急!在线等,多线程读取数据并写入的问题
- 在VS2008调试运行正常,在IIS中运行报类型“System.Web.UI.WebControls.ControlParameter”不具有名为“DbType
- 如何开发一个B2C电子商务网站
- C#下如何打开管理员权限的CMD
- 一个关于DS的操作的问题
- C#构造函数和析构函数
- 求助!C#mono开发怎样播放flash动画文件
- winform一个窗体控制多个独立的进程
// NOTE(review): the method header is missing from this file. Judging by the calls
// Get_list2(entry, chapterNo, comicId, bookKey) elsewhere in the file, this is presumably
// the body of Get_list2 with parameters list_url, mu_path, mu and y_id - confirm.
// What the code does: creates the chapter's folder, downloads the chapter page, and, if
// the chapter is not yet in qTcms_ComicsUrl, queues every picture in table temp and then
// inserts one qTcms_ComicsUrl row holding the joined picture URLs.
{
String md5str = md5(mu);
String m1 = md5str.Substring(0, 2);
String m2 = md5str.Substring(2, 2);
String m3 = md5str.Substring(4, 2);
String p = upload + m1 + "\\" + m2 + "\\" + m3 + "\\" + mu + "\\" + mu_path; //directory the chapter is saved to
if (makefile(p) == false)//create the folder
{
MessageBox.Show("无法创建文件夹,可能的原因有:\n没有磁盘写入权限\n要创建的目录错误\n路径:" + p + "");
return;
}
// list_url is split on '#': element 0 is used in the page URL, element 1 as the chapter name.
String[] url_arr = list_url.Split(new char[] { '#' }, StringSplitOptions.RemoveEmptyEntries);
String h_text = Regex.Replace(url_arr[1], @"<.*?>", "", RegexOptions.IgnoreCase);//strip HTML tags
String html = getHtml(manhuadao_url + "/book/" + y_id + "/" + url_arr[0], 1);//e.g. http://www.manhuadao.com/book/shuimeiren/26430
if (html != "err")
{
String pic = regur1(html, reg4);
String[] pic_arr = pic.Split(new char[] { ',' }, StringSplitOptions.RemoveEmptyEntries);
Int32 i = 0;
String pic_up_path = String.Empty;
sql_class conn = new sql_class();
//check whether this chapter is already stored
SqlDataReader sqlread = conn.getread("SELECT id FROM qTcms_ComicsUrl where m_id =" + mu + " and m_name like '" + h_text + "'");
Int32 m_no = 0;
if (!sqlread.HasRows)
{
foreach (String k in pic_arr)
{
i++;
pic_up_path += "http://file.manhua.maoren8.net/" + m1 + "/" + m2 + "/" + m3 + "/" + mu + "/" + mu_path + "/" + i + ".png" + "$qingtiandy$";//URL that gets stored
conn.getcom("INSERT INTO temp (m_id,h_id,pic_name,pic_url,list,m1,m2,m3,laiyuan)VALUES(" + mu + "," + mu_path + ",'" + i + ".png'" + ",'" + k + "','/book/" + y_id + "/" + url_arr[0] + "','" + m1 + "','" + m2 + "','" + m3 + "','manhuadao')"); //insert into the temporary table, named temp
}
// Drop the trailing "$qingtiandy$" separator.
// NOTE(review): when pic_arr is empty LastIndexOf returns -1 and Remove throws.
pic_up_path = pic_up_path.Remove(pic_up_path.LastIndexOf("$qingtiandy$"));
pic_up_path = pic_up_path.Replace("\\", "/"); //final value that gets stored
sqlread = conn.getread("SELECT top 1 m_no FROM qTcms_ComicsUrl where m_id = " + mu + " order by id desc");
if (sqlread.Read())
{
// New row's m_no is the newest stored m_no plus 5; stays 0 when there is none.
m_no = Convert.ToInt32(sqlread[0]) + 5;
}
String Sql = "insert into qTcms_ComicsUrl (m_id,m_name,m_url,m_total,m_no,m_if,m_date) values(" + mu + ",'" + h_text + "','" + pic_up_path + "'," + i + "," + m_no + ",0,'" + DateTime.Now + "')";
conn.getcom(Sql); //insert into the permanent table, qTcms_ComicsUrl
}
}
else
{
//MessageBox.Show("错误:目标地址无法打开", "错误");
} }
m_img = m_img.Remove(m_img.LastIndexOf("\" /"));
就是这行,把最后一个反斜杠替换成斜杠。
m_img = m_img.Remove(m_img.LastIndexOf("\" /"));用以上的正则,提取到的结果是:
http://i1.manhuadao.com/bcover/2014/1/151445093_h.jpg" /
后面多了一个 / 符号,所以用那句去掉;按道理这个应该用正则直接获取到没有斜杠的,不知道为何就是要出来一个斜杠。
源码是:
<p class="hcover"><img src="http://i1.manhuadao.com/bcover/2014/5/241640153_h.jpg">
用正则得到的始终有个 /符号。
并只提取到结果:http://i1.manhuadao.com/bcover/2014/5/241640153_h.jpg
看抛出的错误,有这样的描述:
ArgumentOutOfRangeException
这就是下标越界了,下标小于0的时候出错。找到这个地方,并处理