From 294494e5559dcc92670dd9800caf976853aff876 Mon Sep 17 00:00:00 2001
From: username@email.com <yzy2002yzy@163.com>
Date: 星期三, 24 七月 2024 14:23:02 +0800
Subject: [PATCH] 提交
---
zhengcaioa/Crawler/sichuan/CcgpSichuanoperation.cs | 196 +++++++++++++++++++++++++++++++++++++++++++++++-
zhengcaioa/Crawler/Program.cs | 8 +-
zhengcaioa/Crawler/sichuan/sichuanoperation.cs | 10 +-
3 files changed, 200 insertions(+), 14 deletions(-)
diff --git a/zhengcaioa/Crawler/Program.cs b/zhengcaioa/Crawler/Program.cs
index 5b7408a..8a34395 100644
--- a/zhengcaioa/Crawler/Program.cs
+++ b/zhengcaioa/Crawler/Program.cs
@@ -144,12 +144,12 @@
thread.Start();
- Thread thread1 = new Thread(new ThreadStart(obj.Method2));
- thread1.Start();
+ //Thread thread1 = new Thread(new ThreadStart(obj.Method2));
+ //thread1.Start();
- Thread thread2 = new Thread(new ThreadStart(obj.Method3));
- thread2.Start();
+ //Thread thread2 = new Thread(new ThreadStart(obj.Method3));
+ //thread2.Start();
Console.WriteLine("Hello World!");
}
diff --git a/zhengcaioa/Crawler/sichuan/CcgpSichuanoperation.cs b/zhengcaioa/Crawler/sichuan/CcgpSichuanoperation.cs
index 7c229e2..37c8ea7 100644
--- a/zhengcaioa/Crawler/sichuan/CcgpSichuanoperation.cs
+++ b/zhengcaioa/Crawler/sichuan/CcgpSichuanoperation.cs
@@ -10,6 +10,7 @@
using System.Net.Http;
using System.Net.Http.Headers;
using System.Text;
+using System.Text.RegularExpressions;
using System.Threading;
using zhengcaioa.Models;
namespace Crawler.sichuan
@@ -256,7 +257,32 @@
list11 = lists.ToList();
}
+ string[] bbb = null;
+ var scriptElements = document.QuerySelectorAll("script");
+ foreach (var scriptElement in scriptElements)
+ {
+ var scriptText = scriptElement.TextContent;
+ if (scriptText.Contains("var ohtmlurls")) // 检查文本中是否包含特定变量 (check whether the script text contains the target variable)
+ {
+
+ int startIndex = scriptText.IndexOf('"') + 1;
+ int endIndex = scriptText.IndexOf('"', startIndex);
+ if (startIndex > 0 && endIndex > startIndex)
+ {
+ var aaaa = scriptText.Substring(startIndex, endIndex - startIndex);
+ if (!string.IsNullOrEmpty(aaaa))
+ {
+ bbb = aaaa.Split(',');
+ }
+ }
+ }
+ }
+
+
+
+
+ int ccc = 0;
foreach (var sichuanjieshoudtl1 in lists)
{
@@ -264,7 +290,17 @@
try
{
sichuanjieshoudtl aaaaaaaa = new sichuanjieshoudtl();
- aaaaaaaa.pageurl = sichuanjieshoudtl1.QuerySelector("a").GetAttribute("href");
+ if(bbb!=null && bbb.Length>= ccc && !string.IsNullOrEmpty(bbb[ccc]))
+ {
+ aaaaaaaa.pageurl = bbb[ccc];
+ ccc = ccc + 1;
+ }
+ else
+ {
+ ccc = ccc + 1;
+ continue;
+ }
+ // aaaaaaaa.pageurl = sichuanjieshoudtl1.QuerySelector("a").GetAttribute("href");
logg.WriteLog(aaaaaaaa.pageurl, "涓浗鏀块噰缃戝洓宸濈渷");
aaaaaaaa.title = sichuanjieshoudtl1.QuerySelector("a").TextContent.Replace("\n", "").Trim();
@@ -650,13 +686,51 @@
{
list11 = lists.ToList();
}
+
+ string[] bbb = null;
+ var scriptElements = document.QuerySelectorAll("script");
+ foreach (var scriptElement in scriptElements)
+ {
+ var scriptText = scriptElement.TextContent;
+
+ if (scriptText.Contains("var ohtmlurls")) // 检查文本中是否包含特定变量 (check whether the script text contains the target variable)
+ {
+
+ int startIndex = scriptText.IndexOf('"') + 1;
+ int endIndex = scriptText.IndexOf('"', startIndex);
+ if (startIndex > 0 && endIndex > startIndex)
+ {
+ var aaaa = scriptText.Substring(startIndex, endIndex - startIndex);
+ if (!string.IsNullOrEmpty(aaaa))
+ {
+ bbb = aaaa.Split(',');
+ }
+ }
+ }
+ }
+
+
+
+
+ int ccc = 0;
+
foreach (var sichuanjieshoudtl1 in lists)
{
Thread.CurrentThread.Join(1000 * 10);//阻止设定时间 (block the current thread for the configured time)
try
{
sichuanjieshoudtl aaaaaaaa = new sichuanjieshoudtl();
- aaaaaaaa.pageurl = sichuanjieshoudtl1.QuerySelector("a").GetAttribute("href");
+ if (bbb != null && bbb.Length >= ccc && !string.IsNullOrEmpty(bbb[ccc]))
+ {
+ aaaaaaaa.pageurl = bbb[ccc];
+ ccc = ccc + 1;
+ }
+ else
+ {
+ ccc = ccc + 1;
+ continue;
+ }
+ // aaaaaaaa.pageurl = sichuanjieshoudtl1.QuerySelector("a").GetAttribute("href");
logg.WriteLog(aaaaaaaa.pageurl, "涓浗鏀块噰缃戝洓宸濈渷");
aaaaaaaa.title = sichuanjieshoudtl1.QuerySelector("a").TextContent.Replace("\n", "").Trim();
@@ -990,13 +1064,50 @@
list11 = lists.ToList();
}
+ string[] bbb = null;
+ var scriptElements = document.QuerySelectorAll("script");
+ foreach (var scriptElement in scriptElements)
+ {
+ var scriptText = scriptElement.TextContent;
+
+ if (scriptText.Contains("var ohtmlurls")) // 检查文本中是否包含特定变量 (check whether the script text contains the target variable)
+ {
+
+ int startIndex = scriptText.IndexOf('"') + 1;
+ int endIndex = scriptText.IndexOf('"', startIndex);
+ if (startIndex > 0 && endIndex > startIndex)
+ {
+ var aaaa = scriptText.Substring(startIndex, endIndex - startIndex);
+ if (!string.IsNullOrEmpty(aaaa))
+ {
+ bbb = aaaa.Split(',');
+ }
+ }
+ }
+ }
+
+
+
+
+ int ccc = 0;
+
foreach (var sichuanjieshoudtl1 in lists)
{
Thread.CurrentThread.Join(1000 * 10);//阻止设定时间 (block the current thread for the configured time)
try
{
sichuanjieshoudtl aaaaaaaa = new sichuanjieshoudtl();
- aaaaaaaa.pageurl = sichuanjieshoudtl1.QuerySelector("a").GetAttribute("href");
+ if (bbb != null && bbb.Length >= ccc && !string.IsNullOrEmpty(bbb[ccc]))
+ {
+ aaaaaaaa.pageurl = bbb[ccc];
+ ccc = ccc + 1;
+ }
+ else
+ {
+ ccc = ccc + 1;
+ continue;
+ }
+ // aaaaaaaa.pageurl = sichuanjieshoudtl1.QuerySelector("a").GetAttribute("href");
logg.WriteLog(aaaaaaaa.pageurl, "涓浗鏀块噰缃戝洓宸濈渷");
aaaaaaaa.title = sichuanjieshoudtl1.QuerySelector("a").TextContent.Replace("\n", "").Trim();
@@ -1381,13 +1492,50 @@
list11 = lists.ToList();
}
+ string[] bbb = null;
+ var scriptElements = document.QuerySelectorAll("script");
+ foreach (var scriptElement in scriptElements)
+ {
+ var scriptText = scriptElement.TextContent;
+
+ if (scriptText.Contains("var ohtmlurls")) // 检查文本中是否包含特定变量 (check whether the script text contains the target variable)
+ {
+
+ int startIndex = scriptText.IndexOf('"') + 1;
+ int endIndex = scriptText.IndexOf('"', startIndex);
+ if (startIndex > 0 && endIndex > startIndex)
+ {
+ var aaaa = scriptText.Substring(startIndex, endIndex - startIndex);
+ if (!string.IsNullOrEmpty(aaaa))
+ {
+ bbb = aaaa.Split(',');
+ }
+ }
+ }
+ }
+
+
+
+
+ int ccc = 0;
+
foreach (var sichuanjieshoudtl1 in lists)
{
Thread.CurrentThread.Join(1000 * 10);//阻止设定时间 (block the current thread for the configured time)
try
{
sichuanjieshoudtl aaaaaaaa = new sichuanjieshoudtl();
- aaaaaaaa.pageurl = sichuanjieshoudtl1.QuerySelector("a").GetAttribute("href");
+ if (bbb != null && bbb.Length >= ccc && !string.IsNullOrEmpty(bbb[ccc]))
+ {
+ aaaaaaaa.pageurl = bbb[ccc];
+ ccc = ccc + 1;
+ }
+ else
+ {
+ ccc = ccc + 1;
+ continue;
+ }
+ // aaaaaaaa.pageurl = sichuanjieshoudtl1.QuerySelector("a").GetAttribute("href");
logg.WriteLog(aaaaaaaa.pageurl, "涓浗鏀块噰缃戝洓宸濈渷");
aaaaaaaa.title = sichuanjieshoudtl1.QuerySelector("a").TextContent.Replace("\n", "").Trim();
@@ -1720,13 +1868,51 @@
list11 = lists.ToList();
}
+
+ string[] bbb = null;
+ var scriptElements = document.QuerySelectorAll("script");
+ foreach (var scriptElement in scriptElements)
+ {
+ var scriptText = scriptElement.TextContent;
+
+ if (scriptText.Contains("var ohtmlurls")) // 检查文本中是否包含特定变量 (check whether the script text contains the target variable)
+ {
+
+ int startIndex = scriptText.IndexOf('"') + 1;
+ int endIndex = scriptText.IndexOf('"', startIndex);
+ if (startIndex > 0 && endIndex > startIndex)
+ {
+ var aaaa = scriptText.Substring(startIndex, endIndex - startIndex);
+ if (!string.IsNullOrEmpty(aaaa))
+ {
+ bbb = aaaa.Split(',');
+ }
+ }
+ }
+ }
+
+
+
+
+ int ccc = 0;
+
foreach (var sichuanjieshoudtl1 in lists)
{
Thread.CurrentThread.Join(1000 * 10);//阻止设定时间 (block the current thread for the configured time)
try
{
sichuanjieshoudtl aaaaaaaa = new sichuanjieshoudtl();
- aaaaaaaa.pageurl = sichuanjieshoudtl1.QuerySelector("a").GetAttribute("href");
+ if (bbb != null && bbb.Length >= ccc && !string.IsNullOrEmpty(bbb[ccc]))
+ {
+ aaaaaaaa.pageurl = bbb[ccc];
+ ccc = ccc + 1;
+ }
+ else
+ {
+ ccc = ccc + 1;
+ continue;
+ }
+ // aaaaaaaa.pageurl = sichuanjieshoudtl1.QuerySelector("a").GetAttribute("href");
logg.WriteLog(aaaaaaaa.pageurl, "涓浗鏀块噰缃戝洓宸濈渷");
aaaaaaaa.title = sichuanjieshoudtl1.QuerySelector("a").TextContent.Replace("\n", "").Trim();
diff --git a/zhengcaioa/Crawler/sichuan/sichuanoperation.cs b/zhengcaioa/Crawler/sichuan/sichuanoperation.cs
index 3ebbfce..811a532 100644
--- a/zhengcaioa/Crawler/sichuan/sichuanoperation.cs
+++ b/zhengcaioa/Crawler/sichuan/sichuanoperation.cs
@@ -321,7 +321,7 @@
{
currPage += 1;
}
- Thread.CurrentThread.Join(1000 * 2);//阻止设定时间 (block the current thread for the configured time)
+ Thread.CurrentThread.Join(1000 * 20);//阻止设定时间 (block the current thread for the configured time)
}
@@ -608,7 +608,7 @@
{
currPage += 1;
}
- Thread.CurrentThread.Join(1000 * 2);//阻止设定时间 (block the current thread for the configured time)
+ Thread.CurrentThread.Join(1000 * 20);//阻止设定时间 (block the current thread for the configured time)
}
@@ -1051,7 +1051,7 @@
{
currPage += 1;
}
- Thread.CurrentThread.Join(1000 * 2);//阻止设定时间 (block the current thread for the configured time)
+ Thread.CurrentThread.Join(1000 * 20);//阻止设定时间 (block the current thread for the configured time)
}
@@ -1338,7 +1338,7 @@
{
currPage += 1;
}
- Thread.CurrentThread.Join(1000 * 2);//阻止设定时间 (block the current thread for the configured time)
+ Thread.CurrentThread.Join(1000 * 20);//阻止设定时间 (block the current thread for the configured time)
}
@@ -1691,7 +1691,7 @@
{
currPage += 1;
}
- Thread.CurrentThread.Join(1000 * 2);//阻止设定时间 (block the current thread for the configured time)
+ Thread.CurrentThread.Join(1000 * 20);//阻止设定时间 (block the current thread for the configured time)
}
--
Gitblit v1.9.1