From 294494e5559dcc92670dd9800caf976853aff876 Mon Sep 17 00:00:00 2001 From: username@email.com <yzy2002yzy@163.com> Date: 星期三, 24 七月 2024 14:23:02 +0800 Subject: [PATCH] 提交 --- zhengcaioa/Crawler/sichuan/CcgpSichuanoperation.cs | 196 +++++++++++++++++++++++++++++++++++++++++++++++- zhengcaioa/Crawler/Program.cs | 8 +- zhengcaioa/Crawler/sichuan/sichuanoperation.cs | 10 +- 3 files changed, 200 insertions(+), 14 deletions(-) diff --git a/zhengcaioa/Crawler/Program.cs b/zhengcaioa/Crawler/Program.cs index 5b7408a..8a34395 100644 --- a/zhengcaioa/Crawler/Program.cs +++ b/zhengcaioa/Crawler/Program.cs @@ -144,12 +144,12 @@ thread.Start(); - Thread thread1 = new Thread(new ThreadStart(obj.Method2)); - thread1.Start(); + //Thread thread1 = new Thread(new ThreadStart(obj.Method2)); + //thread1.Start(); - Thread thread2 = new Thread(new ThreadStart(obj.Method3)); - thread2.Start(); + //Thread thread2 = new Thread(new ThreadStart(obj.Method3)); + //thread2.Start(); Console.WriteLine("Hello World!"); } diff --git a/zhengcaioa/Crawler/sichuan/CcgpSichuanoperation.cs b/zhengcaioa/Crawler/sichuan/CcgpSichuanoperation.cs index 7c229e2..37c8ea7 100644 --- a/zhengcaioa/Crawler/sichuan/CcgpSichuanoperation.cs +++ b/zhengcaioa/Crawler/sichuan/CcgpSichuanoperation.cs @@ -10,6 +10,7 @@ using System.Net.Http; using System.Net.Http.Headers; using System.Text; +using System.Text.RegularExpressions; using System.Threading; using zhengcaioa.Models; namespace Crawler.sichuan @@ -256,7 +257,32 @@ list11 = lists.ToList(); } + string[] bbb = null; + var scriptElements = document.QuerySelectorAll("script"); + foreach (var scriptElement in scriptElements) + { + var scriptText = scriptElement.TextContent; + if (scriptText.Contains("var ohtmlurls")) // 妫�鏌ユ枃鏈腑鏄惁鍖呭惈鐗瑰畾鍙橀噺 + { + + int startIndex = scriptText.IndexOf('"') + 1; + int endIndex = scriptText.IndexOf('"', startIndex); + if (startIndex > 0 && endIndex > startIndex) + { + var aaaa = scriptText.Substring(startIndex, endIndex - startIndex); + if (!string.IsNullOrEmpty(aaaa)) + { + bbb = aaaa.Split(','); + } + } + } + } + + + + + int ccc = 0; foreach (var sichuanjieshoudtl1 in lists) { @@ -264,7 +290,17 @@ try { sichuanjieshoudtl aaaaaaaa = new sichuanjieshoudtl(); - aaaaaaaa.pageurl = sichuanjieshoudtl1.QuerySelector("a").GetAttribute("href"); + if(bbb!=null && bbb.Length>= ccc && !string.IsNullOrEmpty(bbb[ccc])) + { + aaaaaaaa.pageurl = bbb[ccc]; + ccc = ccc + 1; + } + else + { + ccc = ccc + 1; + continue; + } + // aaaaaaaa.pageurl = sichuanjieshoudtl1.QuerySelector("a").GetAttribute("href"); logg.WriteLog(aaaaaaaa.pageurl, "涓浗鏀块噰缃戝洓宸濈渷"); aaaaaaaa.title = sichuanjieshoudtl1.QuerySelector("a").TextContent.Replace("\n", "").Trim(); @@ -650,13 +686,51 @@ { list11 = lists.ToList(); } + + string[] bbb = null; + var scriptElements = document.QuerySelectorAll("script"); + foreach (var scriptElement in scriptElements) + { + var scriptText = scriptElement.TextContent; + + if (scriptText.Contains("var ohtmlurls")) // 妫�鏌ユ枃鏈腑鏄惁鍖呭惈鐗瑰畾鍙橀噺 + { + + int startIndex = scriptText.IndexOf('"') + 1; + int endIndex = scriptText.IndexOf('"', startIndex); + if (startIndex > 0 && endIndex > startIndex) + { + var aaaa = scriptText.Substring(startIndex, endIndex - startIndex); + if (!string.IsNullOrEmpty(aaaa)) + { + bbb = aaaa.Split(','); + } + } + } + } + + + + + int ccc = 0; + foreach (var sichuanjieshoudtl1 in lists) { Thread.CurrentThread.Join(1000 * 10);//闃绘璁惧畾鏃堕棿 try { sichuanjieshoudtl aaaaaaaa = new sichuanjieshoudtl(); - aaaaaaaa.pageurl = sichuanjieshoudtl1.QuerySelector("a").GetAttribute("href"); + if (bbb != null && bbb.Length >= ccc && !string.IsNullOrEmpty(bbb[ccc])) + { + aaaaaaaa.pageurl = bbb[ccc]; + ccc = ccc + 1; + } + else + { + ccc = ccc + 1; + continue; + } + // aaaaaaaa.pageurl = sichuanjieshoudtl1.QuerySelector("a").GetAttribute("href"); logg.WriteLog(aaaaaaaa.pageurl, "涓浗鏀块噰缃戝洓宸濈渷"); aaaaaaaa.title = sichuanjieshoudtl1.QuerySelector("a").TextContent.Replace("\n", "").Trim(); @@ -990,13 +1064,50 @@ list11 = lists.ToList(); } + string[] bbb = null; + var scriptElements = document.QuerySelectorAll("script"); + foreach (var scriptElement in scriptElements) + { + var scriptText = scriptElement.TextContent; + + if (scriptText.Contains("var ohtmlurls")) // 妫�鏌ユ枃鏈腑鏄惁鍖呭惈鐗瑰畾鍙橀噺 + { + + int startIndex = scriptText.IndexOf('"') + 1; + int endIndex = scriptText.IndexOf('"', startIndex); + if (startIndex > 0 && endIndex > startIndex) + { + var aaaa = scriptText.Substring(startIndex, endIndex - startIndex); + if (!string.IsNullOrEmpty(aaaa)) + { + bbb = aaaa.Split(','); + } + } + } + } + + + + + int ccc = 0; + foreach (var sichuanjieshoudtl1 in lists) { Thread.CurrentThread.Join(1000 * 10);//闃绘璁惧畾鏃堕棿 try { sichuanjieshoudtl aaaaaaaa = new sichuanjieshoudtl(); - aaaaaaaa.pageurl = sichuanjieshoudtl1.QuerySelector("a").GetAttribute("href"); + if (bbb != null && bbb.Length >= ccc && !string.IsNullOrEmpty(bbb[ccc])) + { + aaaaaaaa.pageurl = bbb[ccc]; + ccc = ccc + 1; + } + else + { + ccc = ccc + 1; + continue; + } + // aaaaaaaa.pageurl = sichuanjieshoudtl1.QuerySelector("a").GetAttribute("href"); logg.WriteLog(aaaaaaaa.pageurl, "涓浗鏀块噰缃戝洓宸濈渷"); aaaaaaaa.title = sichuanjieshoudtl1.QuerySelector("a").TextContent.Replace("\n", "").Trim(); @@ -1381,13 +1492,50 @@ list11 = lists.ToList(); } + string[] bbb = null; + var scriptElements = document.QuerySelectorAll("script"); + foreach (var scriptElement in scriptElements) + { + var scriptText = scriptElement.TextContent; + + if (scriptText.Contains("var ohtmlurls")) // 妫�鏌ユ枃鏈腑鏄惁鍖呭惈鐗瑰畾鍙橀噺 + { + + int startIndex = scriptText.IndexOf('"') + 1; + int endIndex = scriptText.IndexOf('"', startIndex); + if (startIndex > 0 && endIndex > startIndex) + { + var aaaa = scriptText.Substring(startIndex, endIndex - startIndex); + if (!string.IsNullOrEmpty(aaaa)) + { + bbb = aaaa.Split(','); + } + } + } + } + + + + + int ccc = 0; + foreach (var sichuanjieshoudtl1 in lists) { Thread.CurrentThread.Join(1000 * 10);//闃绘璁惧畾鏃堕棿 try { sichuanjieshoudtl aaaaaaaa = new sichuanjieshoudtl(); - aaaaaaaa.pageurl = sichuanjieshoudtl1.QuerySelector("a").GetAttribute("href"); + if (bbb != null && bbb.Length >= ccc && !string.IsNullOrEmpty(bbb[ccc])) + { + aaaaaaaa.pageurl = bbb[ccc]; + ccc = ccc + 1; + } + else + { + ccc = ccc + 1; + continue; + } + // aaaaaaaa.pageurl = sichuanjieshoudtl1.QuerySelector("a").GetAttribute("href"); logg.WriteLog(aaaaaaaa.pageurl, "涓浗鏀块噰缃戝洓宸濈渷"); aaaaaaaa.title = sichuanjieshoudtl1.QuerySelector("a").TextContent.Replace("\n", "").Trim(); @@ -1720,13 +1868,51 @@ list11 = lists.ToList(); } + + string[] bbb = null; + var scriptElements = document.QuerySelectorAll("script"); + foreach (var scriptElement in scriptElements) + { + var scriptText = scriptElement.TextContent; + + if (scriptText.Contains("var ohtmlurls")) // 妫�鏌ユ枃鏈腑鏄惁鍖呭惈鐗瑰畾鍙橀噺 + { + + int startIndex = scriptText.IndexOf('"') + 1; + int endIndex = scriptText.IndexOf('"', startIndex); + if (startIndex > 0 && endIndex > startIndex) + { + var aaaa = scriptText.Substring(startIndex, endIndex - startIndex); + if (!string.IsNullOrEmpty(aaaa)) + { + bbb = aaaa.Split(','); + } + } + } + } + + + + + int ccc = 0; + foreach (var sichuanjieshoudtl1 in lists) { Thread.CurrentThread.Join(1000 * 10);//闃绘璁惧畾鏃堕棿 try { sichuanjieshoudtl aaaaaaaa = new sichuanjieshoudtl(); - aaaaaaaa.pageurl = sichuanjieshoudtl1.QuerySelector("a").GetAttribute("href"); + if (bbb != null && bbb.Length >= ccc && !string.IsNullOrEmpty(bbb[ccc])) + { + aaaaaaaa.pageurl = bbb[ccc]; + ccc = ccc + 1; + } + else + { + ccc = ccc + 1; + continue; + } + // aaaaaaaa.pageurl = sichuanjieshoudtl1.QuerySelector("a").GetAttribute("href"); logg.WriteLog(aaaaaaaa.pageurl, "涓浗鏀块噰缃戝洓宸濈渷"); aaaaaaaa.title = sichuanjieshoudtl1.QuerySelector("a").TextContent.Replace("\n", "").Trim(); diff --git a/zhengcaioa/Crawler/sichuan/sichuanoperation.cs b/zhengcaioa/Crawler/sichuan/sichuanoperation.cs index 3ebbfce..811a532 100644 --- a/zhengcaioa/Crawler/sichuan/sichuanoperation.cs +++ b/zhengcaioa/Crawler/sichuan/sichuanoperation.cs @@ -321,7 +321,7 @@ { currPage += 1; } - Thread.CurrentThread.Join(1000 * 2);//闃绘璁惧畾鏃堕棿 + Thread.CurrentThread.Join(1000 * 20);//闃绘璁惧畾鏃堕棿 } @@ -608,7 +608,7 @@ { currPage += 1; } - Thread.CurrentThread.Join(1000 * 2);//闃绘璁惧畾鏃堕棿 + Thread.CurrentThread.Join(1000 * 20);//闃绘璁惧畾鏃堕棿 } @@ -1051,7 +1051,7 @@ { currPage += 1; } - Thread.CurrentThread.Join(1000 * 2);//闃绘璁惧畾鏃堕棿 + Thread.CurrentThread.Join(1000 * 20);//闃绘璁惧畾鏃堕棿 } @@ -1338,7 +1338,7 @@ { currPage += 1; } - Thread.CurrentThread.Join(1000 * 2);//闃绘璁惧畾鏃堕棿 + Thread.CurrentThread.Join(1000 * 20);//闃绘璁惧畾鏃堕棿 } @@ -1691,7 +1691,7 @@ { currPage += 1; } - Thread.CurrentThread.Join(1000 * 2);//闃绘璁惧畾鏃堕棿 + Thread.CurrentThread.Join(1000 * 20);//闃绘璁惧畾鏃堕棿 } -- Gitblit v1.9.1