From 294494e5559dcc92670dd9800caf976853aff876 Mon Sep 17 00:00:00 2001 From: username@email.com <yzy2002yzy@163.com> Date: 星期三, 24 七月 2024 14:23:02 +0800 Subject: [PATCH] 提交 --- zhengcaioa/Crawler/sichuan/CcgpSichuanoperation.cs | 196 +++++++++++++++++++++++++++++++++++++++++++++++- 1 files changed, 191 insertions(+), 5 deletions(-) diff --git a/zhengcaioa/Crawler/sichuan/CcgpSichuanoperation.cs b/zhengcaioa/Crawler/sichuan/CcgpSichuanoperation.cs index 7c229e2..37c8ea7 100644 --- a/zhengcaioa/Crawler/sichuan/CcgpSichuanoperation.cs +++ b/zhengcaioa/Crawler/sichuan/CcgpSichuanoperation.cs @@ -10,6 +10,7 @@ using System.Net.Http; using System.Net.Http.Headers; using System.Text; +using System.Text.RegularExpressions; using System.Threading; using zhengcaioa.Models; namespace Crawler.sichuan @@ -256,7 +257,32 @@ list11 = lists.ToList(); } + string[] bbb = null; + var scriptElements = document.QuerySelectorAll("script"); + foreach (var scriptElement in scriptElements) + { + var scriptText = scriptElement.TextContent; + if (scriptText.Contains("var ohtmlurls")) // 妫�鏌ユ枃鏈腑鏄惁鍖呭惈鐗瑰畾鍙橀噺 + { + + int startIndex = scriptText.IndexOf('"') + 1; + int endIndex = scriptText.IndexOf('"', startIndex); + if (startIndex > 0 && endIndex > startIndex) + { + var aaaa = scriptText.Substring(startIndex, endIndex - startIndex); + if (!string.IsNullOrEmpty(aaaa)) + { + bbb = aaaa.Split(','); + } + } + } + } + + + + + int ccc = 0; foreach (var sichuanjieshoudtl1 in lists) { @@ -264,7 +290,17 @@ try { sichuanjieshoudtl aaaaaaaa = new sichuanjieshoudtl(); - aaaaaaaa.pageurl = sichuanjieshoudtl1.QuerySelector("a").GetAttribute("href"); + if(bbb!=null && bbb.Length>= ccc && !string.IsNullOrEmpty(bbb[ccc])) + { + aaaaaaaa.pageurl = bbb[ccc]; + ccc = ccc + 1; + } + else + { + ccc = ccc + 1; + continue; + } + // aaaaaaaa.pageurl = sichuanjieshoudtl1.QuerySelector("a").GetAttribute("href"); logg.WriteLog(aaaaaaaa.pageurl, "涓浗鏀块噰缃戝洓宸濈渷"); aaaaaaaa.title = sichuanjieshoudtl1.QuerySelector("a").TextContent.Replace("\n", "").Trim(); @@ -650,13 +686,51 @@ { list11 = lists.ToList(); } + + string[] bbb = null; + var scriptElements = document.QuerySelectorAll("script"); + foreach (var scriptElement in scriptElements) + { + var scriptText = scriptElement.TextContent; + + if (scriptText.Contains("var ohtmlurls")) // 妫�鏌ユ枃鏈腑鏄惁鍖呭惈鐗瑰畾鍙橀噺 + { + + int startIndex = scriptText.IndexOf('"') + 1; + int endIndex = scriptText.IndexOf('"', startIndex); + if (startIndex > 0 && endIndex > startIndex) + { + var aaaa = scriptText.Substring(startIndex, endIndex - startIndex); + if (!string.IsNullOrEmpty(aaaa)) + { + bbb = aaaa.Split(','); + } + } + } + } + + + + + int ccc = 0; + foreach (var sichuanjieshoudtl1 in lists) { Thread.CurrentThread.Join(1000 * 10);//闃绘璁惧畾鏃堕棿 try { sichuanjieshoudtl aaaaaaaa = new sichuanjieshoudtl(); - aaaaaaaa.pageurl = sichuanjieshoudtl1.QuerySelector("a").GetAttribute("href"); + if (bbb != null && bbb.Length >= ccc && !string.IsNullOrEmpty(bbb[ccc])) + { + aaaaaaaa.pageurl = bbb[ccc]; + ccc = ccc + 1; + } + else + { + ccc = ccc + 1; + continue; + } + // aaaaaaaa.pageurl = sichuanjieshoudtl1.QuerySelector("a").GetAttribute("href"); logg.WriteLog(aaaaaaaa.pageurl, "涓浗鏀块噰缃戝洓宸濈渷"); aaaaaaaa.title = sichuanjieshoudtl1.QuerySelector("a").TextContent.Replace("\n", "").Trim(); @@ -990,13 +1064,50 @@ list11 = lists.ToList(); } + string[] bbb = null; + var scriptElements = document.QuerySelectorAll("script"); + foreach (var scriptElement in scriptElements) + { + var scriptText = scriptElement.TextContent; + + if (scriptText.Contains("var ohtmlurls")) // 妫�鏌ユ枃鏈腑鏄惁鍖呭惈鐗瑰畾鍙橀噺 + { + + int startIndex = scriptText.IndexOf('"') + 1; + int endIndex = scriptText.IndexOf('"', startIndex); + if (startIndex > 0 && endIndex > startIndex) + { + var aaaa = scriptText.Substring(startIndex, endIndex - startIndex); + if (!string.IsNullOrEmpty(aaaa)) + { + bbb = aaaa.Split(','); + } + } + } + } + + + + + int ccc = 0; + foreach (var sichuanjieshoudtl1 in lists) { Thread.CurrentThread.Join(1000 * 10);//闃绘璁惧畾鏃堕棿 try { sichuanjieshoudtl aaaaaaaa = new sichuanjieshoudtl(); - aaaaaaaa.pageurl = sichuanjieshoudtl1.QuerySelector("a").GetAttribute("href"); + if (bbb != null && bbb.Length >= ccc && !string.IsNullOrEmpty(bbb[ccc])) + { + aaaaaaaa.pageurl = bbb[ccc]; + ccc = ccc + 1; + } + else + { + ccc = ccc + 1; + continue; + } + // aaaaaaaa.pageurl = sichuanjieshoudtl1.QuerySelector("a").GetAttribute("href"); logg.WriteLog(aaaaaaaa.pageurl, "涓浗鏀块噰缃戝洓宸濈渷"); aaaaaaaa.title = sichuanjieshoudtl1.QuerySelector("a").TextContent.Replace("\n", "").Trim(); @@ -1381,13 +1492,50 @@ list11 = lists.ToList(); } + string[] bbb = null; + var scriptElements = document.QuerySelectorAll("script"); + foreach (var scriptElement in scriptElements) + { + var scriptText = scriptElement.TextContent; + + if (scriptText.Contains("var ohtmlurls")) // 妫�鏌ユ枃鏈腑鏄惁鍖呭惈鐗瑰畾鍙橀噺 + { + + int startIndex = scriptText.IndexOf('"') + 1; + int endIndex = scriptText.IndexOf('"', startIndex); + if (startIndex > 0 && endIndex > startIndex) + { + var aaaa = scriptText.Substring(startIndex, endIndex - startIndex); + if (!string.IsNullOrEmpty(aaaa)) + { + bbb = aaaa.Split(','); + } + } + } + } + + + + + int ccc = 0; + foreach (var sichuanjieshoudtl1 in lists) { Thread.CurrentThread.Join(1000 * 10);//闃绘璁惧畾鏃堕棿 try { sichuanjieshoudtl aaaaaaaa = new sichuanjieshoudtl(); - aaaaaaaa.pageurl = sichuanjieshoudtl1.QuerySelector("a").GetAttribute("href"); + if (bbb != null && bbb.Length >= ccc && !string.IsNullOrEmpty(bbb[ccc])) + { + aaaaaaaa.pageurl = bbb[ccc]; + ccc = ccc + 1; + } + else + { + ccc = ccc + 1; + continue; + } + // aaaaaaaa.pageurl = sichuanjieshoudtl1.QuerySelector("a").GetAttribute("href"); logg.WriteLog(aaaaaaaa.pageurl, "涓浗鏀块噰缃戝洓宸濈渷"); aaaaaaaa.title = sichuanjieshoudtl1.QuerySelector("a").TextContent.Replace("\n", "").Trim(); @@ -1720,13 +1868,51 @@ list11 = lists.ToList(); } + + string[] bbb = null; + var scriptElements = document.QuerySelectorAll("script"); + foreach (var scriptElement in scriptElements) + { + var scriptText = scriptElement.TextContent; + + if (scriptText.Contains("var ohtmlurls")) // 妫�鏌ユ枃鏈腑鏄惁鍖呭惈鐗瑰畾鍙橀噺 + { + + int startIndex = scriptText.IndexOf('"') + 1; + int endIndex = scriptText.IndexOf('"', startIndex); + if (startIndex > 0 && endIndex > startIndex) + { + var aaaa = scriptText.Substring(startIndex, endIndex - startIndex); + if (!string.IsNullOrEmpty(aaaa)) + { + bbb = aaaa.Split(','); + } + } + } + } + + + + + int ccc = 0; + foreach (var sichuanjieshoudtl1 in lists) { Thread.CurrentThread.Join(1000 * 10);//闃绘璁惧畾鏃堕棿 try { sichuanjieshoudtl aaaaaaaa = new sichuanjieshoudtl(); - aaaaaaaa.pageurl = sichuanjieshoudtl1.QuerySelector("a").GetAttribute("href"); + if (bbb != null && bbb.Length >= ccc && !string.IsNullOrEmpty(bbb[ccc])) + { + aaaaaaaa.pageurl = bbb[ccc]; + ccc = ccc + 1; + } + else + { + ccc = ccc + 1; + continue; + } + // aaaaaaaa.pageurl = sichuanjieshoudtl1.QuerySelector("a").GetAttribute("href"); logg.WriteLog(aaaaaaaa.pageurl, "涓浗鏀块噰缃戝洓宸濈渷"); aaaaaaaa.title = sichuanjieshoudtl1.QuerySelector("a").TextContent.Replace("\n", "").Trim(); -- Gitblit v1.9.1