From d4431c7e89865a506af8662244004d0baa7ed609 Mon Sep 17 00:00:00 2001 From: username@email.com <yzy2002yzy@163.com> Date: 星期三, 11 六月 2025 12:24:15 +0800 Subject: [PATCH] 投诉处理,爬 --- zhengcaioa/Crawler/sichuan/CcgpSichuanoperation.cs | 381 +++++++++++++++++++++++++++++++++++++++++++++++++----- 1 files changed, 345 insertions(+), 36 deletions(-) diff --git a/zhengcaioa/Crawler/sichuan/CcgpSichuanoperation.cs b/zhengcaioa/Crawler/sichuan/CcgpSichuanoperation.cs index 0dce7a2..37c8ea7 100644 --- a/zhengcaioa/Crawler/sichuan/CcgpSichuanoperation.cs +++ b/zhengcaioa/Crawler/sichuan/CcgpSichuanoperation.cs @@ -3,10 +3,14 @@ using System; using System.Collections.Generic; using System.Globalization; +using System.IO; +using System.IO.Compression; using System.Linq; +using System.Net; using System.Net.Http; using System.Net.Http.Headers; using System.Text; +using System.Text.RegularExpressions; using System.Threading; using zhengcaioa.Models; namespace Crawler.sichuan @@ -41,7 +45,7 @@ //Thread.CurrentThread.Join(1000 * 60 * 30);//闃绘璁惧畾鏃堕棿 #region 鎰忓悜鍏紑 currPage = 1; - sichuanpageurll = "http://search.ccgp.gov.cn/bxsearch?searchtype=1&page_index=1&bidSort=0&buyerName=&projectId=&pinMu=0&bidType=9&dbselect=bidx&kw=%E6%84%8F%E5%90%91&start_time=&end_time=&timeType=6&displayZone=%E5%9B%9B%E5%B7%9D&zoneId=51&pppStatus=&agentName="; + sichuanpageurll = "https://search.ccgp.gov.cn/bxsearch?searchtype=1&page_index=1&bidSort=0&buyerName=&projectId=&pinMu=0&bidType=9&dbselect=bidx&kw=%E6%84%8F%E5%90%91&start_time=&end_time=&timeType=6&displayZone=%E5%9B%9B%E5%B7%9D&zoneId=51&pppStatus=&agentName="; logg.WriteLog("涓浗鏀块噰缃戝洓宸濈渷鎰忓悜鍏紑 寮�濮嬭幏鍙�", "涓浗鏀块噰缃戝洓宸濈渷"); yixianggonggao(_ccontext, sichuanpageurll, operationStartTime, operationEndTime, currPage); logg.WriteLog("涓浗鏀块噰缃戝洓宸濈渷鎰忓悜鍏紑 寮�濮嬭幏鍙栫粨鏉�", "涓浗鏀块噰缃戝洓宸濈渷"); @@ -51,7 +55,7 @@ #region 鍏紑鎷涙爣 currPage = 1; - sichuanpageurll = "http://search.ccgp.gov.cn/bxsearch?searchtype=1&page_index=1&bidSort=0&buyerName=&projectId=&pinMu=0&bidType=1&dbselect=bidx&kw=&start_time=&end_time=&timeType=6&displayZone=%E5%9B%9B%E5%B7%9D&zoneId=51&pppStatus=&agentName="; + sichuanpageurll = "https://search.ccgp.gov.cn/bxsearch?searchtype=1&page_index=1&bidSort=0&buyerName=&projectId=&pinMu=0&bidType=1&dbselect=bidx&kw=&start_time=&end_time=&timeType=6&displayZone=%E5%9B%9B%E5%B7%9D&zoneId=51&pppStatus=&agentName="; logg.WriteLog("涓浗鏀块噰缃戝洓宸濈渷閲囪喘鍏憡 鍏紑鎷涙爣 寮�濮嬭幏鍙�", "涓浗鏀块噰缃戝洓宸濈渷"); caigougonggao(_ccontext, sichuanpageurll, operationStartTime, operationEndTime, currPage, "1", "鍏紑鎷涙爣"); logg.WriteLog("涓浗鏀块噰缃戝洓宸濈渷閲囪喘鍏憡 鍏紑鎷涙爣 寮�濮嬭幏鍙栫粨鏉�", "涓浗鏀块噰缃戝洓宸濈渷"); @@ -60,7 +64,7 @@ #region 璇环 currPage = 1; - sichuanpageurll = "http://search.ccgp.gov.cn/bxsearch?searchtype=1&page_index=1&bidSort=0&buyerName=&projectId=&pinMu=0&bidType=2&dbselect=bidx&kw=&start_time=&end_time=&timeType=6&displayZone=%E5%9B%9B%E5%B7%9D&zoneId=51&pppStatus=&agentName="; + sichuanpageurll = "https://search.ccgp.gov.cn/bxsearch?searchtype=1&page_index=1&bidSort=0&buyerName=&projectId=&pinMu=0&bidType=2&dbselect=bidx&kw=&start_time=&end_time=&timeType=6&displayZone=%E5%9B%9B%E5%B7%9D&zoneId=51&pppStatus=&agentName="; logg.WriteLog("涓浗鏀块噰缃戝洓宸濈渷閲囪喘鍏憡 璇环 寮�濮嬭幏鍙�", "涓浗鏀块噰缃戝洓宸濈渷"); caigougonggao(_ccontext, sichuanpageurll, operationStartTime, operationEndTime, currPage, "4", "璇环"); logg.WriteLog("涓浗鏀块噰缃戝洓宸濈渷閲囪喘鍏憡 璇环 寮�濮嬭幏鍙栫粨鏉�", "涓浗鏀块噰缃戝洓宸濈渷"); @@ -68,7 +72,7 @@ //Thread.CurrentThread.Join(1000 * 60 * 30);//闃绘璁惧畾鏃堕棿 #region 绔炰簤鎬ц皥鍒� currPage = 1; - sichuanpageurll = "http://search.ccgp.gov.cn/bxsearch?searchtype=1&page_index=1&bidSort=0&buyerName=&projectId=&pinMu=0&bidType=3&dbselect=bidx&kw=&start_time=&end_time=&timeType=6&displayZone=%E5%9B%9B%E5%B7%9D&zoneId=51&pppStatus=&agentName="; + sichuanpageurll = "https://search.ccgp.gov.cn/bxsearch?searchtype=1&page_index=1&bidSort=0&buyerName=&projectId=&pinMu=0&bidType=3&dbselect=bidx&kw=&start_time=&end_time=&timeType=6&displayZone=%E5%9B%9B%E5%B7%9D&zoneId=51&pppStatus=&agentName="; logg.WriteLog("涓浗鏀块噰缃戝洓宸濈渷閲囪喘鍏憡 绔炰簤鎬ц皥鍒� 寮�濮嬭幏鍙�", "涓浗鏀块噰缃戝洓宸濈渷"); caigougonggao(_ccontext, sichuanpageurll, operationStartTime, operationEndTime, currPage, "5", "绔炰簤鎬ц皥鍒�"); logg.WriteLog("涓浗鏀块噰缃戝洓宸濈渷閲囪喘鍏憡 绔炰簤鎬ц皥鍒� 寮�濮嬭幏鍙栫粨鏉�", "涓浗鏀块噰缃戝洓宸濈渷"); @@ -76,7 +80,7 @@ //Thread.CurrentThread.Join(1000 * 60 * 30);//闃绘璁惧畾鏃堕棿 #region 鍗曚竴鏉ユ簮 currPage = 1; - sichuanpageurll = "http://search.ccgp.gov.cn/bxsearch?searchtype=1&page_index=1&bidSort=0&buyerName=&projectId=&pinMu=0&bidType=4&dbselect=bidx&kw=&start_time=&end_time=&timeType=6&displayZone=%E5%9B%9B%E5%B7%9D&zoneId=51&pppStatus=&agentName="; + sichuanpageurll = "https://search.ccgp.gov.cn/bxsearch?searchtype=1&page_index=1&bidSort=0&buyerName=&projectId=&pinMu=0&bidType=4&dbselect=bidx&kw=&start_time=&end_time=&timeType=6&displayZone=%E5%9B%9B%E5%B7%9D&zoneId=51&pppStatus=&agentName="; logg.WriteLog("涓浗鏀块噰缃戝洓宸濈渷閲囪喘鍏憡 鍗曚竴鏉ユ簮 寮�濮嬭幏鍙�", "涓浗鏀块噰缃戝洓宸濈渷"); caigougonggao(_ccontext, sichuanpageurll, operationStartTime, operationEndTime, currPage, "7", "鍗曚竴鏉ユ簮"); logg.WriteLog("涓浗鏀块噰缃戝洓宸濈渷閲囪喘鍏憡 鍗曚竴鏉ユ簮 寮�濮嬭幏鍙栫粨鏉�", "涓浗鏀块噰缃戝洓宸濈渷"); @@ -84,7 +88,7 @@ //Thread.CurrentThread.Join(1000 * 60 * 30);//闃绘璁惧畾鏃堕棿 #region 閭�璇锋嫑鏍� currPage = 1; - sichuanpageurll = "http://search.ccgp.gov.cn/bxsearch?searchtype=1&page_index=1&bidSort=0&buyerName=&projectId=&pinMu=0&bidType=6&dbselect=bidx&kw=&start_time=&end_time=&timeType=6&displayZone=%E5%9B%9B%E5%B7%9D&zoneId=51&pppStatus=&agentName="; + sichuanpageurll = "https://search.ccgp.gov.cn/bxsearch?searchtype=1&page_index=1&bidSort=0&buyerName=&projectId=&pinMu=0&bidType=6&dbselect=bidx&kw=&start_time=&end_time=&timeType=6&displayZone=%E5%9B%9B%E5%B7%9D&zoneId=51&pppStatus=&agentName="; logg.WriteLog("涓浗鏀块噰缃戝洓宸濈渷閲囪喘鍏憡 閭�璇锋嫑鏍� 寮�濮嬭幏鍙�", "涓浗鏀块噰缃戝洓宸濈渷"); caigougonggao(_ccontext, sichuanpageurll, operationStartTime, operationEndTime, currPage, "2", "閭�璇锋嫑鏍�"); logg.WriteLog("涓浗鏀块噰缃戝洓宸濈渷閲囪喘鍏憡 閭�璇锋嫑鏍� 寮�濮嬭幏鍙栫粨鏉�", "涓浗鏀块噰缃戝洓宸濈渷"); @@ -92,7 +96,7 @@ //Thread.CurrentThread.Join(1000 * 60 * 30);//闃绘璁惧畾鏃堕棿 #region 绔炰簤鎬х鍟� currPage = 1; - sichuanpageurll = "http://search.ccgp.gov.cn/bxsearch?searchtype=1&page_index=1&bidSort=0&buyerName=&projectId=&pinMu=0&bidType=10&dbselect=bidx&kw=&start_time=&end_time=&timeType=6&displayZone=%E5%9B%9B%E5%B7%9D&zoneId=51&pppStatus=&agentName="; + sichuanpageurll = "https://search.ccgp.gov.cn/bxsearch?searchtype=1&page_index=1&bidSort=0&buyerName=&projectId=&pinMu=0&bidType=10&dbselect=bidx&kw=&start_time=&end_time=&timeType=6&displayZone=%E5%9B%9B%E5%B7%9D&zoneId=51&pppStatus=&agentName="; logg.WriteLog("涓浗鏀块噰缃戝洓宸濈渷閲囪喘鍏憡 绔炰簤鎬х鍟� 寮�濮嬭幏鍙�", "涓浗鏀块噰缃戝洓宸濈渷"); caigougonggao(_ccontext, sichuanpageurll, operationStartTime, operationEndTime, currPage, "3", "绔炰簤鎬х鍟�"); logg.WriteLog("涓浗鏀块噰缃戝洓宸濈渷閲囪喘鍏憡 绔炰簤鎬х鍟� 寮�濮嬭幏鍙栫粨鏉�", "涓浗鏀块噰缃戝洓宸濈渷"); @@ -100,7 +104,7 @@ //Thread.CurrentThread.Join(1000 * 60 * 30);//闃绘璁惧畾鏃堕棿 #region 涓爣鍏憡 currPage = 1; - sichuanpageurll = "http://search.ccgp.gov.cn/bxsearch?searchtype=1&page_index=1&bidSort=0&buyerName=&projectId=&pinMu=0&bidType=7&dbselect=bidx&kw=&start_time=&end_time=&timeType=6&displayZone=%E5%9B%9B%E5%B7%9D&zoneId=51&pppStatus=&agentName="; + sichuanpageurll = "https://search.ccgp.gov.cn/bxsearch?searchtype=1&page_index=1&bidSort=0&buyerName=&projectId=&pinMu=0&bidType=7&dbselect=bidx&kw=&start_time=&end_time=&timeType=6&displayZone=%E5%9B%9B%E5%B7%9D&zoneId=51&pppStatus=&agentName="; logg.WriteLog("涓浗鏀块噰缃戝洓宸濈渷涓爣鍏憡 寮�濮嬭幏鍙�", "涓浗鏀块噰缃戝洓宸濈渷"); zhongbiaogonggao(_ccontext, sichuanpageurll, operationStartTime, operationEndTime, currPage); logg.WriteLog("涓浗鏀块噰缃戝洓宸濈渷涓爣鍏憡 寮�濮嬭幏鍙栫粨鏉�", "涓浗鏀块噰缃戝洓宸濈渷"); @@ -108,7 +112,7 @@ //Thread.CurrentThread.Join(1000 * 60 * 30);//闃绘璁惧畾鏃堕棿 #region 鎴愪氦鍏憡 currPage = 1; - sichuanpageurll = "http://search.ccgp.gov.cn/bxsearch?searchtype=1&page_index=1&bidSort=0&buyerName=&projectId=&pinMu=0&bidType=11&dbselect=bidx&kw=&start_time=&end_time=&timeType=6&displayZone=%E5%9B%9B%E5%B7%9D&zoneId=51&pppStatus=&agentName="; + sichuanpageurll = "https://search.ccgp.gov.cn/bxsearch?searchtype=1&page_index=1&bidSort=0&buyerName=&projectId=&pinMu=0&bidType=11&dbselect=bidx&kw=&start_time=&end_time=&timeType=6&displayZone=%E5%9B%9B%E5%B7%9D&zoneId=51&pppStatus=&agentName="; logg.WriteLog("涓浗鏀块噰缃戝洓宸濈渷鎴愪氦鍏憡 寮�濮嬭幏鍙�", "涓浗鏀块噰缃戝洓宸濈渷"); zhongbiaogonggao(_ccontext, sichuanpageurll, operationStartTime, operationEndTime, currPage); logg.WriteLog("涓浗鏀块噰缃戝洓宸濈渷鎴愪氦鍏憡 寮�濮嬭幏鍙栫粨鏉�", "涓浗鏀块噰缃戝洓宸濈渷"); @@ -116,7 +120,7 @@ //Thread.CurrentThread.Join(1000 * 60 * 30);//闃绘璁惧畾鏃堕棿 #region 鏇存鍏憡 currPage = 1; - sichuanpageurll = "http://search.ccgp.gov.cn/bxsearch?searchtype=1&page_index=1&bidSort=0&buyerName=&projectId=&pinMu=0&bidType=8&dbselect=bidx&kw=&start_time=&end_time=&timeType=6&displayZone=%E5%9B%9B%E5%B7%9D&zoneId=51&pppStatus=&agentName="; + sichuanpageurll = "https://search.ccgp.gov.cn/bxsearch?searchtype=1&page_index=1&bidSort=0&buyerName=&projectId=&pinMu=0&bidType=8&dbselect=bidx&kw=&start_time=&end_time=&timeType=6&displayZone=%E5%9B%9B%E5%B7%9D&zoneId=51&pppStatus=&agentName="; logg.WriteLog("涓浗鏀块噰缃戝洓宸濈渷鏇存鍏憡 寮�濮嬭幏鍙�", "涓浗鏀块噰缃戝洓宸濈渷"); gengzhenggonggao(_ccontext, sichuanpageurll, operationStartTime, operationEndTime, currPage); logg.WriteLog("涓浗鏀块噰缃戝洓宸濈渷鏇存鍏憡 寮�濮嬭幏鍙栫粨鏉�", "涓浗鏀块噰缃戝洓宸濈渷"); @@ -124,7 +128,7 @@ //Thread.CurrentThread.Join(1000 * 60 * 30);//闃绘璁惧畾鏃堕棿 #region 搴熸爣鍏憡 currPage = 1; - sichuanpageurll = "http://search.ccgp.gov.cn/bxsearch?searchtype=1&page_index=1&bidSort=0&buyerName=&projectId=&pinMu=0&bidType=12&dbselect=bidx&kw=&start_time=&end_time=&timeType=6&displayZone=%E5%9B%9B%E5%B7%9D&zoneId=51&pppStatus=&agentName="; + sichuanpageurll = "https://search.ccgp.gov.cn/bxsearch?searchtype=1&page_index=1&bidSort=0&buyerName=&projectId=&pinMu=0&bidType=12&dbselect=bidx&kw=&start_time=&end_time=&timeType=6&displayZone=%E5%9B%9B%E5%B7%9D&zoneId=51&pppStatus=&agentName="; logg.WriteLog("涓浗鏀块噰缃戝洓宸濈渷搴熸爣鍏憡 寮�濮嬭幏鍙�", "涓浗鏀块噰缃戝洓宸濈渷"); feibiaogonggao(_ccontext, sichuanpageurll, operationStartTime, operationEndTime, currPage); logg.WriteLog("涓浗鏀块噰缃戝洓宸濈渷搴熸爣鍏憡 寮�濮嬭幏鍙栫粨鏉�", "涓浗鏀块噰缃戝洓宸濈渷"); @@ -171,7 +175,7 @@ //閲囪喘鍏憡 - public static void caigougonggao(WebCrawlerContext _ccontext, string sichuanpageurll, string startTime, string endTime , int page, string cgfs, string cgfsName) + public static async void caigougonggao(WebCrawlerContext _ccontext, string sichuanpageurll, string startTime, string endTime , int page, string cgfs, string cgfsName) { sichuanpageurll = sichuanpageurll.Replace("start_time=", "start_time=" + startTime.Replace("-", "%3A")); @@ -185,12 +189,36 @@ string sichuanpageurl2 = sichuanpageurll.Replace("page_index=1", "page_index=" + page); try { - using (HttpClient client = new HttpClient()) + HttpClientHandler handler = new HttpClientHandler(); + handler.CookieContainer = new CookieContainer(); + using (HttpClient client = new HttpClient(handler)) { client.Timeout = TimeSpan.FromSeconds(60); + client.DefaultRequestHeaders.Add("Accept", "*/*"); + client.DefaultRequestHeaders.Add("Accept-Encoding", "gzip, deflate, br"); + client.DefaultRequestHeaders.Add("Host", "search.ccgp.gov.cn"); //List<sichuanjieshoudtl> data = new List<sichuanjieshoudtl>(); HttpResponseMessage response = client.GetAsync(sichuanpageurl2).Result; - var res = response.Content.ReadAsStringAsync().Result; + string res = ""; + if (response.IsSuccessStatusCode) + { + using (var responseStream = await response.Content.ReadAsStreamAsync()) + { + using (var decompressedStream = new GZipStream(responseStream, CompressionMode.Decompress)) + { + using (var reader = new StreamReader(decompressedStream)) + { + res = await reader.ReadToEndAsync(); + // 澶勭悊瑙e帇缂╁悗鐨勫搷搴斿唴瀹� + } + } + } + } + else + { + // 澶勭悊璇锋眰澶辫触鐨勬儏鍐� + } + //var res = response.Content.ReadAsStringAsync().Result; var document = parser.ParseDocument(res); var sssdfsdfsd = document.All.Where(m => m.ClassName == "vT-srch-result-list").FirstOrDefault(); var contentList = sssdfsdfsd.QuerySelector("ul"); @@ -229,7 +257,32 @@ list11 = lists.ToList(); } + string[] bbb = null; + var scriptElements = document.QuerySelectorAll("script"); + foreach (var scriptElement in scriptElements) + { + var scriptText = scriptElement.TextContent; + if (scriptText.Contains("var ohtmlurls")) // 妫�鏌ユ枃鏈腑鏄惁鍖呭惈鐗瑰畾鍙橀噺 + { + + int startIndex = scriptText.IndexOf('"') + 1; + int endIndex = scriptText.IndexOf('"', startIndex); + if (startIndex > 0 && endIndex > startIndex) + { + var aaaa = scriptText.Substring(startIndex, endIndex - startIndex); + if (!string.IsNullOrEmpty(aaaa)) + { + bbb = aaaa.Split(','); + } + } + } + } + + + + + int ccc = 0; foreach (var sichuanjieshoudtl1 in lists) { @@ -237,7 +290,17 @@ try { sichuanjieshoudtl aaaaaaaa = new sichuanjieshoudtl(); - aaaaaaaa.pageurl = sichuanjieshoudtl1.QuerySelector("a").GetAttribute("href"); + if(bbb!=null && bbb.Length>= ccc && !string.IsNullOrEmpty(bbb[ccc])) + { + aaaaaaaa.pageurl = bbb[ccc]; + ccc = ccc + 1; + } + else + { + ccc = ccc + 1; + continue; + } + // aaaaaaaa.pageurl = sichuanjieshoudtl1.QuerySelector("a").GetAttribute("href"); logg.WriteLog(aaaaaaaa.pageurl, "涓浗鏀块噰缃戝洓宸濈渷"); aaaaaaaa.title = sichuanjieshoudtl1.QuerySelector("a").TextContent.Replace("\n", "").Trim(); @@ -289,7 +352,7 @@ { var ssss = fujianya.Id; var sssss = fujianya.InnerHtml; - fujianhtml += "<tr><td class=\"bid_attachtab_content\">闄勪欢涓嬭浇锛�<a class=\"bizDownload\" target=\"_blank\" href =\"http://download.ccgp.gov.cn/oss/download?uuid=" + ssss + "\" id=\"0E1723104D34335C527765FF6CD28A\" title=\"鐐瑰嚮涓嬭浇\">" + sssss + "</a><br></td></tr>"; + fujianhtml += "<tr><td class=\"bid_attachtab_content\">闄勪欢涓嬭浇锛�<a class=\"bizDownload\" target=\"_blank\" href =\"https://download.ccgp.gov.cn/oss/download?uuid=" + ssss + "\" id=\"0E1723104D34335C527765FF6CD28A\" title=\"鐐瑰嚮涓嬭浇\">" + sssss + "</a><br></td></tr>"; } fujianhtml += "</tbody></table></div>"; @@ -542,7 +605,7 @@ //鎰忓悜鍏紑 - public static void yixianggonggao(WebCrawlerContext _ccontext, string sichuanpageurll, string startTime,string endTime, int page) + public static async void yixianggonggao(WebCrawlerContext _ccontext, string sichuanpageurll, string startTime,string endTime, int page) { sichuanpageurll = sichuanpageurll.Replace("start_time=", "start_time=" + startTime.Replace("-", "%3A")); @@ -556,12 +619,36 @@ string sichuanpageurl2 = sichuanpageurll.Replace("page_index=1", "page_index=" + page); try { - using (HttpClient client = new HttpClient()) + HttpClientHandler handler = new HttpClientHandler(); + handler.CookieContainer = new CookieContainer(); + using (HttpClient client = new HttpClient(handler)) { client.Timeout = TimeSpan.FromSeconds(60); + client.DefaultRequestHeaders.Add("Accept", "*/*"); + client.DefaultRequestHeaders.Add("Accept-Encoding", "gzip, deflate, br"); + client.DefaultRequestHeaders.Add("Host", "search.ccgp.gov.cn"); //List<sichuanjieshoudtl> data = new List<sichuanjieshoudtl>(); HttpResponseMessage response = client.GetAsync(sichuanpageurl2).Result; - var res = response.Content.ReadAsStringAsync().Result; + //var res = response.Content.ReadAsStringAsync().Result; + string res = ""; + if (response.IsSuccessStatusCode) + { + using (var responseStream = await response.Content.ReadAsStreamAsync()) + { + using (var decompressedStream = new GZipStream(responseStream, CompressionMode.Decompress)) + { + using (var reader = new StreamReader(decompressedStream)) + { + res = await reader.ReadToEndAsync(); + // 澶勭悊瑙e帇缂╁悗鐨勫搷搴斿唴瀹� + } + } + } + } + else + { + // 澶勭悊璇锋眰澶辫触鐨勬儏鍐� + } var document = parser.ParseDocument(res); var sssdfsdfsd = document.All.Where(m => m.ClassName == "vT-srch-result-list").FirstOrDefault(); var contentList = sssdfsdfsd.QuerySelector("ul"); @@ -599,13 +686,51 @@ { list11 = lists.ToList(); } + + string[] bbb = null; + var scriptElements = document.QuerySelectorAll("script"); + foreach (var scriptElement in scriptElements) + { + var scriptText = scriptElement.TextContent; + + if (scriptText.Contains("var ohtmlurls")) // 妫�鏌ユ枃鏈腑鏄惁鍖呭惈鐗瑰畾鍙橀噺 + { + + int startIndex = scriptText.IndexOf('"') + 1; + int endIndex = scriptText.IndexOf('"', startIndex); + if (startIndex > 0 && endIndex > startIndex) + { + var aaaa = scriptText.Substring(startIndex, endIndex - startIndex); + if (!string.IsNullOrEmpty(aaaa)) + { + bbb = aaaa.Split(','); + } + } + } + } + + + + + int ccc = 0; + foreach (var sichuanjieshoudtl1 in lists) { Thread.CurrentThread.Join(1000 * 10);//闃绘璁惧畾鏃堕棿 try { sichuanjieshoudtl aaaaaaaa = new sichuanjieshoudtl(); - aaaaaaaa.pageurl = sichuanjieshoudtl1.QuerySelector("a").GetAttribute("href"); + if (bbb != null && bbb.Length >= ccc && !string.IsNullOrEmpty(bbb[ccc])) + { + aaaaaaaa.pageurl = bbb[ccc]; + ccc = ccc + 1; + } + else + { + ccc = ccc + 1; + continue; + } + // aaaaaaaa.pageurl = sichuanjieshoudtl1.QuerySelector("a").GetAttribute("href"); logg.WriteLog(aaaaaaaa.pageurl, "涓浗鏀块噰缃戝洓宸濈渷"); aaaaaaaa.title = sichuanjieshoudtl1.QuerySelector("a").TextContent.Replace("\n", "").Trim(); @@ -655,7 +780,7 @@ { var ssss = fujianya.Id; var sssss = fujianya.InnerHtml; - fujianhtml += "<tr><td class=\"bid_attachtab_content\">闄勪欢涓嬭浇锛�<a class=\"bizDownload\" target=\"_blank\" href =\"http://download.ccgp.gov.cn/oss/download?uuid=" + ssss + "\" id=\"0E1723104D34335C527765FF6CD28A\" title=\"鐐瑰嚮涓嬭浇\">" + sssss + "</a><br></td></tr>"; + fujianhtml += "<tr><td class=\"bid_attachtab_content\">闄勪欢涓嬭浇锛�<a class=\"bizDownload\" target=\"_blank\" href =\"https://download.ccgp.gov.cn/oss/download?uuid=" + ssss + "\" id=\"0E1723104D34335C527765FF6CD28A\" title=\"鐐瑰嚮涓嬭浇\">" + sssss + "</a><br></td></tr>"; } fujianhtml += "</tbody></table></div>"; @@ -857,7 +982,7 @@ //缁撴灉鍏憡 - public static void zhongbiaogonggao(WebCrawlerContext _ccontext, string sichuanpageurll, string startTime, string endTime , int page) + public static async void zhongbiaogonggao(WebCrawlerContext _ccontext, string sichuanpageurll, string startTime, string endTime , int page) { sichuanpageurll = sichuanpageurll.Replace("start_time=", "start_time=" + startTime.Replace("-", "%3A")); @@ -871,12 +996,36 @@ string sichuanpageurl2 = sichuanpageurll.Replace("page_index=1", "page_index=" + page); try { - using (HttpClient client = new HttpClient()) + HttpClientHandler handler = new HttpClientHandler(); + handler.CookieContainer = new CookieContainer(); + using (HttpClient client = new HttpClient(handler)) { client.Timeout = TimeSpan.FromSeconds(60); + client.DefaultRequestHeaders.Add("Accept", "*/*"); + client.DefaultRequestHeaders.Add("Accept-Encoding", "gzip, deflate, br"); + client.DefaultRequestHeaders.Add("Host", "search.ccgp.gov.cn"); //List<sichuanjieshoudtl> data = new List<sichuanjieshoudtl>(); HttpResponseMessage response = client.GetAsync(sichuanpageurl2).Result; - var res = response.Content.ReadAsStringAsync().Result; + //var res = response.Content.ReadAsStringAsync().Result; + string res = ""; + if (response.IsSuccessStatusCode) + { + using (var responseStream = await response.Content.ReadAsStreamAsync()) + { + using (var decompressedStream = new GZipStream(responseStream, CompressionMode.Decompress)) + { + using (var reader = new StreamReader(decompressedStream)) + { + res = await reader.ReadToEndAsync(); + // 澶勭悊瑙e帇缂╁悗鐨勫搷搴斿唴瀹� + } + } + } + } + else + { + // 澶勭悊璇锋眰澶辫触鐨勬儏鍐� + } var document = parser.ParseDocument(res); var sssdfsdfsd = document.All.Where(m => m.ClassName == "vT-srch-result-list").FirstOrDefault(); var contentList = sssdfsdfsd.QuerySelector("ul"); @@ -915,13 +1064,50 @@ list11 = lists.ToList(); } + string[] bbb = null; + var scriptElements = document.QuerySelectorAll("script"); + foreach (var scriptElement in scriptElements) + { + var scriptText = scriptElement.TextContent; + + if (scriptText.Contains("var ohtmlurls")) // 妫�鏌ユ枃鏈腑鏄惁鍖呭惈鐗瑰畾鍙橀噺 + { + + int startIndex = scriptText.IndexOf('"') + 1; + int endIndex = scriptText.IndexOf('"', startIndex); + if (startIndex > 0 && endIndex > startIndex) + { + var aaaa = scriptText.Substring(startIndex, endIndex - startIndex); + if (!string.IsNullOrEmpty(aaaa)) + { + bbb = aaaa.Split(','); + } + } + } + } + + + + + int ccc = 0; + foreach (var sichuanjieshoudtl1 in lists) { Thread.CurrentThread.Join(1000 * 10);//闃绘璁惧畾鏃堕棿 try { sichuanjieshoudtl aaaaaaaa = new sichuanjieshoudtl(); - aaaaaaaa.pageurl = sichuanjieshoudtl1.QuerySelector("a").GetAttribute("href"); + if (bbb != null && bbb.Length >= ccc && !string.IsNullOrEmpty(bbb[ccc])) + { + aaaaaaaa.pageurl = bbb[ccc]; + ccc = ccc + 1; + } + else + { + ccc = ccc + 1; + continue; + } + // aaaaaaaa.pageurl = sichuanjieshoudtl1.QuerySelector("a").GetAttribute("href"); logg.WriteLog(aaaaaaaa.pageurl, "涓浗鏀块噰缃戝洓宸濈渷"); aaaaaaaa.title = sichuanjieshoudtl1.QuerySelector("a").TextContent.Replace("\n", "").Trim(); @@ -971,7 +1157,7 @@ { var ssss = fujianya.Id; var sssss = fujianya.InnerHtml; - fujianhtml += "<tr><td class=\"bid_attachtab_content\">闄勪欢涓嬭浇锛�<a class=\"bizDownload\" target=\"_blank\" href =\"http://download.ccgp.gov.cn/oss/download?uuid=" + ssss + "\" id=\"0E1723104D34335C527765FF6CD28A\" title=\"鐐瑰嚮涓嬭浇\">" + sssss + "</a><br></td></tr>"; + fujianhtml += "<tr><td class=\"bid_attachtab_content\">闄勪欢涓嬭浇锛�<a class=\"bizDownload\" target=\"_blank\" href =\"https://download.ccgp.gov.cn/oss/download?uuid=" + ssss + "\" id=\"0E1723104D34335C527765FF6CD28A\" title=\"鐐瑰嚮涓嬭浇\">" + sssss + "</a><br></td></tr>"; } fujianhtml += "</tbody></table></div>"; @@ -1224,7 +1410,7 @@ } //鏇存鍏憡 - public static void gengzhenggonggao(WebCrawlerContext _ccontext, string sichuanpageurll, string startTime, string endTime , int page) + public static async void gengzhenggonggao(WebCrawlerContext _ccontext, string sichuanpageurll, string startTime, string endTime , int page) { sichuanpageurll = sichuanpageurll.Replace("start_time=", "start_time=" + startTime.Replace("-", "%3A")); @@ -1238,12 +1424,36 @@ string sichuanpageurl2 = sichuanpageurll.Replace("page_index=1", "page_index=" + page); try { - using (HttpClient client = new HttpClient()) + HttpClientHandler handler = new HttpClientHandler(); + handler.CookieContainer = new CookieContainer(); + using (HttpClient client = new HttpClient(handler)) { client.Timeout = TimeSpan.FromSeconds(60); + client.DefaultRequestHeaders.Add("Accept", "*/*"); + client.DefaultRequestHeaders.Add("Accept-Encoding", "gzip, deflate, br"); + client.DefaultRequestHeaders.Add("Host", "search.ccgp.gov.cn"); //List<sichuanjieshoudtl> data = new List<sichuanjieshoudtl>(); HttpResponseMessage response = client.GetAsync(sichuanpageurl2).Result; - var res = response.Content.ReadAsStringAsync().Result; + //var res = response.Content.ReadAsStringAsync().Result; + string res = ""; + if (response.IsSuccessStatusCode) + { + using (var responseStream = await response.Content.ReadAsStreamAsync()) + { + using (var decompressedStream = new GZipStream(responseStream, CompressionMode.Decompress)) + { + using (var reader = new StreamReader(decompressedStream)) + { + res = await reader.ReadToEndAsync(); + // 澶勭悊瑙e帇缂╁悗鐨勫搷搴斿唴瀹� + } + } + } + } + else + { + // 澶勭悊璇锋眰澶辫触鐨勬儏鍐� + } var document = parser.ParseDocument(res); var sssdfsdfsd = document.All.Where(m => m.ClassName == "vT-srch-result-list").FirstOrDefault(); var contentList = sssdfsdfsd.QuerySelector("ul"); @@ -1282,13 +1492,50 @@ list11 = lists.ToList(); } + string[] bbb = null; + var scriptElements = document.QuerySelectorAll("script"); + foreach (var scriptElement in scriptElements) + { + var scriptText = scriptElement.TextContent; + + if (scriptText.Contains("var ohtmlurls")) // 妫�鏌ユ枃鏈腑鏄惁鍖呭惈鐗瑰畾鍙橀噺 + { + + int startIndex = scriptText.IndexOf('"') + 1; + int endIndex = scriptText.IndexOf('"', startIndex); + if (startIndex > 0 && endIndex > startIndex) + { + var aaaa = scriptText.Substring(startIndex, endIndex - startIndex); + if (!string.IsNullOrEmpty(aaaa)) + { + bbb = aaaa.Split(','); + } + } + } + } + + + + + int ccc = 0; + foreach (var sichuanjieshoudtl1 in lists) { Thread.CurrentThread.Join(1000 * 10);//闃绘璁惧畾鏃堕棿 try { sichuanjieshoudtl aaaaaaaa = new sichuanjieshoudtl(); - aaaaaaaa.pageurl = sichuanjieshoudtl1.QuerySelector("a").GetAttribute("href"); + if (bbb != null && bbb.Length >= ccc && !string.IsNullOrEmpty(bbb[ccc])) + { + aaaaaaaa.pageurl = bbb[ccc]; + ccc = ccc + 1; + } + else + { + ccc = ccc + 1; + continue; + } + // aaaaaaaa.pageurl = sichuanjieshoudtl1.QuerySelector("a").GetAttribute("href"); logg.WriteLog(aaaaaaaa.pageurl, "涓浗鏀块噰缃戝洓宸濈渷"); aaaaaaaa.title = sichuanjieshoudtl1.QuerySelector("a").TextContent.Replace("\n", "").Trim(); @@ -1338,7 +1585,7 @@ { var ssss = fujianya.Id; var sssss = fujianya.InnerHtml; - fujianhtml += "<tr><td class=\"bid_attachtab_content\">闄勪欢涓嬭浇锛�<a class=\"bizDownload\" target=\"_blank\" href =\"http://download.ccgp.gov.cn/oss/download?uuid=" + ssss + "\" id=\"0E1723104D34335C527765FF6CD28A\" title=\"鐐瑰嚮涓嬭浇\">" + sssss + "</a><br></td></tr>"; + fujianhtml += "<tr><td class=\"bid_attachtab_content\">闄勪欢涓嬭浇锛�<a class=\"bizDownload\" target=\"_blank\" href =\"https://download.ccgp.gov.cn/oss/download?uuid=" + ssss + "\" id=\"0E1723104D34335C527765FF6CD28A\" title=\"鐐瑰嚮涓嬭浇\">" + sssss + "</a><br></td></tr>"; } fujianhtml += "</tbody></table></div>"; @@ -1539,7 +1786,7 @@ //搴熸爣鍏憡 - public static void feibiaogonggao(WebCrawlerContext _ccontext, string sichuanpageurll, string startTime, string endTime , int page) + public static async void feibiaogonggao(WebCrawlerContext _ccontext, string sichuanpageurll, string startTime, string endTime , int page) { sichuanpageurll = sichuanpageurll.Replace("start_time=", "start_time=" + startTime.Replace("-", "%3A")); @@ -1553,12 +1800,36 @@ string sichuanpageurl2 = sichuanpageurll.Replace("page_index=1", "page_index=" + page); try { - using (HttpClient client = new HttpClient()) + HttpClientHandler handler = new HttpClientHandler(); + handler.CookieContainer = new CookieContainer(); + using (HttpClient client = new HttpClient(handler)) { //List<sichuanjieshoudtl> data = new List<sichuanjieshoudtl>(); client.Timeout = TimeSpan.FromSeconds(60); + client.DefaultRequestHeaders.Add("Accept", "*/*"); + client.DefaultRequestHeaders.Add("Accept-Encoding", "gzip, deflate, br"); + client.DefaultRequestHeaders.Add("Host", "search.ccgp.gov.cn"); HttpResponseMessage response = client.GetAsync(sichuanpageurl2).Result; - var res = response.Content.ReadAsStringAsync().Result; + //var res = response.Content.ReadAsStringAsync().Result; + string res = ""; + if (response.IsSuccessStatusCode) + { + using (var responseStream = await response.Content.ReadAsStreamAsync()) + { + using (var decompressedStream = new GZipStream(responseStream, CompressionMode.Decompress)) + { + using (var reader = new StreamReader(decompressedStream)) + { + res = await reader.ReadToEndAsync(); + // 澶勭悊瑙e帇缂╁悗鐨勫搷搴斿唴瀹� + } + } + } + } + else + { + // 澶勭悊璇锋眰澶辫触鐨勬儏鍐� + } var document = parser.ParseDocument(res); var sssdfsdfsd = document.All.Where(m => m.ClassName == "vT-srch-result-list").FirstOrDefault(); var contentList = sssdfsdfsd.QuerySelector("ul"); @@ -1597,13 +1868,51 @@ list11 = lists.ToList(); } + + string[] bbb = null; + var scriptElements = document.QuerySelectorAll("script"); + foreach (var scriptElement in scriptElements) + { + var scriptText = scriptElement.TextContent; + + if (scriptText.Contains("var ohtmlurls")) // 妫�鏌ユ枃鏈腑鏄惁鍖呭惈鐗瑰畾鍙橀噺 + { + + int startIndex = scriptText.IndexOf('"') + 1; + int endIndex = scriptText.IndexOf('"', startIndex); + if (startIndex > 0 && endIndex > startIndex) + { + var aaaa = scriptText.Substring(startIndex, endIndex - startIndex); + if (!string.IsNullOrEmpty(aaaa)) + { + bbb = aaaa.Split(','); + } + } + } + } + + + + + int ccc = 0; + foreach (var sichuanjieshoudtl1 in lists) { Thread.CurrentThread.Join(1000 * 10);//闃绘璁惧畾鏃堕棿 try { sichuanjieshoudtl aaaaaaaa = new sichuanjieshoudtl(); - aaaaaaaa.pageurl = sichuanjieshoudtl1.QuerySelector("a").GetAttribute("href"); + if (bbb != null && bbb.Length >= ccc && !string.IsNullOrEmpty(bbb[ccc])) + { + aaaaaaaa.pageurl = bbb[ccc]; + ccc = ccc + 1; + } + else + { + ccc = ccc + 1; + continue; + } + // aaaaaaaa.pageurl = sichuanjieshoudtl1.QuerySelector("a").GetAttribute("href"); logg.WriteLog(aaaaaaaa.pageurl, "涓浗鏀块噰缃戝洓宸濈渷"); aaaaaaaa.title = sichuanjieshoudtl1.QuerySelector("a").TextContent.Replace("\n", "").Trim(); @@ -1653,7 +1962,7 @@ { var ssss = fujianya.Id; var sssss = fujianya.InnerHtml; - fujianhtml += "<tr><td class=\"bid_attachtab_content\">闄勪欢涓嬭浇锛�<a class=\"bizDownload\" target=\"_blank\" href =\"http://download.ccgp.gov.cn/oss/download?uuid=" + ssss + "\" id=\"0E1723104D34335C527765FF6CD28A\" title=\"鐐瑰嚮涓嬭浇\">" + sssss + "</a><br></td></tr>"; + fujianhtml += "<tr><td class=\"bid_attachtab_content\">闄勪欢涓嬭浇锛�<a class=\"bizDownload\" target=\"_blank\" href =\"https://download.ccgp.gov.cn/oss/download?uuid=" + ssss + "\" id=\"0E1723104D34335C527765FF6CD28A\" title=\"鐐瑰嚮涓嬭浇\">" + sssss + "</a><br></td></tr>"; } fujianhtml += "</tbody></table></div>"; -- Gitblit v1.9.1