From 52267147e624f3a0daef4870ba72f023ef9162a9 Mon Sep 17 00:00:00 2001
From: username@email.com <yzy2002yzy@163.com>
Date: Sat, 07 Oct 2023 12:46:29 +0800
Subject: [PATCH] 修改中国政府采购网爬虫规则

---
 zhengcaioa/Crawler/sichuan/CcgpSichuanoperation.cs |  153 ++++++++++++++++++++++++++++++++++++++++++++++-----
 1 files changed, 138 insertions(+), 15 deletions(-)

diff --git a/zhengcaioa/Crawler/sichuan/CcgpSichuanoperation.cs b/zhengcaioa/Crawler/sichuan/CcgpSichuanoperation.cs
index 0dce7a2..f836bd6 100644
--- a/zhengcaioa/Crawler/sichuan/CcgpSichuanoperation.cs
+++ b/zhengcaioa/Crawler/sichuan/CcgpSichuanoperation.cs
@@ -3,7 +3,10 @@
 using System;
 using System.Collections.Generic;
 using System.Globalization;
+using System.IO;
+using System.IO.Compression;
 using System.Linq;
+using System.Net;
 using System.Net.Http;
 using System.Net.Http.Headers;
 using System.Text;
@@ -171,7 +174,7 @@
 
 
         //閲囪喘鍏憡
-        public static void caigougonggao(WebCrawlerContext _ccontext, string sichuanpageurll, string startTime, string endTime , int page, string cgfs, string cgfsName)
+        public static async void caigougonggao(WebCrawlerContext _ccontext, string sichuanpageurll, string startTime, string endTime , int page, string cgfs, string cgfsName)
         {
 
             sichuanpageurll = sichuanpageurll.Replace("start_time=", "start_time=" + startTime.Replace("-", "%3A"));
@@ -185,12 +188,36 @@
                 string sichuanpageurl2 = sichuanpageurll.Replace("page_index=1", "page_index=" + page);
                 try
                 {
-                    using (HttpClient client = new HttpClient())
+                    HttpClientHandler handler = new HttpClientHandler();
+                    handler.CookieContainer = new CookieContainer();
+                    using (HttpClient client = new HttpClient(handler))
                     {
                         client.Timeout = TimeSpan.FromSeconds(60);
+                        client.DefaultRequestHeaders.Add("Accept", "*/*");
+                        client.DefaultRequestHeaders.Add("Accept-Encoding", "gzip, deflate, br");
+                        client.DefaultRequestHeaders.Add("Host", "search.ccgp.gov.cn");
                         //List<sichuanjieshoudtl> data = new List<sichuanjieshoudtl>();
                         HttpResponseMessage response = client.GetAsync(sichuanpageurl2).Result;
-                        var res = response.Content.ReadAsStringAsync().Result;
+                        string res = "";
+                        if (response.IsSuccessStatusCode)
+                        {
+                            using (var responseStream = await response.Content.ReadAsStreamAsync())
+                            {
+                                using (var decompressedStream = new GZipStream(responseStream, CompressionMode.Decompress))
+                                {
+                                    using (var reader = new StreamReader(decompressedStream))
+                                    {
+                                        res = await reader.ReadToEndAsync();
+                                        // Process the decompressed response content
+                                    }
+                                }
+                            }
+                        }
+                        else
+                        {
+                            // Handle the case where the request failed
+                        }
+                        //var res = response.Content.ReadAsStringAsync().Result;
                         var document = parser.ParseDocument(res);
                         var sssdfsdfsd = document.All.Where(m => m.ClassName == "vT-srch-result-list").FirstOrDefault();
                         var contentList = sssdfsdfsd.QuerySelector("ul");
@@ -542,7 +569,7 @@
 
 
         //鎰忓悜鍏紑
-        public static void yixianggonggao(WebCrawlerContext _ccontext, string sichuanpageurll, string startTime,string endTime, int page)
+        public static async void yixianggonggao(WebCrawlerContext _ccontext, string sichuanpageurll, string startTime,string endTime, int page)
         {
 
             sichuanpageurll = sichuanpageurll.Replace("start_time=", "start_time=" + startTime.Replace("-", "%3A"));
@@ -556,12 +583,36 @@
                 string sichuanpageurl2 = sichuanpageurll.Replace("page_index=1", "page_index=" + page);
                 try
                 {
-                    using (HttpClient client = new HttpClient())
+                    HttpClientHandler handler = new HttpClientHandler();
+                    handler.CookieContainer = new CookieContainer();
+                    using (HttpClient client = new HttpClient(handler))
                     {
                         client.Timeout = TimeSpan.FromSeconds(60);
+                        client.DefaultRequestHeaders.Add("Accept", "*/*");
+                        client.DefaultRequestHeaders.Add("Accept-Encoding", "gzip, deflate, br");
+                        client.DefaultRequestHeaders.Add("Host", "search.ccgp.gov.cn");
                         //List<sichuanjieshoudtl> data = new List<sichuanjieshoudtl>();
                         HttpResponseMessage response = client.GetAsync(sichuanpageurl2).Result;
-                        var res = response.Content.ReadAsStringAsync().Result;
+                        //var res = response.Content.ReadAsStringAsync().Result;
+                        string res = "";
+                        if (response.IsSuccessStatusCode)
+                        {
+                            using (var responseStream = await response.Content.ReadAsStreamAsync())
+                            {
+                                using (var decompressedStream = new GZipStream(responseStream, CompressionMode.Decompress))
+                                {
+                                    using (var reader = new StreamReader(decompressedStream))
+                                    {
+                                        res = await reader.ReadToEndAsync();
+                                        // Process the decompressed response content
+                                    }
+                                }
+                            }
+                        }
+                        else
+                        {
+                            // Handle the case where the request failed
+                        }
                         var document = parser.ParseDocument(res);
                         var sssdfsdfsd = document.All.Where(m => m.ClassName == "vT-srch-result-list").FirstOrDefault();
                         var contentList = sssdfsdfsd.QuerySelector("ul");
@@ -857,7 +908,7 @@
 
 
         //缁撴灉鍏憡
-        public static void zhongbiaogonggao(WebCrawlerContext _ccontext, string sichuanpageurll, string startTime, string endTime , int page)
+        public static async void zhongbiaogonggao(WebCrawlerContext _ccontext, string sichuanpageurll, string startTime, string endTime , int page)
         {
 
             sichuanpageurll = sichuanpageurll.Replace("start_time=", "start_time=" + startTime.Replace("-", "%3A"));
@@ -871,12 +922,36 @@
                 string sichuanpageurl2 = sichuanpageurll.Replace("page_index=1", "page_index=" + page);
                 try
                 {
-                    using (HttpClient client = new HttpClient())
+                    HttpClientHandler handler = new HttpClientHandler();
+                    handler.CookieContainer = new CookieContainer();
+                    using (HttpClient client = new HttpClient(handler))
                     {
                         client.Timeout = TimeSpan.FromSeconds(60);
+                        client.DefaultRequestHeaders.Add("Accept", "*/*");
+                        client.DefaultRequestHeaders.Add("Accept-Encoding", "gzip, deflate, br");
+                        client.DefaultRequestHeaders.Add("Host", "search.ccgp.gov.cn");
                         //List<sichuanjieshoudtl> data = new List<sichuanjieshoudtl>();
                         HttpResponseMessage response = client.GetAsync(sichuanpageurl2).Result;
-                        var res = response.Content.ReadAsStringAsync().Result;
+                        //var res = response.Content.ReadAsStringAsync().Result;
+                        string res = "";
+                        if (response.IsSuccessStatusCode)
+                        {
+                            using (var responseStream = await response.Content.ReadAsStreamAsync())
+                            {
+                                using (var decompressedStream = new GZipStream(responseStream, CompressionMode.Decompress))
+                                {
+                                    using (var reader = new StreamReader(decompressedStream))
+                                    {
+                                        res = await reader.ReadToEndAsync();
+                                        // Process the decompressed response content
+                                    }
+                                }
+                            }
+                        }
+                        else
+                        {
+                            // Handle the case where the request failed
+                        }
                         var document = parser.ParseDocument(res);
                         var sssdfsdfsd = document.All.Where(m => m.ClassName == "vT-srch-result-list").FirstOrDefault();
                         var contentList = sssdfsdfsd.QuerySelector("ul");
@@ -1224,7 +1299,7 @@
         }
 
         //鏇存鍏憡
-        public static void gengzhenggonggao(WebCrawlerContext _ccontext, string sichuanpageurll, string startTime, string endTime , int page)
+        public static async void gengzhenggonggao(WebCrawlerContext _ccontext, string sichuanpageurll, string startTime, string endTime , int page)
         {
 
             sichuanpageurll = sichuanpageurll.Replace("start_time=", "start_time=" + startTime.Replace("-", "%3A"));
@@ -1238,12 +1313,36 @@
                 string sichuanpageurl2 = sichuanpageurll.Replace("page_index=1", "page_index=" + page);
                 try
                 {
-                    using (HttpClient client = new HttpClient())
+                    HttpClientHandler handler = new HttpClientHandler();
+                    handler.CookieContainer = new CookieContainer();
+                    using (HttpClient client = new HttpClient(handler))
                     {
                         client.Timeout = TimeSpan.FromSeconds(60);
+                        client.DefaultRequestHeaders.Add("Accept", "*/*");
+                        client.DefaultRequestHeaders.Add("Accept-Encoding", "gzip, deflate, br");
+                        client.DefaultRequestHeaders.Add("Host", "search.ccgp.gov.cn");
                         //List<sichuanjieshoudtl> data = new List<sichuanjieshoudtl>();
                         HttpResponseMessage response = client.GetAsync(sichuanpageurl2).Result;
-                        var res = response.Content.ReadAsStringAsync().Result;
+                        //var res = response.Content.ReadAsStringAsync().Result;
+                        string res = "";
+                        if (response.IsSuccessStatusCode)
+                        {
+                            using (var responseStream = await response.Content.ReadAsStreamAsync())
+                            {
+                                using (var decompressedStream = new GZipStream(responseStream, CompressionMode.Decompress))
+                                {
+                                    using (var reader = new StreamReader(decompressedStream))
+                                    {
+                                        res = await reader.ReadToEndAsync();
+                                        // Process the decompressed response content
+                                    }
+                                }
+                            }
+                        }
+                        else
+                        {
+                            // Handle the case where the request failed
+                        }
                         var document = parser.ParseDocument(res);
                         var sssdfsdfsd = document.All.Where(m => m.ClassName == "vT-srch-result-list").FirstOrDefault();
                         var contentList = sssdfsdfsd.QuerySelector("ul");
@@ -1539,7 +1638,7 @@
 
 
         //搴熸爣鍏憡
-        public static void feibiaogonggao(WebCrawlerContext _ccontext, string sichuanpageurll, string startTime, string endTime , int page)
+        public static async void feibiaogonggao(WebCrawlerContext _ccontext, string sichuanpageurll, string startTime, string endTime , int page)
         {
 
             sichuanpageurll = sichuanpageurll.Replace("start_time=", "start_time=" + startTime.Replace("-", "%3A"));
@@ -1553,12 +1652,36 @@
                 string sichuanpageurl2 = sichuanpageurll.Replace("page_index=1", "page_index=" + page);
                 try
                 {
-                    using (HttpClient client = new HttpClient())
+                    HttpClientHandler handler = new HttpClientHandler();
+                    handler.CookieContainer = new CookieContainer();
+                    using (HttpClient client = new HttpClient(handler))
                     {
                         //List<sichuanjieshoudtl> data = new List<sichuanjieshoudtl>();
                         client.Timeout = TimeSpan.FromSeconds(60);
+                        client.DefaultRequestHeaders.Add("Accept", "*/*");
+                        client.DefaultRequestHeaders.Add("Accept-Encoding", "gzip, deflate, br");
+                        client.DefaultRequestHeaders.Add("Host", "search.ccgp.gov.cn");
                         HttpResponseMessage response = client.GetAsync(sichuanpageurl2).Result;
-                        var res = response.Content.ReadAsStringAsync().Result;
+                        //var res = response.Content.ReadAsStringAsync().Result;
+                        string res = "";
+                        if (response.IsSuccessStatusCode)
+                        {
+                            using (var responseStream = await response.Content.ReadAsStreamAsync())
+                            {
+                                using (var decompressedStream = new GZipStream(responseStream, CompressionMode.Decompress))
+                                {
+                                    using (var reader = new StreamReader(decompressedStream))
+                                    {
+                                        res = await reader.ReadToEndAsync();
+                                        // Process the decompressed response content
+                                    }
+                                }
+                            }
+                        }
+                        else
+                        {
+                            // Handle the case where the request failed
+                        }
                         var document = parser.ParseDocument(res);
                         var sssdfsdfsd = document.All.Where(m => m.ClassName == "vT-srch-result-list").FirstOrDefault();
                         var contentList = sssdfsdfsd.QuerySelector("ul");

--
Gitblit v1.9.1