From d4431c7e89865a506af8662244004d0baa7ed609 Mon Sep 17 00:00:00 2001
From: username@email.com <yzy2002yzy@163.com>
Date: Wed, 11 Jun 2025 12:24:15 +0800
Subject: [PATCH] 投诉处理,爬
---
zhengcaioa/Crawler/sichuan/CcgpSichuanoperation.cs | 3169 +++++++++++++++++++++++++++++++++-------------------------
1 file changed, 1793 insertions(+), 1376 deletions(-)
diff --git a/zhengcaioa/Crawler/sichuan/CcgpSichuanoperation.cs b/zhengcaioa/Crawler/sichuan/CcgpSichuanoperation.cs
index 821f402..37c8ea7 100644
--- a/zhengcaioa/Crawler/sichuan/CcgpSichuanoperation.cs
+++ b/zhengcaioa/Crawler/sichuan/CcgpSichuanoperation.cs
@@ -3,10 +3,14 @@
using System;
using System.Collections.Generic;
using System.Globalization;
+using System.IO;
+using System.IO.Compression;
using System.Linq;
+using System.Net;
using System.Net.Http;
using System.Net.Http.Headers;
using System.Text;
+using System.Text.RegularExpressions;
using System.Threading;
using zhengcaioa.Models;
namespace Crawler.sichuan
@@ -19,133 +23,139 @@
{
try
{
+
+
//鑾峰彇鏇存柊鏃堕棿
string operationStartTime = "";
string operationEndTime = "";
- Updatetime updatetime = _ccontext.Updatetimes.Where(x => x.Sheng == "涓浗鏀块噰缃戝洓宸濈渷").FirstOrDefault();
- if (updatetime != null)
+
+ logg.WriteLog("寮�濮嬪垽鏂椂闂�", "涓浗鏀块噰缃戝洓宸濈渷");
+
+ if (DateTime.Now.Hour == 0 )
{
- operationStartTime = updatetime.Updatetime1.ToString("yyyy-MM-dd");
- operationEndTime = DateTime.Now.ToString("yyyy-MM-dd");
- //updatetime.Updatetime1 = DateTime.Now.AddDays(-1);
- //_ccontext.SaveChanges();
+ DateTime datenow = DateTime.Now.Date;
+ Updatetime updatetime = _ccontext.Updatetimes.Where(x => x.Sheng == "涓浗鏀块噰缃戝洓宸濈渷").FirstOrDefault();
+ logg.WriteLog("datenow="+datenow.ToString("yyyy-MM-dd HH:mm:ss"), "涓浗鏀块噰缃戝洓宸濈渷");
+ operationStartTime = datenow.AddDays(-1).ToString("yyyy-MM-dd");
+ operationEndTime = datenow.AddDays(-1).ToString("yyyy-MM-dd");
+ logg.WriteLog("operationStartTime=" + operationStartTime, "涓浗鏀块噰缃戝洓宸濈渷");
+ logg.WriteLog("operationEndTime=" + operationEndTime, "涓浗鏀块噰缃戝洓宸濈渷");
+ var currPage = 1;
+ string sichuanpageurll = "";
+ //Thread.CurrentThread.Join(1000 * 60 * 30);//闃绘璁惧畾鏃堕棿
+ #region 鎰忓悜鍏紑
+ currPage = 1;
+ sichuanpageurll = "https://search.ccgp.gov.cn/bxsearch?searchtype=1&page_index=1&bidSort=0&buyerName=&projectId=&pinMu=0&bidType=9&dbselect=bidx&kw=%E6%84%8F%E5%90%91&start_time=&end_time=&timeType=6&displayZone=%E5%9B%9B%E5%B7%9D&zoneId=51&pppStatus=&agentName=";
+ logg.WriteLog("涓浗鏀块噰缃戝洓宸濈渷鎰忓悜鍏紑 寮�濮嬭幏鍙�", "涓浗鏀块噰缃戝洓宸濈渷");
+ yixianggonggao(_ccontext, sichuanpageurll, operationStartTime, operationEndTime, currPage);
+ logg.WriteLog("涓浗鏀块噰缃戝洓宸濈渷鎰忓悜鍏紑 寮�濮嬭幏鍙栫粨鏉�", "涓浗鏀块噰缃戝洓宸濈渷");
+ #endregion
+
+ //Thread.CurrentThread.Join(1000 * 60 * 30);//闃绘璁惧畾鏃堕棿
+
+ #region 鍏紑鎷涙爣
+ currPage = 1;
+ sichuanpageurll = "https://search.ccgp.gov.cn/bxsearch?searchtype=1&page_index=1&bidSort=0&buyerName=&projectId=&pinMu=0&bidType=1&dbselect=bidx&kw=&start_time=&end_time=&timeType=6&displayZone=%E5%9B%9B%E5%B7%9D&zoneId=51&pppStatus=&agentName=";
+ logg.WriteLog("涓浗鏀块噰缃戝洓宸濈渷閲囪喘鍏憡 鍏紑鎷涙爣 寮�濮嬭幏鍙�", "涓浗鏀块噰缃戝洓宸濈渷");
+ caigougonggao(_ccontext, sichuanpageurll, operationStartTime, operationEndTime, currPage, "1", "鍏紑鎷涙爣");
+ logg.WriteLog("涓浗鏀块噰缃戝洓宸濈渷閲囪喘鍏憡 鍏紑鎷涙爣 寮�濮嬭幏鍙栫粨鏉�", "涓浗鏀块噰缃戝洓宸濈渷");
+ #endregion
+ //Thread.CurrentThread.Join(1000 * 60 * 30);//闃绘璁惧畾鏃堕棿
+
+ #region 璇环
+ currPage = 1;
+ sichuanpageurll = "https://search.ccgp.gov.cn/bxsearch?searchtype=1&page_index=1&bidSort=0&buyerName=&projectId=&pinMu=0&bidType=2&dbselect=bidx&kw=&start_time=&end_time=&timeType=6&displayZone=%E5%9B%9B%E5%B7%9D&zoneId=51&pppStatus=&agentName=";
+ logg.WriteLog("涓浗鏀块噰缃戝洓宸濈渷閲囪喘鍏憡 璇环 寮�濮嬭幏鍙�", "涓浗鏀块噰缃戝洓宸濈渷");
+ caigougonggao(_ccontext, sichuanpageurll, operationStartTime, operationEndTime, currPage, "4", "璇环");
+ logg.WriteLog("涓浗鏀块噰缃戝洓宸濈渷閲囪喘鍏憡 璇环 寮�濮嬭幏鍙栫粨鏉�", "涓浗鏀块噰缃戝洓宸濈渷");
+ #endregion
+ //Thread.CurrentThread.Join(1000 * 60 * 30);//闃绘璁惧畾鏃堕棿
+ #region 绔炰簤鎬ц皥鍒�
+ currPage = 1;
+ sichuanpageurll = "https://search.ccgp.gov.cn/bxsearch?searchtype=1&page_index=1&bidSort=0&buyerName=&projectId=&pinMu=0&bidType=3&dbselect=bidx&kw=&start_time=&end_time=&timeType=6&displayZone=%E5%9B%9B%E5%B7%9D&zoneId=51&pppStatus=&agentName=";
+ logg.WriteLog("涓浗鏀块噰缃戝洓宸濈渷閲囪喘鍏憡 绔炰簤鎬ц皥鍒� 寮�濮嬭幏鍙�", "涓浗鏀块噰缃戝洓宸濈渷");
+ caigougonggao(_ccontext, sichuanpageurll, operationStartTime, operationEndTime, currPage, "5", "绔炰簤鎬ц皥鍒�");
+ logg.WriteLog("涓浗鏀块噰缃戝洓宸濈渷閲囪喘鍏憡 绔炰簤鎬ц皥鍒� 寮�濮嬭幏鍙栫粨鏉�", "涓浗鏀块噰缃戝洓宸濈渷");
+ #endregion
+ //Thread.CurrentThread.Join(1000 * 60 * 30);//闃绘璁惧畾鏃堕棿
+ #region 鍗曚竴鏉ユ簮
+ currPage = 1;
+ sichuanpageurll = "https://search.ccgp.gov.cn/bxsearch?searchtype=1&page_index=1&bidSort=0&buyerName=&projectId=&pinMu=0&bidType=4&dbselect=bidx&kw=&start_time=&end_time=&timeType=6&displayZone=%E5%9B%9B%E5%B7%9D&zoneId=51&pppStatus=&agentName=";
+ logg.WriteLog("涓浗鏀块噰缃戝洓宸濈渷閲囪喘鍏憡 鍗曚竴鏉ユ簮 寮�濮嬭幏鍙�", "涓浗鏀块噰缃戝洓宸濈渷");
+ caigougonggao(_ccontext, sichuanpageurll, operationStartTime, operationEndTime, currPage, "7", "鍗曚竴鏉ユ簮");
+ logg.WriteLog("涓浗鏀块噰缃戝洓宸濈渷閲囪喘鍏憡 鍗曚竴鏉ユ簮 寮�濮嬭幏鍙栫粨鏉�", "涓浗鏀块噰缃戝洓宸濈渷");
+ #endregion
+ //Thread.CurrentThread.Join(1000 * 60 * 30);//闃绘璁惧畾鏃堕棿
+ #region 閭�璇锋嫑鏍�
+ currPage = 1;
+ sichuanpageurll = "https://search.ccgp.gov.cn/bxsearch?searchtype=1&page_index=1&bidSort=0&buyerName=&projectId=&pinMu=0&bidType=6&dbselect=bidx&kw=&start_time=&end_time=&timeType=6&displayZone=%E5%9B%9B%E5%B7%9D&zoneId=51&pppStatus=&agentName=";
+ logg.WriteLog("涓浗鏀块噰缃戝洓宸濈渷閲囪喘鍏憡 閭�璇锋嫑鏍� 寮�濮嬭幏鍙�", "涓浗鏀块噰缃戝洓宸濈渷");
+ caigougonggao(_ccontext, sichuanpageurll, operationStartTime, operationEndTime, currPage, "2", "閭�璇锋嫑鏍�");
+ logg.WriteLog("涓浗鏀块噰缃戝洓宸濈渷閲囪喘鍏憡 閭�璇锋嫑鏍� 寮�濮嬭幏鍙栫粨鏉�", "涓浗鏀块噰缃戝洓宸濈渷");
+ #endregion
+ //Thread.CurrentThread.Join(1000 * 60 * 30);//闃绘璁惧畾鏃堕棿
+ #region 绔炰簤鎬х鍟�
+ currPage = 1;
+ sichuanpageurll = "https://search.ccgp.gov.cn/bxsearch?searchtype=1&page_index=1&bidSort=0&buyerName=&projectId=&pinMu=0&bidType=10&dbselect=bidx&kw=&start_time=&end_time=&timeType=6&displayZone=%E5%9B%9B%E5%B7%9D&zoneId=51&pppStatus=&agentName=";
+ logg.WriteLog("涓浗鏀块噰缃戝洓宸濈渷閲囪喘鍏憡 绔炰簤鎬х鍟� 寮�濮嬭幏鍙�", "涓浗鏀块噰缃戝洓宸濈渷");
+ caigougonggao(_ccontext, sichuanpageurll, operationStartTime, operationEndTime, currPage, "3", "绔炰簤鎬х鍟�");
+ logg.WriteLog("涓浗鏀块噰缃戝洓宸濈渷閲囪喘鍏憡 绔炰簤鎬х鍟� 寮�濮嬭幏鍙栫粨鏉�", "涓浗鏀块噰缃戝洓宸濈渷");
+ #endregion
+ //Thread.CurrentThread.Join(1000 * 60 * 30);//闃绘璁惧畾鏃堕棿
+ #region 涓爣鍏憡
+ currPage = 1;
+ sichuanpageurll = "https://search.ccgp.gov.cn/bxsearch?searchtype=1&page_index=1&bidSort=0&buyerName=&projectId=&pinMu=0&bidType=7&dbselect=bidx&kw=&start_time=&end_time=&timeType=6&displayZone=%E5%9B%9B%E5%B7%9D&zoneId=51&pppStatus=&agentName=";
+ logg.WriteLog("涓浗鏀块噰缃戝洓宸濈渷涓爣鍏憡 寮�濮嬭幏鍙�", "涓浗鏀块噰缃戝洓宸濈渷");
+ zhongbiaogonggao(_ccontext, sichuanpageurll, operationStartTime, operationEndTime, currPage);
+ logg.WriteLog("涓浗鏀块噰缃戝洓宸濈渷涓爣鍏憡 寮�濮嬭幏鍙栫粨鏉�", "涓浗鏀块噰缃戝洓宸濈渷");
+ #endregion
+ //Thread.CurrentThread.Join(1000 * 60 * 30);//闃绘璁惧畾鏃堕棿
+ #region 鎴愪氦鍏憡
+ currPage = 1;
+ sichuanpageurll = "https://search.ccgp.gov.cn/bxsearch?searchtype=1&page_index=1&bidSort=0&buyerName=&projectId=&pinMu=0&bidType=11&dbselect=bidx&kw=&start_time=&end_time=&timeType=6&displayZone=%E5%9B%9B%E5%B7%9D&zoneId=51&pppStatus=&agentName=";
+ logg.WriteLog("涓浗鏀块噰缃戝洓宸濈渷鎴愪氦鍏憡 寮�濮嬭幏鍙�", "涓浗鏀块噰缃戝洓宸濈渷");
+ zhongbiaogonggao(_ccontext, sichuanpageurll, operationStartTime, operationEndTime, currPage);
+ logg.WriteLog("涓浗鏀块噰缃戝洓宸濈渷鎴愪氦鍏憡 寮�濮嬭幏鍙栫粨鏉�", "涓浗鏀块噰缃戝洓宸濈渷");
+ #endregion
+ //Thread.CurrentThread.Join(1000 * 60 * 30);//闃绘璁惧畾鏃堕棿
+ #region 鏇存鍏憡
+ currPage = 1;
+ sichuanpageurll = "https://search.ccgp.gov.cn/bxsearch?searchtype=1&page_index=1&bidSort=0&buyerName=&projectId=&pinMu=0&bidType=8&dbselect=bidx&kw=&start_time=&end_time=&timeType=6&displayZone=%E5%9B%9B%E5%B7%9D&zoneId=51&pppStatus=&agentName=";
+ logg.WriteLog("涓浗鏀块噰缃戝洓宸濈渷鏇存鍏憡 寮�濮嬭幏鍙�", "涓浗鏀块噰缃戝洓宸濈渷");
+ gengzhenggonggao(_ccontext, sichuanpageurll, operationStartTime, operationEndTime, currPage);
+ logg.WriteLog("涓浗鏀块噰缃戝洓宸濈渷鏇存鍏憡 寮�濮嬭幏鍙栫粨鏉�", "涓浗鏀块噰缃戝洓宸濈渷");
+ #endregion
+ //Thread.CurrentThread.Join(1000 * 60 * 30);//闃绘璁惧畾鏃堕棿
+ #region 搴熸爣鍏憡
+ currPage = 1;
+ sichuanpageurll = "https://search.ccgp.gov.cn/bxsearch?searchtype=1&page_index=1&bidSort=0&buyerName=&projectId=&pinMu=0&bidType=12&dbselect=bidx&kw=&start_time=&end_time=&timeType=6&displayZone=%E5%9B%9B%E5%B7%9D&zoneId=51&pppStatus=&agentName=";
+ logg.WriteLog("涓浗鏀块噰缃戝洓宸濈渷搴熸爣鍏憡 寮�濮嬭幏鍙�", "涓浗鏀块噰缃戝洓宸濈渷");
+ feibiaogonggao(_ccontext, sichuanpageurll, operationStartTime, operationEndTime, currPage);
+ logg.WriteLog("涓浗鏀块噰缃戝洓宸濈渷搴熸爣鍏憡 寮�濮嬭幏鍙栫粨鏉�", "涓浗鏀块噰缃戝洓宸濈渷");
+ #endregion
+
+
+
+ //淇濆瓨鏈�鍚庢洿鏂版椂闂�
+ if (updatetime != null)
+ {
+ updatetime.Updatetime1 = datenow;
+ _ccontext.SaveChanges();
+ }
+ else
+ {
+ updatetime = new Updatetime();
+ updatetime.Id = Guid.NewGuid().ToString();
+ updatetime.Sheng = "涓浗鏀块噰缃戝洓宸濈渷";
+ updatetime.Updatetime1 = datenow;
+ _ccontext.Updatetimes.Add(updatetime);
+ _ccontext.SaveChanges();
+ }
+
+ Thread.CurrentThread.Join(1000 * 60 * 60);//闃绘璁惧畾鏃堕棿
}
- else
- {
- operationStartTime = "2021-12-01"; //DateTime.Now.AddYears(-1).ToString("yyyy-MM-dd");
- operationEndTime = "2022-12-01";//DateTime.Now.ToString("yyyy-MM-dd");
- }
- var currPage = 1;
- string sichuanpageurll = "";
-
- #region 鎰忓悜鍏紑
- currPage = 1;
- sichuanpageurll = "http://search.ccgp.gov.cn/bxsearch?searchtype=1&page_index=1&bidSort=0&buyerName=&projectId=&pinMu=0&bidType=9&dbselect=bidx&kw=%E6%84%8F%E5%90%91&start_time=&end_time=&timeType=6&displayZone=%E5%9B%9B%E5%B7%9D&zoneId=51&pppStatus=&agentName=";
- logg.WriteLog("涓浗鏀块噰缃戝洓宸濈渷鎰忓悜鍏紑 寮�濮嬭幏鍙�", "涓浗鏀块噰缃戝洓宸濈渷");
- yixianggonggao(_ccontext, sichuanpageurll, operationStartTime, operationEndTime, currPage);
- logg.WriteLog("涓浗鏀块噰缃戝洓宸濈渷鎰忓悜鍏紑 寮�濮嬭幏鍙栫粨鏉�", "涓浗鏀块噰缃戝洓宸濈渷");
- #endregion
-
-
-
- #region 鍏紑鎷涙爣
- currPage = 1;
- sichuanpageurll = "http://search.ccgp.gov.cn/bxsearch?searchtype=1&page_index=1&bidSort=0&buyerName=&projectId=&pinMu=0&bidType=1&dbselect=bidx&kw=&start_time=&end_time=&timeType=6&displayZone=%E5%9B%9B%E5%B7%9D&zoneId=51&pppStatus=&agentName=";
- logg.WriteLog("涓浗鏀块噰缃戝洓宸濈渷閲囪喘鍏憡 鍏紑鎷涙爣 寮�濮嬭幏鍙�", "涓浗鏀块噰缃戝洓宸濈渷");
- caigougonggao(_ccontext, sichuanpageurll, operationStartTime, operationEndTime, currPage, "1", "鍏紑鎷涙爣");
- logg.WriteLog("涓浗鏀块噰缃戝洓宸濈渷閲囪喘鍏憡 鍏紑鎷涙爣 寮�濮嬭幏鍙栫粨鏉�", "涓浗鏀块噰缃戝洓宸濈渷");
- #endregion
-
- #region 璇环
- currPage = 1;
- sichuanpageurll = "http://search.ccgp.gov.cn/bxsearch?searchtype=1&page_index=1&bidSort=0&buyerName=&projectId=&pinMu=0&bidType=2&dbselect=bidx&kw=&start_time=&end_time=&timeType=6&displayZone=%E5%9B%9B%E5%B7%9D&zoneId=51&pppStatus=&agentName=";
- logg.WriteLog("涓浗鏀块噰缃戝洓宸濈渷閲囪喘鍏憡 璇环 寮�濮嬭幏鍙�", "涓浗鏀块噰缃戝洓宸濈渷");
- caigougonggao(_ccontext, sichuanpageurll, operationStartTime, operationEndTime, currPage, "4", "璇环");
- logg.WriteLog("涓浗鏀块噰缃戝洓宸濈渷閲囪喘鍏憡 璇环 寮�濮嬭幏鍙栫粨鏉�", "涓浗鏀块噰缃戝洓宸濈渷");
- #endregion
- #region 绔炰簤鎬ц皥鍒�
- currPage = 1;
- sichuanpageurll = "http://search.ccgp.gov.cn/bxsearch?searchtype=1&page_index=1&bidSort=0&buyerName=&projectId=&pinMu=0&bidType=3&dbselect=bidx&kw=&start_time=&end_time=&timeType=6&displayZone=%E5%9B%9B%E5%B7%9D&zoneId=51&pppStatus=&agentName=";
- logg.WriteLog("涓浗鏀块噰缃戝洓宸濈渷閲囪喘鍏憡 绔炰簤鎬ц皥鍒� 寮�濮嬭幏鍙�", "涓浗鏀块噰缃戝洓宸濈渷");
- caigougonggao(_ccontext, sichuanpageurll, operationStartTime, operationEndTime, currPage, "5", "绔炰簤鎬ц皥鍒�");
- logg.WriteLog("涓浗鏀块噰缃戝洓宸濈渷閲囪喘鍏憡 绔炰簤鎬ц皥鍒� 寮�濮嬭幏鍙栫粨鏉�", "涓浗鏀块噰缃戝洓宸濈渷");
- #endregion
- #region 鍗曚竴鏉ユ簮
- currPage = 1;
- sichuanpageurll = "http://search.ccgp.gov.cn/bxsearch?searchtype=1&page_index=1&bidSort=0&buyerName=&projectId=&pinMu=0&bidType=4&dbselect=bidx&kw=&start_time=&end_time=&timeType=6&displayZone=%E5%9B%9B%E5%B7%9D&zoneId=51&pppStatus=&agentName=";
- logg.WriteLog("涓浗鏀块噰缃戝洓宸濈渷閲囪喘鍏憡 鍗曚竴鏉ユ簮 寮�濮嬭幏鍙�", "涓浗鏀块噰缃戝洓宸濈渷");
- caigougonggao(_ccontext, sichuanpageurll, operationStartTime, operationEndTime, currPage, "7", "鍗曚竴鏉ユ簮");
- logg.WriteLog("涓浗鏀块噰缃戝洓宸濈渷閲囪喘鍏憡 鍗曚竴鏉ユ簮 寮�濮嬭幏鍙栫粨鏉�", "涓浗鏀块噰缃戝洓宸濈渷");
- #endregion
- #region 閭�璇锋嫑鏍�
- currPage = 1;
- sichuanpageurll = "http://search.ccgp.gov.cn/bxsearch?searchtype=1&page_index=1&bidSort=0&buyerName=&projectId=&pinMu=0&bidType=6&dbselect=bidx&kw=&start_time=&end_time=&timeType=6&displayZone=%E5%9B%9B%E5%B7%9D&zoneId=51&pppStatus=&agentName=";
- logg.WriteLog("涓浗鏀块噰缃戝洓宸濈渷閲囪喘鍏憡 閭�璇锋嫑鏍� 寮�濮嬭幏鍙�", "涓浗鏀块噰缃戝洓宸濈渷");
- caigougonggao(_ccontext, sichuanpageurll, operationStartTime, operationEndTime, currPage, "2", "閭�璇锋嫑鏍�");
- logg.WriteLog("涓浗鏀块噰缃戝洓宸濈渷閲囪喘鍏憡 閭�璇锋嫑鏍� 寮�濮嬭幏鍙栫粨鏉�", "涓浗鏀块噰缃戝洓宸濈渷");
- #endregion
- #region 绔炰簤鎬х鍟�
- currPage = 1;
- sichuanpageurll = "http://search.ccgp.gov.cn/bxsearch?searchtype=1&page_index=1&bidSort=0&buyerName=&projectId=&pinMu=0&bidType=10&dbselect=bidx&kw=&start_time=&end_time=&timeType=6&displayZone=%E5%9B%9B%E5%B7%9D&zoneId=51&pppStatus=&agentName=";
- logg.WriteLog("涓浗鏀块噰缃戝洓宸濈渷閲囪喘鍏憡 绔炰簤鎬х鍟� 寮�濮嬭幏鍙�", "涓浗鏀块噰缃戝洓宸濈渷");
- caigougonggao(_ccontext, sichuanpageurll, operationStartTime, operationEndTime, currPage, "3", "绔炰簤鎬х鍟�");
- logg.WriteLog("涓浗鏀块噰缃戝洓宸濈渷閲囪喘鍏憡 绔炰簤鎬х鍟� 寮�濮嬭幏鍙栫粨鏉�", "涓浗鏀块噰缃戝洓宸濈渷");
- #endregion
-
-
-
- #region 涓爣鍏憡
- currPage = 1;
- sichuanpageurll = "http://search.ccgp.gov.cn/bxsearch?searchtype=1&page_index=1&bidSort=0&buyerName=&projectId=&pinMu=0&bidType=7&dbselect=bidx&kw=&start_time=&end_time=&timeType=6&displayZone=%E5%9B%9B%E5%B7%9D&zoneId=51&pppStatus=&agentName=";
- logg.WriteLog("涓浗鏀块噰缃戝洓宸濈渷涓爣鍏憡 寮�濮嬭幏鍙�", "涓浗鏀块噰缃戝洓宸濈渷");
- zhongbiaogonggao(_ccontext, sichuanpageurll, operationStartTime, operationEndTime, currPage);
- logg.WriteLog("涓浗鏀块噰缃戝洓宸濈渷涓爣鍏憡 寮�濮嬭幏鍙栫粨鏉�", "涓浗鏀块噰缃戝洓宸濈渷");
- #endregion
-
-
- #region 鎴愪氦鍏憡
- currPage = 1;
- sichuanpageurll = "http://search.ccgp.gov.cn/bxsearch?searchtype=1&page_index=1&bidSort=0&buyerName=&projectId=&pinMu=0&bidType=11&dbselect=bidx&kw=&start_time=&end_time=&timeType=6&displayZone=%E5%9B%9B%E5%B7%9D&zoneId=51&pppStatus=&agentName=";
- logg.WriteLog("涓浗鏀块噰缃戝洓宸濈渷鎴愪氦鍏憡 寮�濮嬭幏鍙�", "涓浗鏀块噰缃戝洓宸濈渷");
- zhongbiaogonggao(_ccontext, sichuanpageurll, operationStartTime, operationEndTime, currPage);
- logg.WriteLog("涓浗鏀块噰缃戝洓宸濈渷鎴愪氦鍏憡 寮�濮嬭幏鍙栫粨鏉�", "涓浗鏀块噰缃戝洓宸濈渷");
- #endregion
-
-
- #region 鏇存鍏憡
- currPage = 1;
- sichuanpageurll = "http://search.ccgp.gov.cn/bxsearch?searchtype=1&page_index=1&bidSort=0&buyerName=&projectId=&pinMu=0&bidType=8&dbselect=bidx&kw=&start_time=&end_time=&timeType=6&displayZone=%E5%9B%9B%E5%B7%9D&zoneId=51&pppStatus=&agentName=";
- logg.WriteLog("涓浗鏀块噰缃戝洓宸濈渷鏇存鍏憡 寮�濮嬭幏鍙�", "涓浗鏀块噰缃戝洓宸濈渷");
- gengzhenggonggao(_ccontext, sichuanpageurll, operationStartTime, operationEndTime, currPage);
- logg.WriteLog("涓浗鏀块噰缃戝洓宸濈渷鏇存鍏憡 寮�濮嬭幏鍙栫粨鏉�", "涓浗鏀块噰缃戝洓宸濈渷");
- #endregion
-
-
- #region 搴熸爣鍏憡
- currPage = 1;
- sichuanpageurll = "http://search.ccgp.gov.cn/bxsearch?searchtype=1&page_index=1&bidSort=0&buyerName=&projectId=&pinMu=0&bidType=12&dbselect=bidx&kw=&start_time=&end_time=&timeType=6&displayZone=%E5%9B%9B%E5%B7%9D&zoneId=51&pppStatus=&agentName=";
- logg.WriteLog("涓浗鏀块噰缃戝洓宸濈渷搴熸爣鍏憡 寮�濮嬭幏鍙�", "涓浗鏀块噰缃戝洓宸濈渷");
- feibiaogonggao(_ccontext, sichuanpageurll, operationStartTime, operationEndTime , currPage);
- logg.WriteLog("涓浗鏀块噰缃戝洓宸濈渷搴熸爣鍏憡 寮�濮嬭幏鍙栫粨鏉�", "涓浗鏀块噰缃戝洓宸濈渷");
- #endregion
-
-
-
- //淇濆瓨鏈�鍚庢洿鏂版椂闂�
- if (updatetime != null)
- {
- updatetime.Updatetime1 = DateTime.Now.AddDays(-1);
- _ccontext.SaveChanges();
- }
- else
- {
- updatetime = new Updatetime();
- updatetime.Id = Guid.NewGuid().ToString();
- updatetime.Sheng = "涓浗鏀块噰缃戝洓宸濈渷";
- updatetime.Updatetime1 = DateTime.Now.AddDays(-1);
- _ccontext.Updatetimes.Add(updatetime);
- _ccontext.SaveChanges();
- }
+ //datenow = DateTime.Now.Date.AddDays(1);
+
}
catch (Exception ex)
{
@@ -165,7 +175,7 @@
//閲囪喘鍏憡
- public static void caigougonggao(WebCrawlerContext _ccontext, string sichuanpageurll, string startTime, string endTime , int page, string cgfs, string cgfsName)
+ public static async void caigougonggao(WebCrawlerContext _ccontext, string sichuanpageurll, string startTime, string endTime , int page, string cgfs, string cgfsName)
{
sichuanpageurll = sichuanpageurll.Replace("start_time=", "start_time=" + startTime.Replace("-", "%3A"));
@@ -174,335 +184,414 @@
var list11 = new List<AngleSharp.Dom.IElement>();
while (true)
{
- Thread.CurrentThread.Join(1000 * 5);//闃绘璁惧畾鏃堕棿
+ Thread.CurrentThread.Join(1000 * 10);//闃绘璁惧畾鏃堕棿
logg.WriteLog(page.ToString(), "涓浗鏀块噰缃戝洓宸濈渷");
string sichuanpageurl2 = sichuanpageurll.Replace("page_index=1", "page_index=" + page);
- using (HttpClient client = new HttpClient())
+ try
{
- //List<sichuanjieshoudtl> data = new List<sichuanjieshoudtl>();
- HttpResponseMessage response = client.GetAsync(sichuanpageurl2).Result;
- var res = response.Content.ReadAsStringAsync().Result;
- var document = parser.ParseDocument(res);
- var sssdfsdfsd = document.All.Where(m => m.ClassName == "vT-srch-result-list").FirstOrDefault();
- var contentList = sssdfsdfsd.QuerySelector("ul");
- if (contentList != null)
+ HttpClientHandler handler = new HttpClientHandler();
+ handler.CookieContainer = new CookieContainer();
+ using (HttpClient client = new HttpClient(handler))
{
- var lists = contentList.QuerySelectorAll("li");
- if (lists == null || lists.Length == 0)
+ client.Timeout = TimeSpan.FromSeconds(60);
+ client.DefaultRequestHeaders.Add("Accept", "*/*");
+ client.DefaultRequestHeaders.Add("Accept-Encoding", "gzip, deflate, br");
+ client.DefaultRequestHeaders.Add("Host", "search.ccgp.gov.cn");
+ //List<sichuanjieshoudtl> data = new List<sichuanjieshoudtl>();
+ HttpResponseMessage response = client.GetAsync(sichuanpageurl2).Result;
+ string res = "";
+ if (response.IsSuccessStatusCode)
{
- break;
- }
- if (page != 1)
- {
- var list22 = lists.ToList();
- var breakable = true;
- if (list11.Count == list22.Count)
+ using (var responseStream = await response.Content.ReadAsStreamAsync())
{
- foreach (var list11111 in list11)
+ using (var decompressedStream = new GZipStream(responseStream, CompressionMode.Decompress))
{
- var listcount = list22.Count(x=>x.InnerHtml == list11111.InnerHtml);
-
- if (listcount <= 0)
+ using (var reader = new StreamReader(decompressedStream))
{
- breakable = false;
- break;
+ res = await reader.ReadToEndAsync();
+ // 澶勭悊瑙e帇缂╁悗鐨勫搷搴斿唴瀹�
}
}
- if (breakable)
- {
- break;
- }
}
- list11 = list22;
}
else
{
- list11 = lists.ToList();
+ // 澶勭悊璇锋眰澶辫触鐨勬儏鍐�
}
-
-
-
- foreach (var sichuanjieshoudtl1 in lists)
+ //var res = response.Content.ReadAsStringAsync().Result;
+ var document = parser.ParseDocument(res);
+ var sssdfsdfsd = document.All.Where(m => m.ClassName == "vT-srch-result-list").FirstOrDefault();
+ var contentList = sssdfsdfsd.QuerySelector("ul");
+ if (contentList != null)
{
- Thread.CurrentThread.Join(1000 * 5);//闃绘璁惧畾鏃堕棿
- try
+ var lists = contentList.QuerySelectorAll("li");
+ if (lists == null || lists.Length == 0)
{
- sichuanjieshoudtl aaaaaaaa = new sichuanjieshoudtl();
- aaaaaaaa.pageurl = sichuanjieshoudtl1.QuerySelector("a").GetAttribute("href");
-
- logg.WriteLog(aaaaaaaa.pageurl, "涓浗鏀块噰缃戝洓宸濈渷");
- aaaaaaaa.title = sichuanjieshoudtl1.QuerySelector("a").TextContent.Replace("\n", "").Trim() ;
- aaaaaaaa.shorttitle = aaaaaaaa.title;
-
-
-
- string[] sssssss = sichuanjieshoudtl1.QuerySelector("span").TextContent.Replace("|","").Split("\n");
- string Purchaser = null;
- string Agency = null;
- var NoticeTime = sssssss[0].Trim();
- foreach(var ssss in sssssss)
+ break;
+ }
+ if (page != 1)
+ {
+ var list22 = lists.ToList();
+ var breakable = true;
+ if (list11.Count == list22.Count)
{
- if (ssss.IndexOf("閲囪喘浜�")>=0)
+ foreach (var list11111 in list11)
{
- Purchaser = ssss.Replace("閲囪喘浜�", "").Replace("锛�", "").Replace("/", "").Trim();
+ var listcount = list22.Count(x => x.InnerHtml == list11111.InnerHtml);
+
+ if (listcount <= 0)
+ {
+ breakable = false;
+ break;
+ }
}
- if (ssss.IndexOf("浠g悊鏈烘瀯") >= 0)
+ if (breakable)
{
- Agency = ssss.Replace("浠g悊鏈烘瀯", "").Replace("锛�", "").Replace("/", "").Trim();
+ break;
}
}
- using (HttpClient clientdtl = new HttpClient())
+ list11 = list22;
+ }
+ else
+ {
+ list11 = lists.ToList();
+ }
+
+ string[] bbb = null;
+ var scriptElements = document.QuerySelectorAll("script");
+ foreach (var scriptElement in scriptElements)
+ {
+ var scriptText = scriptElement.TextContent;
+
+ if (scriptText.Contains("var ohtmlurls")) // 妫�鏌ユ枃鏈腑鏄惁鍖呭惈鐗瑰畾鍙橀噺
{
- var zhengfuProjectcount = _ccontext.ZhengfuProjects.Count(x => (x.RecStatus == "A" && x.Pageurl == aaaaaaaa.pageurl) || (x.RecStatus == "A" && x.Title == aaaaaaaa.title && x.Gglx == "02" && x.OpenTenderTime == DateTime.Parse(NoticeTime)));
-
-
- if (zhengfuProjectcount <= 0)
- {
- HttpResponseMessage responsedtl = clientdtl.GetAsync(aaaaaaaa.pageurl).Result;
- var resdtl = responsedtl.Content.ReadAsStringAsync().Result;
- var documentdtl = parser.ParseDocument(resdtl);
-
- var dtl = documentdtl.All.Where(m => m.ClassName == "vF_detail_content_container").FirstOrDefault();
-
- var content = dtl.OuterHtml;
-
- var fujian = documentdtl.All.Where(m => m.ClassName == "main").FirstOrDefault();
-
- var fujians = documentdtl.All.Where(m => m.ClassName == "bizDownload").ToList() ;
- if(fujians!=null&& fujians.Count > 0)
+
+ int startIndex = scriptText.IndexOf('"') + 1;
+ int endIndex = scriptText.IndexOf('"', startIndex);
+ if (startIndex > 0 && endIndex > startIndex)
{
- var fujianhtml = "<div class=\"bid_attachtab\"><table width=\"700\" border=\"0\" cellspacing=\"1\" bgcolor=\"#efefef\" style=\"text-align: left; width: 100%; margin: 0px auto;\"><tbody>";
- foreach (var fujianya in fujians)
- {
- var ssss = fujianya.Id;
- var sssss = fujianya.InnerHtml;
- fujianhtml += "<tr><td class=\"bid_attachtab_content\">闄勪欢涓嬭浇锛�<a class=\"bizDownload\" target=\"_blank\" href =\"http://download.ccgp.gov.cn/oss/download?uuid=" + ssss + "\" id=\"0E1723104D34335C527765FF6CD28A\" title=\"鐐瑰嚮涓嬭浇\">"+ sssss + "</a><br></td></tr>";
- }
- fujianhtml += "</tbody></table></div>";
-
- content += fujianhtml;
+ var aaaa = scriptText.Substring(startIndex, endIndex - startIndex);
+ if (!string.IsNullOrEmpty(aaaa))
+ {
+ bbb = aaaa.Split(',');
}
+ }
+ }
+ }
-
+
- string OpenTenderCode = null;
- decimal? Budget = null;
- DateTime? OpenTenderTime = null;
- var td = dtl.QuerySelectorAll("p");
+ int ccc = 0;
- if (td != null && td.Length>0)
+ foreach (var sichuanjieshoudtl1 in lists)
+ {
+ Thread.CurrentThread.Join(1000 * 10);//闃绘璁惧畾鏃堕棿
+ try
+ {
+ sichuanjieshoudtl aaaaaaaa = new sichuanjieshoudtl();
+ if(bbb!=null && bbb.Length>= ccc && !string.IsNullOrEmpty(bbb[ccc]))
+ {
+ aaaaaaaa.pageurl = bbb[ccc];
+ ccc = ccc + 1;
+ }
+ else
+ {
+ ccc = ccc + 1;
+ continue;
+ }
+ // aaaaaaaa.pageurl = sichuanjieshoudtl1.QuerySelector("a").GetAttribute("href");
+
+ logg.WriteLog(aaaaaaaa.pageurl, "涓浗鏀块噰缃戝洓宸濈渷");
+ aaaaaaaa.title = sichuanjieshoudtl1.QuerySelector("a").TextContent.Replace("\n", "").Trim();
+ aaaaaaaa.shorttitle = aaaaaaaa.title;
+
+
+
+ string[] sssssss = sichuanjieshoudtl1.QuerySelector("span").TextContent.Replace("|", "").Split("\n");
+ string Purchaser = null;
+ string Agency = null;
+ var NoticeTime = sssssss[0].Trim();
+ foreach (var ssss in sssssss)
+ {
+ if (ssss.IndexOf("閲囪喘浜�") >= 0)
{
+ Purchaser = ssss.Replace("閲囪喘浜�", "").Replace("锛�", "").Replace("/", "").Trim();
+ }
+ if (ssss.IndexOf("浠g悊鏈烘瀯") >= 0)
+ {
+ Agency = ssss.Replace("浠g悊鏈烘瀯", "").Replace("锛�", "").Replace("/", "").Trim();
+ }
+ }
+ using (HttpClient clientdtl = new HttpClient())
+ {
+ var notime = DateTime.Parse(NoticeTime).ToString("yyyy-MM-dd");
+ var notimestart = DateTime.Parse(notime);
+ var notimeend = notimestart.AddDays(1);
+ var zhengfuProjectcount = _ccontext.ZhengfuProjects.Count(x => (x.RecStatus == "A" && x.Pageurl == aaaaaaaa.pageurl) || (x.RecStatus == "A" && x.Title == aaaaaaaa.title && x.Gglx == "02" && x.NoticeTime >= notimestart && x.NoticeTime < notimeend) );
- for (int i = 0; i < td.Length; i++)
+ if (zhengfuProjectcount <= 0)
+ {
+ clientdtl.Timeout = TimeSpan.FromSeconds(60);
+ HttpResponseMessage responsedtl = clientdtl.GetAsync(aaaaaaaa.pageurl).Result;
+ var resdtl = responsedtl.Content.ReadAsStringAsync().Result;
+ var documentdtl = parser.ParseDocument(resdtl);
+
+ var dtl = documentdtl.All.Where(m => m.ClassName == "vF_detail_content_container").FirstOrDefault();
+
+ var content = dtl.OuterHtml;
+
+ var fujian = documentdtl.All.Where(m => m.ClassName == "main").FirstOrDefault();
+
+ var fujians = documentdtl.All.Where(m => m.ClassName == "bizDownload").ToList();
+ if (fujians != null && fujians.Count > 0)
{
- if (td[i].TextContent.IndexOf("椤圭洰缂栧彿锛�") >= 0 && td[i].TextContent.IndexOf("銆�") < 0)
+ var fujianhtml = "<div class=\"bid_attachtab\"><table width=\"700\" border=\"0\" cellspacing=\"1\" bgcolor=\"#efefef\" style=\"text-align: left; width: 100%; margin: 0px auto;\"><tbody>";
+ foreach (var fujianya in fujians)
{
- if (string.IsNullOrEmpty(OpenTenderCode))
- {
- OpenTenderCode = td[i].TextContent.Replace("椤圭洰缂栧彿锛�", "").Replace("1锛�", "").Replace("/", "").Trim();
- }
-
-
-
+ var ssss = fujianya.Id;
+ var sssss = fujianya.InnerHtml;
+ fujianhtml += "<tr><td class=\"bid_attachtab_content\">闄勪欢涓嬭浇锛�<a class=\"bizDownload\" target=\"_blank\" href =\"https://download.ccgp.gov.cn/oss/download?uuid=" + ssss + "\" id=\"0E1723104D34335C527765FF6CD28A\" title=\"鐐瑰嚮涓嬭浇\">" + sssss + "</a><br></td></tr>";
}
- if (td[i].TextContent.IndexOf("棰勭畻閲戦锛�") >= 0)
- {
+ fujianhtml += "</tbody></table></div>";
- if(td[i].TextContent.IndexOf("涓囧厓") >= 0)
+ content += fujianhtml;
+ }
+
+
+
+ string OpenTenderCode = null;
+ decimal? Budget = null;
+ DateTime? OpenTenderTime = null;
+
+ var td = dtl.QuerySelectorAll("p");
+
+ if (td != null && td.Length > 0)
+ {
+
+
+ for (int i = 0; i < td.Length; i++)
+ {
+ if (td[i].TextContent.IndexOf("椤圭洰缂栧彿锛�") >= 0 && td[i].TextContent.IndexOf("銆�") < 0)
{
- var yusuan = td[i].TextContent.Replace("棰勭畻閲戦锛�", "").Replace("3锛�", "").Replace("锛�", "").Replace("锛�", "").Replace("涓囧厓", "").Replace("浜烘皯甯�", "").Trim();
- decimal a = 0;
- if (decimal.TryParse(yusuan, out a))
+ if (string.IsNullOrEmpty(OpenTenderCode))
{
- Budget = a * 10000;
+ OpenTenderCode = td[i].TextContent.Replace("椤圭洰缂栧彿锛�", "").Replace("1锛�", "").Replace("/", "").Trim();
+ }
+
+
+
+ }
+ if (td[i].TextContent.IndexOf("棰勭畻閲戦锛�") >= 0)
+ {
+
+ if (td[i].TextContent.IndexOf("涓囧厓") >= 0)
+ {
+ var yusuan = td[i].TextContent.Replace("棰勭畻閲戦锛�", "").Replace("3锛�", "").Replace("锛�", "").Replace("锛�", "").Replace("涓囧厓", "").Replace("浜烘皯甯�", "").Trim();
+ decimal a = 0;
+ if (decimal.TryParse(yusuan, out a))
+ {
+ Budget = a * 10000;
+ }
+
+
+ }
+ else
+ {
+ var yusuan = td[i].TextContent.Replace("棰勭畻閲戦锛�", "").Replace("3锛�", "").Replace("锛�", "").Replace("锛�", "").Replace("鍏�", "").Replace("浜烘皯甯�", "").Trim();
+ decimal a = 0;
+ if (decimal.TryParse(yusuan, out a))
+ {
+ Budget = a;
+ }
+ }
+ }
+
+
+ if (td[i].TextContent.IndexOf("寮�鏍囨椂闂达細") >= 0)
+ {
+
+
+ DateTime a;
+ var sss = td[i].TextContent.Replace("寮�鏍囨椂闂达細", "").Replace("鍖椾含鏃堕棿锛�", "").Replace("骞�", "-").Replace("鏈�", "-").Replace("鏃�", " ").Replace("鐐�", ":").Replace("鏃�", ":").Replace("鍒嗭紙", "").Replace("鍒�", ":").Replace("绉�", "").Replace("锛�", "").Trim();
+ if (DateTime.TryParse(sss, out a))
+ {
+ OpenTenderTime = a;
}
}
- else
+ if (td[i].TextContent.IndexOf("1.鏃堕棿锛�") >= 0)
{
- var yusuan = td[i].TextContent.Replace("棰勭畻閲戦锛�", "").Replace("3锛�", "").Replace("锛�", "").Replace("锛�", "").Replace("鍏�", "").Replace("浜烘皯甯�", "").Trim();
- decimal a = 0;
- if (decimal.TryParse(yusuan, out a))
+ if (!OpenTenderTime.HasValue)
{
- Budget = a;
- }
- }
- }
-
-
- if (td[i].TextContent.IndexOf("寮�鏍囨椂闂达細") >= 0)
- {
-
-
DateTime a;
- var sss = td[i].TextContent.Replace("寮�鏍囨椂闂达細", "").Replace("鍖椾含鏃堕棿锛�", "").Replace("骞�", "-").Replace("鏈�", "-").Replace("鏃�", " ").Replace("鐐�", ":").Replace("鏃�", ":").Replace("鍒嗭紙", "").Replace("鍒�", ":").Replace("绉�", "").Replace("锛�", "").Trim();
- if (DateTime.TryParse(sss, out a))
+ var sss = td[i].TextContent.Replace("1.鏃堕棿锛�", "").Replace("鍖椾含鏃堕棿锛�", "").Replace("骞�", "-").Replace("鏈�", "-").Replace("鏃�", " ").Replace("鐐�", ":").Replace("鏃�", ":").Replace("鍒嗭紙", "").Replace("鍒�", ":").Replace("绉�", "").Replace("锛�", "").Trim();
+ if (DateTime.TryParse(sss, out a))
{
OpenTenderTime = a;
}
-
-
- }
- if (td[i].TextContent.IndexOf("1.鏃堕棿锛�") >= 0)
- {
- if (!OpenTenderTime.HasValue)
- {
- DateTime a;
- var sss = td[i].TextContent.Replace("1.鏃堕棿锛�", "").Replace("鍖椾含鏃堕棿锛�", "").Replace("骞�", "-").Replace("鏈�", "-").Replace("鏃�", " ").Replace("鐐�", ":").Replace("鏃�", ":").Replace("鍒嗭紙", "").Replace("鍒�", ":").Replace("绉�", "").Replace("锛�", "").Trim();
- if (DateTime.TryParse(sss, out a))
- {
- OpenTenderTime = a;
}
+
+
+
+
}
+ if (td[i].TextContent.StartsWith("鏃堕棿锛�"))
+ {
+ if (!OpenTenderTime.HasValue)
+ {
+ DateTime a;
+ var sss = td[i].TextContent.Replace("鏃堕棿锛�", "").Replace("鍖椾含鏃堕棿锛�", "").Replace("骞�", "-").Replace("鏈�", "-").Replace("鏃�", " ").Replace("鐐�", ":").Replace("鏃�", ":").Replace("鍒嗭紙", "").Replace("鍒�", ":").Replace("绉�", "").Replace("锛�", "").Trim();
+ if (DateTime.TryParse(sss, out a))
+ {
+ OpenTenderTime = a;
+ }
+ }
+
+
+ }
}
-
- if (td[i].TextContent.StartsWith("鏃堕棿锛�"))
- {
- if (!OpenTenderTime.HasValue)
- {
- DateTime a;
- var sss = td[i].TextContent.Replace("鏃堕棿锛�", "").Replace("鍖椾含鏃堕棿锛�", "").Replace("骞�", "-").Replace("鏈�", "-").Replace("鏃�", " ").Replace("鐐�", ":").Replace("鏃�", ":").Replace("鍒嗭紙", "").Replace("鍒�", ":").Replace("绉�", "").Replace("锛�", "").Trim();
- if (DateTime.TryParse(sss, out a))
- {
- OpenTenderTime = a;
- }
- }
-
-
-
-
- }
-
- }
}
-
- var zhengfuProject = new ZhengfuProject();
- zhengfuProject.Id = Guid.NewGuid().ToString();
- zhengfuProject.Gglx = "02";
- string gglxName = "閲囪喘鍏憡";
- zhengfuProject.NoticeTime = DateTime.Parse(NoticeTime);
- zhengfuProject.OpenTenderTime = OpenTenderTime;
- zhengfuProject.RegionCode = null;
- //regionName = regionName;
- zhengfuProject.Sheng = "510000";
- string ShengName = "鍥涘窛鐪�";
- zhengfuProject.City = null ;
- //CityName = CityName;
+
+ var zhengfuProject = new ZhengfuProject();
+ zhengfuProject.Id = Guid.NewGuid().ToString();
+ zhengfuProject.Gglx = "02";
+ string gglxName = "閲囪喘鍏憡";
+ zhengfuProject.NoticeTime = DateTime.Parse(NoticeTime);
+ zhengfuProject.OpenTenderTime = OpenTenderTime;
+ zhengfuProject.RegionCode = null;
+ //regionName = regionName;
+ zhengfuProject.Sheng = "510000";
+ string ShengName = "鍥涘窛鐪�";
+ zhengfuProject.City = null;
+ //CityName = CityName;
- zhengfuProject.Cgfs = cgfs;
+ zhengfuProject.Cgfs = cgfs;
- zhengfuProject.OpenTenderCode = OpenTenderCode;
- zhengfuProject.Budget = Budget;
+ zhengfuProject.OpenTenderCode = OpenTenderCode;
+ zhengfuProject.Budget = Budget;
- zhengfuProject.Title = aaaaaaaa.title;
- zhengfuProject.Shorttitle = aaaaaaaa.shorttitle;
- zhengfuProject.Pageurl = aaaaaaaa.pageurl;
- zhengfuProject.Purchaser = Purchaser;
+ zhengfuProject.Title = aaaaaaaa.title;
+ zhengfuProject.Shorttitle = aaaaaaaa.shorttitle;
+ zhengfuProject.Pageurl = aaaaaaaa.pageurl;
+ zhengfuProject.Purchaser = Purchaser;
- zhengfuProject.Agency = Agency;
- zhengfuProject.AgencyCode = null;
- zhengfuProject.Content = content;
- zhengfuProject.RecStatus = "A";
- zhengfuProject.Creater = "1";
- zhengfuProject.Createtime = DateTime.Now;
- zhengfuProject.Modifier = "1";
- zhengfuProject.Modifytime = zhengfuProject.Createtime;
- _ccontext.ZhengfuProjects.Add(zhengfuProject);
+ zhengfuProject.Agency = Agency;
+ zhengfuProject.AgencyCode = null;
+ zhengfuProject.Content = content;
+ zhengfuProject.RecStatus = "A";
+ zhengfuProject.Creater = "1";
+ zhengfuProject.Createtime = DateTime.Now;
+ zhengfuProject.Modifier = "1";
+ zhengfuProject.Modifytime = zhengfuProject.Createtime;
+ _ccontext.ZhengfuProjects.Add(zhengfuProject);
- string url = $"{Program.api_domain}/webcrawler/_doc/" + zhengfuProject.Id;
- string result = string.Empty;
- Uri postUrl = new Uri(url);
- eswebcrawler eswebcrawler1 = new eswebcrawler();
- eswebcrawler1.Id = zhengfuProject.Id;
- eswebcrawler1.noticeTime = zhengfuProject.NoticeTime.ToString("yyyy-MM-dd HH:mm:ss");
- eswebcrawler1.openTenderTime = zhengfuProject.OpenTenderTime.HasValue ? zhengfuProject.OpenTenderTime.Value.ToString("yyyy-MM-dd HH:mm:ss") : null;
- eswebcrawler1.sheng = zhengfuProject.Sheng;
- eswebcrawler1.shengName = ShengName;
- eswebcrawler1.city = zhengfuProject.City;
- eswebcrawler1.cityName = null;
- eswebcrawler1.regionCode = zhengfuProject.RegionCode;
- eswebcrawler1.regionName = null;
- eswebcrawler1.cgfs = zhengfuProject.Cgfs;
- eswebcrawler1.cgfsName = cgfsName;
- eswebcrawler1.gglx = zhengfuProject.Gglx;
- eswebcrawler1.gglxName = gglxName;
- eswebcrawler1.openTenderCode = zhengfuProject.OpenTenderCode;
- eswebcrawler1.title = zhengfuProject.Title;
- eswebcrawler1.shorttitle = zhengfuProject.Shorttitle;
- eswebcrawler1.pageurl = zhengfuProject.Pageurl;
- eswebcrawler1.pingmu = zhengfuProject.Pingmu;
- eswebcrawler1.pingmuName = "";
- eswebcrawler1.purchaser = zhengfuProject.Purchaser;
- eswebcrawler1.budget = zhengfuProject.Budget.HasValue ? zhengfuProject.Budget.Value.ToString() : "";
- eswebcrawler1.agency = zhengfuProject.Agency;
- eswebcrawler1.agencyCode = zhengfuProject.AgencyCode;
- eswebcrawler1.modifyTime = zhengfuProject.Modifytime.ToString("yyyy-MM-dd HH:mm:ss");
+ string url = $"{Program.api_domain}/webcrawler/_doc/" + zhengfuProject.Id;
+ string result = string.Empty;
+ Uri postUrl = new Uri(url);
+ eswebcrawler eswebcrawler1 = new eswebcrawler();
+ eswebcrawler1.Id = zhengfuProject.Id;
+ eswebcrawler1.noticeTime = zhengfuProject.NoticeTime.ToString("yyyy-MM-dd HH:mm:ss");
+ eswebcrawler1.openTenderTime = zhengfuProject.OpenTenderTime.HasValue ? zhengfuProject.OpenTenderTime.Value.ToString("yyyy-MM-dd HH:mm:ss") : null;
+ eswebcrawler1.sheng = zhengfuProject.Sheng;
+ eswebcrawler1.shengName = ShengName;
+ eswebcrawler1.city = zhengfuProject.City;
+ eswebcrawler1.cityName = null;
+ eswebcrawler1.regionCode = zhengfuProject.RegionCode;
+ eswebcrawler1.regionName = null;
+ eswebcrawler1.cgfs = zhengfuProject.Cgfs;
+ eswebcrawler1.cgfsName = cgfsName;
+ eswebcrawler1.gglx = zhengfuProject.Gglx;
+ eswebcrawler1.gglxName = gglxName;
+ eswebcrawler1.openTenderCode = zhengfuProject.OpenTenderCode;
+ eswebcrawler1.title = zhengfuProject.Title;
+ eswebcrawler1.shorttitle = zhengfuProject.Shorttitle;
+ eswebcrawler1.pageurl = zhengfuProject.Pageurl;
+ eswebcrawler1.pingmu = zhengfuProject.Pingmu;
+ eswebcrawler1.pingmuName = "";
+ eswebcrawler1.purchaser = zhengfuProject.Purchaser;
+ eswebcrawler1.budget = zhengfuProject.Budget.HasValue ? zhengfuProject.Budget.Value.ToString() : "";
+ eswebcrawler1.agency = zhengfuProject.Agency;
+ eswebcrawler1.agencyCode = zhengfuProject.AgencyCode;
+ eswebcrawler1.modifyTime = zhengfuProject.Modifytime.ToString("yyyy-MM-dd HH:mm:ss");
- string requestJson = JsonConvert.SerializeObject(eswebcrawler1);
+ string requestJson = JsonConvert.SerializeObject(eswebcrawler1);
- using (HttpContent httpContent = new StringContent(requestJson))
- {
- httpContent.Headers.ContentType = new MediaTypeHeaderValue("application/json");
- using (HttpClient httpClient = new HttpClient())
+ using (HttpContent httpContent = new StringContent(requestJson))
{
- httpClient.Timeout = TimeSpan.FromSeconds(60);
- HttpResponseMessage responseMessage = httpClient.PutAsync(postUrl, httpContent).Result;
- result = responseMessage.Content.ReadAsStringAsync().Result;
+ httpContent.Headers.ContentType = new MediaTypeHeaderValue("application/json");
+ using (HttpClient httpClient = new HttpClient())
+ {
+ httpClient.Timeout = TimeSpan.FromSeconds(60);
+ HttpResponseMessage responseMessage = httpClient.PutAsync(postUrl, httpContent).Result;
+ result = responseMessage.Content.ReadAsStringAsync().Result;
+ }
+
}
+ Newtonsoft.Json.Linq.JObject jobject = (Newtonsoft.Json.Linq.JObject)JsonConvert.DeserializeObject(result);
+
+ if (jobject["error"] == null && jobject["_shards"]["successful"].ToString() == "1")
+ {
+
+ }
+ else
+ {
+ throw new Exception("淇濆瓨鏁版嵁搴撳嚭閿欙紒");
+ }
+
+
+ _ccontext.SaveChanges();
+
+ Thread.CurrentThread.Join(1000 * 10);//闃绘璁惧畾鏃堕棿
}
-
- Newtonsoft.Json.Linq.JObject jobject = (Newtonsoft.Json.Linq.JObject)JsonConvert.DeserializeObject(result);
-
- if (jobject["error"] == null && jobject["_shards"]["successful"].ToString() == "1")
- {
-
- }
- else
- {
- throw new Exception("淇濆瓨鏁版嵁搴撳嚭閿欙紒");
- }
-
-
- _ccontext.SaveChanges();
-
- Thread.CurrentThread.Join(1000 * 5);//闃绘璁惧畾鏃堕棿
}
}
+ catch (Exception ex)
+ {
+ logg.WriteLog(ex.Message, "涓浗鏀块噰缃戝洓宸濈渷");
+
+ logg.WriteLog(ex.StackTrace, "涓浗鏀块噰缃戝洓宸濈渷");
+
+ logg.WriteLog(ex.ToString(), "涓浗鏀块噰缃戝洓宸濈渷");
+ }
+
+
}
- catch (Exception ex)
- {
- logg.WriteLog(ex.Message, "涓浗鏀块噰缃戝洓宸濈渷");
-
- logg.WriteLog(ex.StackTrace, "涓浗鏀块噰缃戝洓宸濈渷");
-
- logg.WriteLog(ex.ToString(), "涓浗鏀块噰缃戝洓宸濈渷");
- }
-
-
}
}
+
+ page += 1;
}
- page += 1;
+ catch (Exception ex)
+ {
+ logg.WriteLog(ex.Message, "涓浗鏀块噰缃戝洓宸濈渷");
+
+ logg.WriteLog(ex.StackTrace, "涓浗鏀块噰缃戝洓宸濈渷");
+
+ logg.WriteLog(ex.ToString(), "涓浗鏀块噰缃戝洓宸濈渷");
+ Thread.CurrentThread.Join(1000 * 60 * 5);//闃绘璁惧畾鏃堕棿
+ }
+
+
}
@@ -516,7 +605,7 @@
//鎰忓悜鍏紑
- public static void yixianggonggao(WebCrawlerContext _ccontext, string sichuanpageurll, string startTime,string endTime, int page)
+ public static async void yixianggonggao(WebCrawlerContext _ccontext, string sichuanpageurll, string startTime,string endTime, int page)
{
sichuanpageurll = sichuanpageurll.Replace("start_time=", "start_time=" + startTime.Replace("-", "%3A"));
@@ -525,278 +614,361 @@
var list11 = new List<AngleSharp.Dom.IElement>();
while (true)
{
- Thread.CurrentThread.Join(1000 * 5);//闃绘璁惧畾鏃堕棿
+ Thread.CurrentThread.Join(1000 * 10);//闃绘璁惧畾鏃堕棿
logg.WriteLog(page.ToString(), "涓浗鏀块噰缃戝洓宸濈渷");
string sichuanpageurl2 = sichuanpageurll.Replace("page_index=1", "page_index=" + page);
- using (HttpClient client = new HttpClient())
+ try
{
- //List<sichuanjieshoudtl> data = new List<sichuanjieshoudtl>();
- HttpResponseMessage response = client.GetAsync(sichuanpageurl2).Result;
- var res = response.Content.ReadAsStringAsync().Result;
- var document = parser.ParseDocument(res);
- var sssdfsdfsd = document.All.Where(m => m.ClassName == "vT-srch-result-list").FirstOrDefault();
- var contentList = sssdfsdfsd.QuerySelector("ul");
- if (contentList != null)
+ HttpClientHandler handler = new HttpClientHandler();
+ handler.CookieContainer = new CookieContainer();
+ using (HttpClient client = new HttpClient(handler))
{
- var lists = contentList.QuerySelectorAll("li");
- if (lists == null || lists.Length == 0)
+ client.Timeout = TimeSpan.FromSeconds(60);
+ client.DefaultRequestHeaders.Add("Accept", "*/*");
+                        client.DefaultRequestHeaders.Add("Accept-Encoding", "gzip"); // advertise only gzip: the body is always decoded with GZipStream below, so a deflate or br response could not be read
+ client.DefaultRequestHeaders.Add("Host", "search.ccgp.gov.cn");
+ //List<sichuanjieshoudtl> data = new List<sichuanjieshoudtl>();
+ HttpResponseMessage response = client.GetAsync(sichuanpageurl2).Result;
+ //var res = response.Content.ReadAsStringAsync().Result;
+ string res = "";
+ if (response.IsSuccessStatusCode)
{
- break;
- }
- if (page != 1)
- {
- var list22 = lists.ToList();
- var breakable = true;
- if (list11.Count == list22.Count)
+                            using (var responseStream = response.Content.ReadAsStreamAsync().GetAwaiter().GetResult()) // block instead of await: this method is async void, so a suspending await would hand control back to the caller mid-crawl
{
- foreach (var list11111 in list11)
+ using (var decompressedStream = new GZipStream(responseStream, CompressionMode.Decompress))
{
- var listcount = list22.Count(x => x.InnerHtml == list11111.InnerHtml);
-
- if (listcount <= 0)
+ using (var reader = new StreamReader(decompressedStream))
{
- breakable = false;
- break;
+                                        res = reader.ReadToEnd(); // synchronous read: this method is async void, so a suspending await would hand control back to the caller mid-crawl
+ // 澶勭悊瑙e帇缂╁悗鐨勫搷搴斿唴瀹�
}
}
- if (breakable)
- {
- break;
- }
}
- list11 = list22;
}
else
{
- list11 = lists.ToList();
+ // 澶勭悊璇锋眰澶辫触鐨勬儏鍐�
}
- foreach (var sichuanjieshoudtl1 in lists)
+ var document = parser.ParseDocument(res);
+ var sssdfsdfsd = document.All.Where(m => m.ClassName == "vT-srch-result-list").FirstOrDefault();
+ var contentList = sssdfsdfsd.QuerySelector("ul");
+ if (contentList != null)
{
- Thread.CurrentThread.Join(1000 * 5);//闃绘璁惧畾鏃堕棿
- try
+ var lists = contentList.QuerySelectorAll("li");
+ if (lists == null || lists.Length == 0)
{
- sichuanjieshoudtl aaaaaaaa = new sichuanjieshoudtl();
- aaaaaaaa.pageurl = sichuanjieshoudtl1.QuerySelector("a").GetAttribute("href");
-
- logg.WriteLog(aaaaaaaa.pageurl, "涓浗鏀块噰缃戝洓宸濈渷");
- aaaaaaaa.title = sichuanjieshoudtl1.QuerySelector("a").TextContent.Replace("\n", "").Trim();
- aaaaaaaa.shorttitle = aaaaaaaa.title;
-
-
-
- string[] sssssss = sichuanjieshoudtl1.QuerySelector("span").TextContent.Replace("|", "").Split("\n");
- string Purchaser = null;
- string Agency = null;
- var NoticeTime = sssssss[0].Trim();
- foreach (var ssss in sssssss)
+ break;
+ }
+ if (page != 1)
+ {
+ var list22 = lists.ToList();
+ var breakable = true;
+ if (list11.Count == list22.Count)
{
- if (ssss.IndexOf("閲囪喘浜�") >= 0)
+ foreach (var list11111 in list11)
{
- Purchaser = ssss.Replace("閲囪喘浜�", "").Replace("锛�", "").Replace("/", "").Trim();
+ var listcount = list22.Count(x => x.InnerHtml == list11111.InnerHtml);
+
+ if (listcount <= 0)
+ {
+ breakable = false;
+ break;
+ }
}
- if (ssss.IndexOf("浠g悊鏈烘瀯") >= 0)
+ if (breakable)
{
- Agency = ssss.Replace("浠g悊鏈烘瀯", "").Replace("锛�", "").Replace("/", "").Trim();
+ break;
}
}
- using (HttpClient clientdtl = new HttpClient())
+ list11 = list22;
+ }
+ else
+ {
+ list11 = lists.ToList();
+ }
+
+ string[] bbb = null;
+ var scriptElements = document.QuerySelectorAll("script");
+ foreach (var scriptElement in scriptElements)
+ {
+ var scriptText = scriptElement.TextContent;
+
+ if (scriptText.Contains("var ohtmlurls")) // 妫�鏌ユ枃鏈腑鏄惁鍖呭惈鐗瑰畾鍙橀噺
{
- var zhengfuProjectCount = _ccontext.ZhengfuProjects.Count(x => (x.RecStatus == "A" && x.Pageurl == aaaaaaaa.pageurl) || (x.RecStatus == "A" && x.Title == aaaaaaaa.title && x.Gglx == "01" && x.OpenTenderTime == DateTime.Parse(NoticeTime)));
- if (zhengfuProjectCount <=0)
+
+                                    int startIndex = scriptText.IndexOf('"', scriptText.IndexOf("var ohtmlurls", StringComparison.Ordinal)) + 1; // search from the ohtmlurls declaration so an earlier quoted string in the same script is not picked up
+ int endIndex = scriptText.IndexOf('"', startIndex);
+ if (startIndex > 0 && endIndex > startIndex)
{
- HttpResponseMessage responsedtl = clientdtl.GetAsync(aaaaaaaa.pageurl).Result;
- var resdtl = responsedtl.Content.ReadAsStringAsync().Result;
- var documentdtl = parser.ParseDocument(resdtl);
-
- var dtl = documentdtl.All.Where(m => m.ClassName == "vF_detail_content_container").FirstOrDefault();
-
- var content = dtl.OuterHtml;
-
- var fujian = documentdtl.All.Where(m => m.ClassName == "main").FirstOrDefault();
-
- var fujians = documentdtl.All.Where(m => m.ClassName == "bizDownload").ToList();
- if (fujians != null && fujians.Count > 0)
+ var aaaa = scriptText.Substring(startIndex, endIndex - startIndex);
+ if (!string.IsNullOrEmpty(aaaa))
{
- var fujianhtml = "<div class=\"bid_attachtab\"><table width=\"700\" border=\"0\" cellspacing=\"1\" bgcolor=\"#efefef\" style=\"text-align: left; width: 100%; margin: 0px auto;\"><tbody>";
- foreach (var fujianya in fujians)
- {
- var ssss = fujianya.Id;
- var sssss = fujianya.InnerHtml;
- fujianhtml += "<tr><td class=\"bid_attachtab_content\">闄勪欢涓嬭浇锛�<a class=\"bizDownload\" target=\"_blank\" href =\"http://download.ccgp.gov.cn/oss/download?uuid=" + ssss + "\" id=\"0E1723104D34335C527765FF6CD28A\" title=\"鐐瑰嚮涓嬭浇\">" + sssss + "</a><br></td></tr>";
- }
- fujianhtml += "</tbody></table></div>";
-
- content += fujianhtml;
+ bbb = aaaa.Split(',');
}
-
-
-
- string OpenTenderCode = null;
- decimal? Budget = null;
- DateTime? OpenTenderTime = null;
-
- var td = dtl.QuerySelectorAll("p");
-
- if (td != null && td.Length > 0)
- {
-
-
- for (int i = 0; i < td.Length; i++)
- {
- if (td[i].TextContent.IndexOf("椤圭洰缂栧彿锛�") >= 0)
- {
-
- OpenTenderCode = td[i].TextContent.Replace("椤圭洰缂栧彿锛�", "").Replace("/","").Trim();
-
- }
- if (td[i].TextContent.IndexOf("棰勭畻閲戦锛�") >= 0)
- {
- var yusuan = td[i].TextContent.Replace("棰勭畻閲戦锛�", "").Replace("锛�", "").Replace("锛�", "").Replace("涓囧厓", "").Replace("浜烘皯甯�", "").Trim();
- decimal a = 0;
- if (decimal.TryParse(yusuan, out a))
- {
- Budget = a * 10000;
- }
-
-
- }
-
- if (td[i].TextContent.IndexOf("寮�鏍囨椂闂达細") >= 0)
- {
-
-
- DateTime a;
- var sss = td[i].TextContent.Replace("寮�鏍囨椂闂达細", "").Replace("锛堝寳浜椂闂达級", "").Replace("骞�", "-").Replace("鏈�", "-").Replace("鏃�", " ").Replace("鐐�", ":").Replace("鍒�", "").Trim();
- if (DateTime.TryParse(sss, out a))
- {
- OpenTenderTime = a;
- }
-
-
- }
- }
- }
-
-
-
-
-
-
- var zhengfuProject = new ZhengfuProject();
- zhengfuProject.Id = Guid.NewGuid().ToString();
- zhengfuProject.Gglx = "01";
- string gglxName = "鎰忓悜鍏紑";
- zhengfuProject.NoticeTime = DateTime.Parse(NoticeTime);
- zhengfuProject.OpenTenderTime = OpenTenderTime;
- zhengfuProject.RegionCode = null;
- //regionName = regionName;
- zhengfuProject.Sheng = "510000";
- string ShengName = "鍥涘窛鐪�";
- zhengfuProject.City = null;
- //CityName = CityName;
-
-
- zhengfuProject.Cgfs = null;
-
-
- zhengfuProject.OpenTenderCode = OpenTenderCode;
- zhengfuProject.Budget = Budget;
-
-
- zhengfuProject.Title = aaaaaaaa.title;
- zhengfuProject.Shorttitle = aaaaaaaa.shorttitle;
- zhengfuProject.Pageurl = aaaaaaaa.pageurl;
- zhengfuProject.Purchaser = Purchaser;
-
-
-
-
- zhengfuProject.Agency = Agency;
- zhengfuProject.AgencyCode = null;
- zhengfuProject.Content = content;
- zhengfuProject.RecStatus = "A";
- zhengfuProject.Creater = "1";
- zhengfuProject.Createtime = DateTime.Now;
- zhengfuProject.Modifier = "1";
- zhengfuProject.Modifytime = zhengfuProject.Createtime;
- _ccontext.ZhengfuProjects.Add(zhengfuProject);
-
-
- string url = $"{Program.api_domain}/webcrawler/_doc/" + zhengfuProject.Id;
- string result = string.Empty;
- Uri postUrl = new Uri(url);
- eswebcrawler eswebcrawler1 = new eswebcrawler();
- eswebcrawler1.Id = zhengfuProject.Id;
- eswebcrawler1.noticeTime = zhengfuProject.NoticeTime.ToString("yyyy-MM-dd HH:mm:ss");
- eswebcrawler1.openTenderTime = zhengfuProject.OpenTenderTime.HasValue ? zhengfuProject.OpenTenderTime.Value.ToString("yyyy-MM-dd HH:mm:ss") : null;
- eswebcrawler1.sheng = zhengfuProject.Sheng;
- eswebcrawler1.shengName = ShengName;
- eswebcrawler1.city = zhengfuProject.City;
- eswebcrawler1.cityName = null;
- eswebcrawler1.regionCode = zhengfuProject.RegionCode;
- eswebcrawler1.regionName = null;
- eswebcrawler1.cgfs = zhengfuProject.Cgfs;
- eswebcrawler1.cgfsName = null;
- eswebcrawler1.gglx = zhengfuProject.Gglx;
- eswebcrawler1.gglxName = gglxName;
- eswebcrawler1.openTenderCode = zhengfuProject.OpenTenderCode;
- eswebcrawler1.title = zhengfuProject.Title;
- eswebcrawler1.shorttitle = zhengfuProject.Shorttitle;
- eswebcrawler1.pageurl = zhengfuProject.Pageurl;
- eswebcrawler1.pingmu = zhengfuProject.Pingmu;
- eswebcrawler1.pingmuName = "";
- eswebcrawler1.purchaser = zhengfuProject.Purchaser;
- eswebcrawler1.budget = zhengfuProject.Budget.HasValue ? zhengfuProject.Budget.Value.ToString() : "";
- eswebcrawler1.agency = zhengfuProject.Agency;
- eswebcrawler1.agencyCode = zhengfuProject.AgencyCode;
- eswebcrawler1.modifyTime = zhengfuProject.Modifytime.ToString("yyyy-MM-dd HH:mm:ss");
-
- string requestJson = JsonConvert.SerializeObject(eswebcrawler1);
-
- using (HttpContent httpContent = new StringContent(requestJson))
- {
- httpContent.Headers.ContentType = new MediaTypeHeaderValue("application/json");
- using (HttpClient httpClient = new HttpClient())
- {
- httpClient.Timeout = TimeSpan.FromSeconds(60);
- HttpResponseMessage responseMessage = httpClient.PutAsync(postUrl, httpContent).Result;
- result = responseMessage.Content.ReadAsStringAsync().Result;
- }
-
- }
-
- Newtonsoft.Json.Linq.JObject jobject = (Newtonsoft.Json.Linq.JObject)JsonConvert.DeserializeObject(result);
-
- if (jobject["error"] == null && jobject["_shards"]["successful"].ToString() == "1")
- {
-
- }
- else
- {
- throw new Exception("淇濆瓨鏁版嵁搴撳嚭閿欙紒");
- }
-
-
- _ccontext.SaveChanges();
-
- Thread.CurrentThread.Join(1000 * 5);//闃绘璁惧畾鏃堕棿
}
}
}
- catch (Exception ex)
+
+
+
+
+ int ccc = 0;
+
+ foreach (var sichuanjieshoudtl1 in lists)
{
- logg.WriteLog(ex.Message, "涓浗鏀块噰缃戝洓宸濈渷");
+ Thread.CurrentThread.Join(1000 * 10);//闃绘璁惧畾鏃堕棿
+ try
+ {
+ sichuanjieshoudtl aaaaaaaa = new sichuanjieshoudtl();
+                                    if (bbb != null && ccc < bbb.Length && !string.IsNullOrEmpty(bbb[ccc])) // ccc must be a valid index: ccc == bbb.Length would make bbb[ccc] throw
+ {
+ aaaaaaaa.pageurl = bbb[ccc];
+ ccc = ccc + 1;
+ }
+ else
+ {
+ ccc = ccc + 1;
+ continue;
+ }
+ // aaaaaaaa.pageurl = sichuanjieshoudtl1.QuerySelector("a").GetAttribute("href");
- logg.WriteLog(ex.StackTrace, "涓浗鏀块噰缃戝洓宸濈渷");
+ logg.WriteLog(aaaaaaaa.pageurl, "涓浗鏀块噰缃戝洓宸濈渷");
+ aaaaaaaa.title = sichuanjieshoudtl1.QuerySelector("a").TextContent.Replace("\n", "").Trim();
+ aaaaaaaa.shorttitle = aaaaaaaa.title;
- logg.WriteLog(ex.ToString(), "涓浗鏀块噰缃戝洓宸濈渷");
+
+
+ string[] sssssss = sichuanjieshoudtl1.QuerySelector("span").TextContent.Replace("|", "").Split("\n");
+ string Purchaser = null;
+ string Agency = null;
+ var NoticeTime = sssssss[0].Trim();
+ foreach (var ssss in sssssss)
+ {
+ if (ssss.IndexOf("閲囪喘浜�") >= 0)
+ {
+ Purchaser = ssss.Replace("閲囪喘浜�", "").Replace("锛�", "").Replace("/", "").Trim();
+ }
+ if (ssss.IndexOf("浠g悊鏈烘瀯") >= 0)
+ {
+ Agency = ssss.Replace("浠g悊鏈烘瀯", "").Replace("锛�", "").Replace("/", "").Trim();
+ }
+ }
+ using (HttpClient clientdtl = new HttpClient())
+ {
+ var notime = DateTime.Parse(NoticeTime).ToString("yyyy-MM-dd");
+ var notimestart = DateTime.Parse(notime);
+ var notimeend = notimestart.AddDays(1);
+ var zhengfuProjectCount = _ccontext.ZhengfuProjects.Count(x => (x.RecStatus == "A" && x.Pageurl == aaaaaaaa.pageurl) || (x.RecStatus == "A" && x.Title == aaaaaaaa.title && x.Gglx == "01" && x.NoticeTime >= notimestart && x.NoticeTime < notimeend));
+ if (zhengfuProjectCount <= 0)
+ {
+ clientdtl.Timeout = TimeSpan.FromSeconds(60);
+ HttpResponseMessage responsedtl = clientdtl.GetAsync(aaaaaaaa.pageurl).Result;
+ var resdtl = responsedtl.Content.ReadAsStringAsync().Result;
+ var documentdtl = parser.ParseDocument(resdtl);
+
+ var dtl = documentdtl.All.Where(m => m.ClassName == "vF_detail_content_container").FirstOrDefault();
+
+ var content = dtl.OuterHtml;
+
+ var fujian = documentdtl.All.Where(m => m.ClassName == "main").FirstOrDefault();
+
+ var fujians = documentdtl.All.Where(m => m.ClassName == "bizDownload").ToList();
+ if (fujians != null && fujians.Count > 0)
+ {
+ var fujianhtml = "<div class=\"bid_attachtab\"><table width=\"700\" border=\"0\" cellspacing=\"1\" bgcolor=\"#efefef\" style=\"text-align: left; width: 100%; margin: 0px auto;\"><tbody>";
+ foreach (var fujianya in fujians)
+ {
+ var ssss = fujianya.Id;
+ var sssss = fujianya.InnerHtml;
+ fujianhtml += "<tr><td class=\"bid_attachtab_content\">闄勪欢涓嬭浇锛�<a class=\"bizDownload\" target=\"_blank\" href =\"https://download.ccgp.gov.cn/oss/download?uuid=" + ssss + "\" id=\"0E1723104D34335C527765FF6CD28A\" title=\"鐐瑰嚮涓嬭浇\">" + sssss + "</a><br></td></tr>";
+ }
+ fujianhtml += "</tbody></table></div>";
+
+ content += fujianhtml;
+ }
+
+
+
+ string OpenTenderCode = null;
+ decimal? Budget = null;
+ DateTime? OpenTenderTime = null;
+
+ var td = dtl.QuerySelectorAll("p");
+
+ if (td != null && td.Length > 0)
+ {
+
+
+ for (int i = 0; i < td.Length; i++)
+ {
+ if (td[i].TextContent.IndexOf("椤圭洰缂栧彿锛�") >= 0)
+ {
+ if (string.IsNullOrEmpty(OpenTenderCode))
+ OpenTenderCode = td[i].TextContent.Replace("椤圭洰缂栧彿锛�", "").Replace("/", "").Trim();
+
+ }
+ if (td[i].TextContent.IndexOf("棰勭畻閲戦锛�") >= 0)
+ {
+ var yusuan = td[i].TextContent.Replace("棰勭畻閲戦锛�", "").Replace("锛�", "").Replace("锛�", "").Replace("涓囧厓", "").Replace("浜烘皯甯�", "").Trim();
+ decimal a = 0;
+ if (decimal.TryParse(yusuan, out a))
+ {
+ Budget = a * 10000;
+ }
+
+
+ }
+
+ if (td[i].TextContent.IndexOf("寮�鏍囨椂闂达細") >= 0)
+ {
+
+
+ DateTime a;
+ var sss = td[i].TextContent.Replace("寮�鏍囨椂闂达細", "").Replace("锛堝寳浜椂闂达級", "").Replace("骞�", "-").Replace("鏈�", "-").Replace("鏃�", " ").Replace("鐐�", ":").Replace("鍒�", "").Trim();
+ if (DateTime.TryParse(sss, out a))
+ {
+ OpenTenderTime = a;
+ }
+
+
+ }
+ }
+ }
+
+
+
+
+
+
+ var zhengfuProject = new ZhengfuProject();
+ zhengfuProject.Id = Guid.NewGuid().ToString();
+ zhengfuProject.Gglx = "01";
+ string gglxName = "鎰忓悜鍏紑";
+ zhengfuProject.NoticeTime = DateTime.Parse(NoticeTime);
+ zhengfuProject.OpenTenderTime = OpenTenderTime;
+ zhengfuProject.RegionCode = null;
+ //regionName = regionName;
+ zhengfuProject.Sheng = "510000";
+ string ShengName = "鍥涘窛鐪�";
+ zhengfuProject.City = null;
+ //CityName = CityName;
+
+
+ zhengfuProject.Cgfs = null;
+
+
+ zhengfuProject.OpenTenderCode = OpenTenderCode;
+ zhengfuProject.Budget = Budget;
+
+
+ zhengfuProject.Title = aaaaaaaa.title;
+ zhengfuProject.Shorttitle = aaaaaaaa.shorttitle;
+ zhengfuProject.Pageurl = aaaaaaaa.pageurl;
+ zhengfuProject.Purchaser = Purchaser;
+
+
+
+
+ zhengfuProject.Agency = Agency;
+ zhengfuProject.AgencyCode = null;
+ zhengfuProject.Content = content;
+ zhengfuProject.RecStatus = "A";
+ zhengfuProject.Creater = "1";
+ zhengfuProject.Createtime = DateTime.Now;
+ zhengfuProject.Modifier = "1";
+ zhengfuProject.Modifytime = zhengfuProject.Createtime;
+ _ccontext.ZhengfuProjects.Add(zhengfuProject);
+
+
+ string url = $"{Program.api_domain}/webcrawler/_doc/" + zhengfuProject.Id;
+ string result = string.Empty;
+ Uri postUrl = new Uri(url);
+ eswebcrawler eswebcrawler1 = new eswebcrawler();
+ eswebcrawler1.Id = zhengfuProject.Id;
+ eswebcrawler1.noticeTime = zhengfuProject.NoticeTime.ToString("yyyy-MM-dd HH:mm:ss");
+ eswebcrawler1.openTenderTime = zhengfuProject.OpenTenderTime.HasValue ? zhengfuProject.OpenTenderTime.Value.ToString("yyyy-MM-dd HH:mm:ss") : null;
+ eswebcrawler1.sheng = zhengfuProject.Sheng;
+ eswebcrawler1.shengName = ShengName;
+ eswebcrawler1.city = zhengfuProject.City;
+ eswebcrawler1.cityName = null;
+ eswebcrawler1.regionCode = zhengfuProject.RegionCode;
+ eswebcrawler1.regionName = null;
+ eswebcrawler1.cgfs = zhengfuProject.Cgfs;
+ eswebcrawler1.cgfsName = null;
+ eswebcrawler1.gglx = zhengfuProject.Gglx;
+ eswebcrawler1.gglxName = gglxName;
+ eswebcrawler1.openTenderCode = zhengfuProject.OpenTenderCode;
+ eswebcrawler1.title = zhengfuProject.Title;
+ eswebcrawler1.shorttitle = zhengfuProject.Shorttitle;
+ eswebcrawler1.pageurl = zhengfuProject.Pageurl;
+ eswebcrawler1.pingmu = zhengfuProject.Pingmu;
+ eswebcrawler1.pingmuName = "";
+ eswebcrawler1.purchaser = zhengfuProject.Purchaser;
+ eswebcrawler1.budget = zhengfuProject.Budget.HasValue ? zhengfuProject.Budget.Value.ToString() : "";
+ eswebcrawler1.agency = zhengfuProject.Agency;
+ eswebcrawler1.agencyCode = zhengfuProject.AgencyCode;
+ eswebcrawler1.modifyTime = zhengfuProject.Modifytime.ToString("yyyy-MM-dd HH:mm:ss");
+
+ string requestJson = JsonConvert.SerializeObject(eswebcrawler1);
+
+ using (HttpContent httpContent = new StringContent(requestJson))
+ {
+ httpContent.Headers.ContentType = new MediaTypeHeaderValue("application/json");
+ using (HttpClient httpClient = new HttpClient())
+ {
+ httpClient.Timeout = TimeSpan.FromSeconds(60);
+ HttpResponseMessage responseMessage = httpClient.PutAsync(postUrl, httpContent).Result;
+ result = responseMessage.Content.ReadAsStringAsync().Result;
+ }
+
+ }
+
+ Newtonsoft.Json.Linq.JObject jobject = (Newtonsoft.Json.Linq.JObject)JsonConvert.DeserializeObject(result);
+
+ if (jobject["error"] == null && jobject["_shards"]["successful"].ToString() == "1")
+ {
+
+ }
+ else
+ {
+ throw new Exception("淇濆瓨鏁版嵁搴撳嚭閿欙紒");
+ }
+
+
+ _ccontext.SaveChanges();
+
+ Thread.CurrentThread.Join(1000 * 10);//闃绘璁惧畾鏃堕棿
+ }
+ }
+ }
+ catch (Exception ex)
+ {
+ logg.WriteLog(ex.Message, "涓浗鏀块噰缃戝洓宸濈渷");
+
+ logg.WriteLog(ex.StackTrace, "涓浗鏀块噰缃戝洓宸濈渷");
+
+ logg.WriteLog(ex.ToString(), "涓浗鏀块噰缃戝洓宸濈渷");
+ }
+
+
}
-
-
}
}
+
+ page += 1;
}
- page += 1;
+ catch (Exception ex)
+ {
+ logg.WriteLog(ex.Message, "涓浗鏀块噰缃戝洓宸濈渷");
+
+ logg.WriteLog(ex.StackTrace, "涓浗鏀块噰缃戝洓宸濈渷");
+
+ logg.WriteLog(ex.ToString(), "涓浗鏀块噰缃戝洓宸濈渷");
+ Thread.CurrentThread.Join(1000 * 60 * 5);//闃绘璁惧畾鏃堕棿
+ }
+
+
+
}
@@ -810,7 +982,7 @@
//缁撴灉鍏憡
- public static void zhongbiaogonggao(WebCrawlerContext _ccontext, string sichuanpageurll, string startTime, string endTime , int page)
+ public static async void zhongbiaogonggao(WebCrawlerContext _ccontext, string sichuanpageurll, string startTime, string endTime , int page)
{
sichuanpageurll = sichuanpageurll.Replace("start_time=", "start_time=" + startTime.Replace("-", "%3A"));
@@ -819,332 +991,413 @@
var list11 = new List<AngleSharp.Dom.IElement>();
while (true)
{
- Thread.CurrentThread.Join(1000 * 5);//闃绘璁惧畾鏃堕棿
+ Thread.CurrentThread.Join(1000 * 10);//闃绘璁惧畾鏃堕棿
logg.WriteLog(page.ToString(), "涓浗鏀块噰缃戝洓宸濈渷");
string sichuanpageurl2 = sichuanpageurll.Replace("page_index=1", "page_index=" + page);
- using (HttpClient client = new HttpClient())
+ try
{
- //List<sichuanjieshoudtl> data = new List<sichuanjieshoudtl>();
- HttpResponseMessage response = client.GetAsync(sichuanpageurl2).Result;
- var res = response.Content.ReadAsStringAsync().Result;
- var document = parser.ParseDocument(res);
- var sssdfsdfsd = document.All.Where(m => m.ClassName == "vT-srch-result-list").FirstOrDefault();
- var contentList = sssdfsdfsd.QuerySelector("ul");
- if (contentList != null)
+ HttpClientHandler handler = new HttpClientHandler();
+ handler.CookieContainer = new CookieContainer();
+ using (HttpClient client = new HttpClient(handler))
{
- var lists = contentList.QuerySelectorAll("li");
- if (lists == null || lists.Length == 0)
+ client.Timeout = TimeSpan.FromSeconds(60);
+ client.DefaultRequestHeaders.Add("Accept", "*/*");
+ client.DefaultRequestHeaders.Add("Accept-Encoding", "gzip, deflate, br");
+ client.DefaultRequestHeaders.Add("Host", "search.ccgp.gov.cn");
+ //List<sichuanjieshoudtl> data = new List<sichuanjieshoudtl>();
+ HttpResponseMessage response = client.GetAsync(sichuanpageurl2).Result;
+ //var res = response.Content.ReadAsStringAsync().Result;
+ string res = "";
+ if (response.IsSuccessStatusCode)
{
- break;
- }
- if (page != 1)
- {
- var list22 = lists.ToList();
- var breakable = true;
- if (list11.Count == list22.Count)
+ using (var responseStream = await response.Content.ReadAsStreamAsync())
{
- foreach (var list11111 in list11)
+ using (var decompressedStream = new GZipStream(responseStream, CompressionMode.Decompress))
{
- var listcount = list22.Count(x => x.InnerHtml == list11111.InnerHtml);
-
- if (listcount <= 0)
+ using (var reader = new StreamReader(decompressedStream))
{
- breakable = false;
- break;
+ res = await reader.ReadToEndAsync();
+ // 澶勭悊瑙e帇缂╁悗鐨勫搷搴斿唴瀹�
}
}
- if (breakable)
- {
- break;
- }
}
- list11 = list22;
}
else
{
- list11 = lists.ToList();
+ // 澶勭悊璇锋眰澶辫触鐨勬儏鍐�
}
-
- foreach (var sichuanjieshoudtl1 in lists)
+ var document = parser.ParseDocument(res);
+ var sssdfsdfsd = document.All.Where(m => m.ClassName == "vT-srch-result-list").FirstOrDefault();
+ var contentList = sssdfsdfsd.QuerySelector("ul");
+ if (contentList != null)
{
- Thread.CurrentThread.Join(1000 * 5);//闃绘璁惧畾鏃堕棿
- try
+ var lists = contentList.QuerySelectorAll("li");
+ if (lists == null || lists.Length == 0)
{
- sichuanjieshoudtl aaaaaaaa = new sichuanjieshoudtl();
- aaaaaaaa.pageurl = sichuanjieshoudtl1.QuerySelector("a").GetAttribute("href");
-
- logg.WriteLog(aaaaaaaa.pageurl, "涓浗鏀块噰缃戝洓宸濈渷");
- aaaaaaaa.title = sichuanjieshoudtl1.QuerySelector("a").TextContent.Replace("\n", "").Trim();
- aaaaaaaa.shorttitle = aaaaaaaa.title;
-
-
-
- string[] sssssss = sichuanjieshoudtl1.QuerySelector("span").TextContent.Replace("|", "").Split("\n");
- string Purchaser = null;
- string Agency = null;
- var NoticeTime = sssssss[0].Trim();
- foreach (var ssss in sssssss)
+ break;
+ }
+ if (page != 1)
+ {
+ var list22 = lists.ToList();
+ var breakable = true;
+ if (list11.Count == list22.Count)
{
- if (ssss.IndexOf("閲囪喘浜�") >= 0)
+ foreach (var list11111 in list11)
{
- Purchaser = ssss.Replace("閲囪喘浜�", "").Replace("锛�", "").Replace("/", "").Trim();
+ var listcount = list22.Count(x => x.InnerHtml == list11111.InnerHtml);
+
+ if (listcount <= 0)
+ {
+ breakable = false;
+ break;
+ }
}
- if (ssss.IndexOf("浠g悊鏈烘瀯") >= 0)
+ if (breakable)
{
- Agency = ssss.Replace("浠g悊鏈烘瀯", "").Replace("锛�", "").Replace("/", "").Trim();
+ break;
}
}
- using (HttpClient clientdtl = new HttpClient())
+ list11 = list22;
+ }
+ else
+ {
+ list11 = lists.ToList();
+ }
+
+ string[] bbb = null;
+ var scriptElements = document.QuerySelectorAll("script");
+ foreach (var scriptElement in scriptElements)
+ {
+ var scriptText = scriptElement.TextContent;
+
+ if (scriptText.Contains("var ohtmlurls")) // 妫�鏌ユ枃鏈腑鏄惁鍖呭惈鐗瑰畾鍙橀噺
{
- var zhengfuProjectCount = _ccontext.ZhengfuProjects.Count(x => (x.RecStatus == "A" && x.Pageurl == aaaaaaaa.pageurl) || (x.RecStatus == "A" && x.Title == aaaaaaaa.title && x.Gglx == "03" && x.OpenTenderTime == DateTime.Parse(NoticeTime)));
- if (zhengfuProjectCount <=0)
+
+ int startIndex = scriptText.IndexOf('"') + 1;
+ int endIndex = scriptText.IndexOf('"', startIndex);
+ if (startIndex > 0 && endIndex > startIndex)
{
- HttpResponseMessage responsedtl = clientdtl.GetAsync(aaaaaaaa.pageurl).Result;
- var resdtl = responsedtl.Content.ReadAsStringAsync().Result;
- var documentdtl = parser.ParseDocument(resdtl);
-
- var dtl = documentdtl.All.Where(m => m.ClassName == "vF_detail_content_container").FirstOrDefault();
-
- var content = dtl.OuterHtml;
-
- var fujian = documentdtl.All.Where(m => m.ClassName == "main").FirstOrDefault();
-
- var fujians = documentdtl.All.Where(m => m.ClassName == "bizDownload").ToList();
- if (fujians != null && fujians.Count > 0)
+ var aaaa = scriptText.Substring(startIndex, endIndex - startIndex);
+ if (!string.IsNullOrEmpty(aaaa))
{
- var fujianhtml = "<div class=\"bid_attachtab\"><table width=\"700\" border=\"0\" cellspacing=\"1\" bgcolor=\"#efefef\" style=\"text-align: left; width: 100%; margin: 0px auto;\"><tbody>";
- foreach (var fujianya in fujians)
- {
- var ssss = fujianya.Id;
- var sssss = fujianya.InnerHtml;
- fujianhtml += "<tr><td class=\"bid_attachtab_content\">闄勪欢涓嬭浇锛�<a class=\"bizDownload\" target=\"_blank\" href =\"http://download.ccgp.gov.cn/oss/download?uuid=" + ssss + "\" id=\"0E1723104D34335C527765FF6CD28A\" title=\"鐐瑰嚮涓嬭浇\">" + sssss + "</a><br></td></tr>";
- }
- fujianhtml += "</tbody></table></div>";
-
- content += fujianhtml;
+ bbb = aaaa.Split(',');
}
-
-
-
- string OpenTenderCode = null;
- decimal? Budget = null;
- DateTime? OpenTenderTime = null;
-
- var td = dtl.QuerySelectorAll("p");
-
- if (td != null && td.Length > 0)
- {
-
-
- for (int i = 0; i < td.Length; i++)
- {
- if (td[i].TextContent.IndexOf("椤圭洰缂栧彿锛�") >= 0)
- {
-
- OpenTenderCode = td[i].TextContent.Replace("椤圭洰缂栧彿锛�", "").Replace("涓�銆�", "").Trim();
-
- if (OpenTenderCode.IndexOf("锛堟嫑鏍囨枃浠剁紪鍙�")>=0)
- {
- OpenTenderCode.Substring(0, OpenTenderCode.IndexOf("锛堟嫑鏍囨枃浠剁紪鍙�")+1).Replace("锛�", "");
- }
-
- }
- if (td[i].TextContent.IndexOf("棰勭畻閲戦锛�") >= 0)
- {
- var yusuan = td[i].TextContent.Replace("棰勭畻閲戦锛�", "").Replace("锛�", "").Replace("锛�", "").Replace("涓囧厓", "").Replace("浜烘皯甯�", "").Trim();
- decimal a = 0;
- if (decimal.TryParse(yusuan, out a))
- {
- Budget = a * 10000;
- }
-
-
- }
-
- if (td[i].TextContent.IndexOf("寮�鏍囨椂闂达細") >= 0)
- {
-
-
- DateTime a;
- var sss = td[i].TextContent.Replace("寮�鏍囨椂闂达細", "").Replace("锛堝寳浜椂闂达級", "").Replace("骞�", "-").Replace("鏈�", "-").Replace("鏃�", " ").Replace("鐐�", ":").Replace("鍒�", "").Trim();
- if (DateTime.TryParse(sss, out a))
- {
- OpenTenderTime = a;
- }
-
-
- }
- }
- }
-
-
- td = dtl.QuerySelectorAll("h4");
-
- if (td != null && td.Length > 0)
- {
-
-
- for (int i = 0; i < td.Length; i++)
- {
- if (td[i].TextContent.IndexOf("椤圭洰缂栧彿锛�") >= 0)
- {
-
- OpenTenderCode = td[i].TextContent.Replace("椤圭洰缂栧彿锛�", "").Replace("涓�銆�", "").Trim();
-
- if (OpenTenderCode.IndexOf("锛堟嫑鏍囨枃浠剁紪鍙�") >= 0)
- {
- OpenTenderCode.Substring(0, OpenTenderCode.IndexOf("锛堟嫑鏍囨枃浠剁紪鍙�") + 1).Replace("锛�", "");
- }
-
- }
- if (td[i].TextContent.IndexOf("棰勭畻閲戦锛�") >= 0)
- {
- var yusuan = td[i].TextContent.Replace("棰勭畻閲戦锛�", "").Replace("锛�", "").Replace("锛�", "").Replace("涓囧厓", "").Replace("浜烘皯甯�", "").Trim();
- decimal a = 0;
- if (decimal.TryParse(yusuan, out a))
- {
- Budget = a * 10000;
- }
-
-
- }
-
- if (td[i].TextContent.IndexOf("寮�鏍囨椂闂达細") >= 0)
- {
-
-
- DateTime a;
- var sss = td[i].TextContent.Replace("寮�鏍囨椂闂达細", "").Replace("锛堝寳浜椂闂达級", "").Replace("骞�", "-").Replace("鏈�", "-").Replace("鏃�", " ").Replace("鐐�", ":").Replace("鍒�", "").Trim();
- if (DateTime.TryParse(sss, out a))
- {
- OpenTenderTime = a;
- }
-
-
- }
- }
- }
-
-
-
-
-
-
- var zhengfuProject = new ZhengfuProject();
- zhengfuProject.Id = Guid.NewGuid().ToString();
- zhengfuProject.Gglx = "03";
- string gglxName = "缁撴灉鍏憡";
- zhengfuProject.NoticeTime = DateTime.Parse(NoticeTime);
- zhengfuProject.OpenTenderTime = OpenTenderTime;
- zhengfuProject.RegionCode = null;
- //regionName = regionName;
- zhengfuProject.Sheng = "510000";
- string ShengName = "鍥涘窛鐪�";
- zhengfuProject.City = null;
- //CityName = CityName;
-
-
- zhengfuProject.Cgfs = null;
-
-
- zhengfuProject.OpenTenderCode = OpenTenderCode;
- zhengfuProject.Budget = Budget;
-
-
- zhengfuProject.Title = aaaaaaaa.title;
- zhengfuProject.Shorttitle = aaaaaaaa.shorttitle;
- zhengfuProject.Pageurl = aaaaaaaa.pageurl;
- zhengfuProject.Purchaser = Purchaser;
-
-
-
-
- zhengfuProject.Agency = Agency;
- zhengfuProject.AgencyCode = null;
- zhengfuProject.Content = content;
- zhengfuProject.RecStatus = "A";
- zhengfuProject.Creater = "1";
- zhengfuProject.Createtime = DateTime.Now;
- zhengfuProject.Modifier = "1";
- zhengfuProject.Modifytime = zhengfuProject.Createtime;
- _ccontext.ZhengfuProjects.Add(zhengfuProject);
-
-
- string url = $"{Program.api_domain}/webcrawler/_doc/" + zhengfuProject.Id;
- string result = string.Empty;
- Uri postUrl = new Uri(url);
- eswebcrawler eswebcrawler1 = new eswebcrawler();
- eswebcrawler1.Id = zhengfuProject.Id;
- eswebcrawler1.noticeTime = zhengfuProject.NoticeTime.ToString("yyyy-MM-dd HH:mm:ss");
- eswebcrawler1.openTenderTime = zhengfuProject.OpenTenderTime.HasValue ? zhengfuProject.OpenTenderTime.Value.ToString("yyyy-MM-dd HH:mm:ss") : null;
- eswebcrawler1.sheng = zhengfuProject.Sheng;
- eswebcrawler1.shengName = ShengName;
- eswebcrawler1.city = zhengfuProject.City;
- eswebcrawler1.cityName = null;
- eswebcrawler1.regionCode = zhengfuProject.RegionCode;
- eswebcrawler1.regionName = null;
- eswebcrawler1.cgfs = zhengfuProject.Cgfs;
- eswebcrawler1.cgfsName = null;
- eswebcrawler1.gglx = zhengfuProject.Gglx;
- eswebcrawler1.gglxName = gglxName;
- eswebcrawler1.openTenderCode = zhengfuProject.OpenTenderCode;
- eswebcrawler1.title = zhengfuProject.Title;
- eswebcrawler1.shorttitle = zhengfuProject.Shorttitle;
- eswebcrawler1.pageurl = zhengfuProject.Pageurl;
- eswebcrawler1.pingmu = zhengfuProject.Pingmu;
- eswebcrawler1.pingmuName = "";
- eswebcrawler1.purchaser = zhengfuProject.Purchaser;
- eswebcrawler1.budget = zhengfuProject.Budget.HasValue ? zhengfuProject.Budget.Value.ToString() : "";
- eswebcrawler1.agency = zhengfuProject.Agency;
- eswebcrawler1.agencyCode = zhengfuProject.AgencyCode;
- eswebcrawler1.modifyTime = zhengfuProject.Modifytime.ToString("yyyy-MM-dd HH:mm:ss");
-
- string requestJson = JsonConvert.SerializeObject(eswebcrawler1);
-
- using (HttpContent httpContent = new StringContent(requestJson))
- {
- httpContent.Headers.ContentType = new MediaTypeHeaderValue("application/json");
- using (HttpClient httpClient = new HttpClient())
- {
- httpClient.Timeout = TimeSpan.FromSeconds(60);
- HttpResponseMessage responseMessage = httpClient.PutAsync(postUrl, httpContent).Result;
- result = responseMessage.Content.ReadAsStringAsync().Result;
- }
-
- }
-
- Newtonsoft.Json.Linq.JObject jobject = (Newtonsoft.Json.Linq.JObject)JsonConvert.DeserializeObject(result);
-
- if (jobject["error"] == null && jobject["_shards"]["successful"].ToString() == "1")
- {
-
- }
- else
- {
- throw new Exception("淇濆瓨鏁版嵁搴撳嚭閿欙紒");
- }
-
-
- _ccontext.SaveChanges();
-
- Thread.CurrentThread.Join(1000 * 5);//闃绘璁惧畾鏃堕棿
}
}
}
- catch (Exception ex)
+
+
+
+
+ int ccc = 0;
+
+ foreach (var sichuanjieshoudtl1 in lists)
{
- logg.WriteLog(ex.Message, "涓浗鏀块噰缃戝洓宸濈渷");
+ Thread.CurrentThread.Join(1000 * 10);//闃绘璁惧畾鏃堕棿
+ try
+ {
+ sichuanjieshoudtl aaaaaaaa = new sichuanjieshoudtl();
+ if (bbb != null && bbb.Length >= ccc && !string.IsNullOrEmpty(bbb[ccc]))
+ {
+ aaaaaaaa.pageurl = bbb[ccc];
+ ccc = ccc + 1;
+ }
+ else
+ {
+ ccc = ccc + 1;
+ continue;
+ }
+ // aaaaaaaa.pageurl = sichuanjieshoudtl1.QuerySelector("a").GetAttribute("href");
- logg.WriteLog(ex.StackTrace, "涓浗鏀块噰缃戝洓宸濈渷");
+ logg.WriteLog(aaaaaaaa.pageurl, "涓浗鏀块噰缃戝洓宸濈渷");
+ aaaaaaaa.title = sichuanjieshoudtl1.QuerySelector("a").TextContent.Replace("\n", "").Trim();
+ aaaaaaaa.shorttitle = aaaaaaaa.title;
- logg.WriteLog(ex.ToString(), "涓浗鏀块噰缃戝洓宸濈渷");
+
+
+ string[] sssssss = sichuanjieshoudtl1.QuerySelector("span").TextContent.Replace("|", "").Split("\n");
+ string Purchaser = null;
+ string Agency = null;
+ var NoticeTime = sssssss[0].Trim();
+ foreach (var ssss in sssssss)
+ {
+ if (ssss.IndexOf("閲囪喘浜�") >= 0)
+ {
+ Purchaser = ssss.Replace("閲囪喘浜�", "").Replace("锛�", "").Replace("/", "").Trim();
+ }
+ if (ssss.IndexOf("浠g悊鏈烘瀯") >= 0)
+ {
+ Agency = ssss.Replace("浠g悊鏈烘瀯", "").Replace("锛�", "").Replace("/", "").Trim();
+ }
+ }
+ using (HttpClient clientdtl = new HttpClient())
+ {
+ var notime = DateTime.Parse(NoticeTime).ToString("yyyy-MM-dd");
+ var notimestart = DateTime.Parse(notime);
+ var notimeend = notimestart.AddDays(1);
+ var zhengfuProjectCount = _ccontext.ZhengfuProjects.Count(x => (x.RecStatus == "A" && x.Pageurl == aaaaaaaa.pageurl) || (x.RecStatus == "A" && x.Title == aaaaaaaa.title && x.Gglx == "03" && x.NoticeTime >= notimestart && x.NoticeTime < notimeend));
+ if (zhengfuProjectCount <= 0)
+ {
+ clientdtl.Timeout = TimeSpan.FromSeconds(60);
+ HttpResponseMessage responsedtl = clientdtl.GetAsync(aaaaaaaa.pageurl).Result;
+ var resdtl = responsedtl.Content.ReadAsStringAsync().Result;
+ var documentdtl = parser.ParseDocument(resdtl);
+
+ var dtl = documentdtl.All.Where(m => m.ClassName == "vF_detail_content_container").FirstOrDefault();
+
+ var content = dtl.OuterHtml;
+
+ var fujian = documentdtl.All.Where(m => m.ClassName == "main").FirstOrDefault();
+
+ var fujians = documentdtl.All.Where(m => m.ClassName == "bizDownload").ToList();
+ if (fujians != null && fujians.Count > 0)
+ {
+ var fujianhtml = "<div class=\"bid_attachtab\"><table width=\"700\" border=\"0\" cellspacing=\"1\" bgcolor=\"#efefef\" style=\"text-align: left; width: 100%; margin: 0px auto;\"><tbody>";
+ foreach (var fujianya in fujians)
+ {
+ var ssss = fujianya.Id;
+ var sssss = fujianya.InnerHtml;
+ fujianhtml += "<tr><td class=\"bid_attachtab_content\">闄勪欢涓嬭浇锛�<a class=\"bizDownload\" target=\"_blank\" href =\"https://download.ccgp.gov.cn/oss/download?uuid=" + ssss + "\" id=\"0E1723104D34335C527765FF6CD28A\" title=\"鐐瑰嚮涓嬭浇\">" + sssss + "</a><br></td></tr>";
+ }
+ fujianhtml += "</tbody></table></div>";
+
+ content += fujianhtml;
+ }
+
+
+
+ string OpenTenderCode = null;
+ decimal? Budget = null;
+ DateTime? OpenTenderTime = null;
+
+ var td = dtl.QuerySelectorAll("p");
+
+ if (td != null && td.Length > 0)
+ {
+
+
+ for (int i = 0; i < td.Length; i++)
+ {
+ if (td[i].TextContent.IndexOf("椤圭洰缂栧彿锛�") >= 0)
+ {
+ if (string.IsNullOrEmpty(OpenTenderCode))
+ OpenTenderCode = td[i].TextContent.Replace("椤圭洰缂栧彿锛�", "").Replace("涓�銆�", "").Trim();
+
+ if (OpenTenderCode.IndexOf("锛堟嫑鏍囨枃浠剁紪鍙�") >= 0)
+ {
+ OpenTenderCode.Substring(0, OpenTenderCode.IndexOf("锛堟嫑鏍囨枃浠剁紪鍙�") + 1).Replace("锛�", "");
+ }
+
+ }
+ if (td[i].TextContent.IndexOf("棰勭畻閲戦锛�") >= 0)
+ {
+ var yusuan = td[i].TextContent.Replace("棰勭畻閲戦锛�", "").Replace("锛�", "").Replace("锛�", "").Replace("涓囧厓", "").Replace("浜烘皯甯�", "").Trim();
+ decimal a = 0;
+ if (decimal.TryParse(yusuan, out a))
+ {
+ Budget = a * 10000;
+ }
+
+
+ }
+
+ if (td[i].TextContent.IndexOf("寮�鏍囨椂闂达細") >= 0)
+ {
+
+
+ DateTime a;
+ var sss = td[i].TextContent.Replace("寮�鏍囨椂闂达細", "").Replace("锛堝寳浜椂闂达級", "").Replace("骞�", "-").Replace("鏈�", "-").Replace("鏃�", " ").Replace("鐐�", ":").Replace("鍒�", "").Trim();
+ if (DateTime.TryParse(sss, out a))
+ {
+ OpenTenderTime = a;
+ }
+
+
+ }
+ }
+ }
+
+
+ td = dtl.QuerySelectorAll("h4");
+
+ if (td != null && td.Length > 0)
+ {
+
+
+ for (int i = 0; i < td.Length; i++)
+ {
+ if (td[i].TextContent.IndexOf("椤圭洰缂栧彿锛�") >= 0)
+ {
+ if (string.IsNullOrEmpty(OpenTenderCode))
+ OpenTenderCode = td[i].TextContent.Replace("椤圭洰缂栧彿锛�", "").Replace("涓�銆�", "").Trim();
+
+ if (OpenTenderCode.IndexOf("锛堟嫑鏍囨枃浠剁紪鍙�") >= 0)
+ {
+ OpenTenderCode.Substring(0, OpenTenderCode.IndexOf("锛堟嫑鏍囨枃浠剁紪鍙�") + 1).Replace("锛�", "");
+ }
+
+ }
+ if (td[i].TextContent.IndexOf("棰勭畻閲戦锛�") >= 0)
+ {
+ var yusuan = td[i].TextContent.Replace("棰勭畻閲戦锛�", "").Replace("锛�", "").Replace("锛�", "").Replace("涓囧厓", "").Replace("浜烘皯甯�", "").Trim();
+ decimal a = 0;
+ if (decimal.TryParse(yusuan, out a))
+ {
+ Budget = a * 10000;
+ }
+
+
+ }
+
+ if (td[i].TextContent.IndexOf("寮�鏍囨椂闂达細") >= 0)
+ {
+
+
+ DateTime a;
+ var sss = td[i].TextContent.Replace("寮�鏍囨椂闂达細", "").Replace("锛堝寳浜椂闂达級", "").Replace("骞�", "-").Replace("鏈�", "-").Replace("鏃�", " ").Replace("鐐�", ":").Replace("鍒�", "").Trim();
+ if (DateTime.TryParse(sss, out a))
+ {
+ OpenTenderTime = a;
+ }
+
+
+ }
+ }
+ }
+
+
+
+
+
+
+ var zhengfuProject = new ZhengfuProject();
+ zhengfuProject.Id = Guid.NewGuid().ToString();
+ zhengfuProject.Gglx = "03";
+ string gglxName = "缁撴灉鍏憡";
+ zhengfuProject.NoticeTime = DateTime.Parse(NoticeTime);
+ zhengfuProject.OpenTenderTime = OpenTenderTime;
+ zhengfuProject.RegionCode = null;
+ //regionName = regionName;
+ zhengfuProject.Sheng = "510000";
+ string ShengName = "鍥涘窛鐪�";
+ zhengfuProject.City = null;
+ //CityName = CityName;
+
+
+ zhengfuProject.Cgfs = null;
+
+
+ zhengfuProject.OpenTenderCode = OpenTenderCode;
+ zhengfuProject.Budget = Budget;
+
+
+ zhengfuProject.Title = aaaaaaaa.title;
+ zhengfuProject.Shorttitle = aaaaaaaa.shorttitle;
+ zhengfuProject.Pageurl = aaaaaaaa.pageurl;
+ zhengfuProject.Purchaser = Purchaser;
+
+
+
+
+ zhengfuProject.Agency = Agency;
+ zhengfuProject.AgencyCode = null;
+ zhengfuProject.Content = content;
+ zhengfuProject.RecStatus = "A";
+ zhengfuProject.Creater = "1";
+ zhengfuProject.Createtime = DateTime.Now;
+ zhengfuProject.Modifier = "1";
+ zhengfuProject.Modifytime = zhengfuProject.Createtime;
+ _ccontext.ZhengfuProjects.Add(zhengfuProject);
+
+
+ string url = $"{Program.api_domain}/webcrawler/_doc/" + zhengfuProject.Id;
+ string result = string.Empty;
+ Uri postUrl = new Uri(url);
+ eswebcrawler eswebcrawler1 = new eswebcrawler();
+ eswebcrawler1.Id = zhengfuProject.Id;
+ eswebcrawler1.noticeTime = zhengfuProject.NoticeTime.ToString("yyyy-MM-dd HH:mm:ss");
+ eswebcrawler1.openTenderTime = zhengfuProject.OpenTenderTime.HasValue ? zhengfuProject.OpenTenderTime.Value.ToString("yyyy-MM-dd HH:mm:ss") : null;
+ eswebcrawler1.sheng = zhengfuProject.Sheng;
+ eswebcrawler1.shengName = ShengName;
+ eswebcrawler1.city = zhengfuProject.City;
+ eswebcrawler1.cityName = null;
+ eswebcrawler1.regionCode = zhengfuProject.RegionCode;
+ eswebcrawler1.regionName = null;
+ eswebcrawler1.cgfs = zhengfuProject.Cgfs;
+ eswebcrawler1.cgfsName = null;
+ eswebcrawler1.gglx = zhengfuProject.Gglx;
+ eswebcrawler1.gglxName = gglxName;
+ eswebcrawler1.openTenderCode = zhengfuProject.OpenTenderCode;
+ eswebcrawler1.title = zhengfuProject.Title;
+ eswebcrawler1.shorttitle = zhengfuProject.Shorttitle;
+ eswebcrawler1.pageurl = zhengfuProject.Pageurl;
+ eswebcrawler1.pingmu = zhengfuProject.Pingmu;
+ eswebcrawler1.pingmuName = "";
+ eswebcrawler1.purchaser = zhengfuProject.Purchaser;
+ eswebcrawler1.budget = zhengfuProject.Budget.HasValue ? zhengfuProject.Budget.Value.ToString() : "";
+ eswebcrawler1.agency = zhengfuProject.Agency;
+ eswebcrawler1.agencyCode = zhengfuProject.AgencyCode;
+ eswebcrawler1.modifyTime = zhengfuProject.Modifytime.ToString("yyyy-MM-dd HH:mm:ss");
+
+ string requestJson = JsonConvert.SerializeObject(eswebcrawler1);
+
+ using (HttpContent httpContent = new StringContent(requestJson))
+ {
+ httpContent.Headers.ContentType = new MediaTypeHeaderValue("application/json");
+ using (HttpClient httpClient = new HttpClient())
+ {
+ httpClient.Timeout = TimeSpan.FromSeconds(60);
+ HttpResponseMessage responseMessage = httpClient.PutAsync(postUrl, httpContent).Result;
+ result = responseMessage.Content.ReadAsStringAsync().Result;
+ }
+
+ }
+
+ Newtonsoft.Json.Linq.JObject jobject = (Newtonsoft.Json.Linq.JObject)JsonConvert.DeserializeObject(result);
+
+ if (jobject["error"] == null && jobject["_shards"]["successful"].ToString() == "1")
+ {
+
+ }
+ else
+ {
+ throw new Exception("淇濆瓨鏁版嵁搴撳嚭閿欙紒");
+ }
+
+
+ _ccontext.SaveChanges();
+
+ Thread.CurrentThread.Join(1000 * 10);//闃绘璁惧畾鏃堕棿
+ }
+ }
+ }
+ catch (Exception ex)
+ {
+ logg.WriteLog(ex.Message, "涓浗鏀块噰缃戝洓宸濈渷");
+
+ logg.WriteLog(ex.StackTrace, "涓浗鏀块噰缃戝洓宸濈渷");
+
+ logg.WriteLog(ex.ToString(), "涓浗鏀块噰缃戝洓宸濈渷");
+ }
+
+
}
-
-
}
}
+ page += 1;
}
- page += 1;
+ catch (Exception ex)
+ {
+ logg.WriteLog(ex.Message, "涓浗鏀块噰缃戝洓宸濈渷");
+
+ logg.WriteLog(ex.StackTrace, "涓浗鏀块噰缃戝洓宸濈渷");
+
+ logg.WriteLog(ex.ToString(), "涓浗鏀块噰缃戝洓宸濈渷");
+ Thread.CurrentThread.Join(1000 * 60 * 5);//闃绘璁惧畾鏃堕棿
+ }
+
+
+
}
@@ -1157,7 +1410,7 @@
}
//更正公告
- public static void gengzhenggonggao(WebCrawlerContext _ccontext, string sichuanpageurll, string startTime, string endTime , int page)
+ public static async void gengzhenggonggao(WebCrawlerContext _ccontext, string sichuanpageurll, string startTime, string endTime , int page)
{
sichuanpageurll = sichuanpageurll.Replace("start_time=", "start_time=" + startTime.Replace("-", "%3A"));
@@ -1166,278 +1419,360 @@
var list11 = new List<AngleSharp.Dom.IElement>();
while (true)
{
- Thread.CurrentThread.Join(1000 * 5);//闃绘璁惧畾鏃堕棿
+ Thread.CurrentThread.Join(1000 * 10);//闃绘璁惧畾鏃堕棿
logg.WriteLog(page.ToString(), "涓浗鏀块噰缃戝洓宸濈渷");
string sichuanpageurl2 = sichuanpageurll.Replace("page_index=1", "page_index=" + page);
- using (HttpClient client = new HttpClient())
+ try
{
- //List<sichuanjieshoudtl> data = new List<sichuanjieshoudtl>();
- HttpResponseMessage response = client.GetAsync(sichuanpageurl2).Result;
- var res = response.Content.ReadAsStringAsync().Result;
- var document = parser.ParseDocument(res);
- var sssdfsdfsd = document.All.Where(m => m.ClassName == "vT-srch-result-list").FirstOrDefault();
- var contentList = sssdfsdfsd.QuerySelector("ul");
- if (contentList != null)
+ HttpClientHandler handler = new HttpClientHandler();
+ handler.CookieContainer = new CookieContainer();
+ using (HttpClient client = new HttpClient(handler))
{
- var lists = contentList.QuerySelectorAll("li");
- if (lists == null || lists.Length == 0)
+ client.Timeout = TimeSpan.FromSeconds(60);
+ client.DefaultRequestHeaders.Add("Accept", "*/*");
+ client.DefaultRequestHeaders.Add("Accept-Encoding", "gzip, deflate, br");
+ client.DefaultRequestHeaders.Add("Host", "search.ccgp.gov.cn");
+ //List<sichuanjieshoudtl> data = new List<sichuanjieshoudtl>();
+ HttpResponseMessage response = client.GetAsync(sichuanpageurl2).Result;
+ //var res = response.Content.ReadAsStringAsync().Result;
+ string res = "";
+ if (response.IsSuccessStatusCode)
{
- break;
- }
- if (page != 1)
- {
- var list22 = lists.ToList();
- var breakable = true;
- if (list11.Count == list22.Count)
+ using (var responseStream = await response.Content.ReadAsStreamAsync())
{
- foreach (var list11111 in list11)
+ using (var decompressedStream = new GZipStream(responseStream, CompressionMode.Decompress))
{
- var listcount = list22.Count(x => x.InnerHtml == list11111.InnerHtml);
-
- if (listcount <= 0)
+ using (var reader = new StreamReader(decompressedStream))
{
- breakable = false;
- break;
+ res = await reader.ReadToEndAsync();
+ // 澶勭悊瑙e帇缂╁悗鐨勫搷搴斿唴瀹�
}
}
- if (breakable)
- {
- break;
- }
}
- list11 = list22;
}
else
{
- list11 = lists.ToList();
+ // 澶勭悊璇锋眰澶辫触鐨勬儏鍐�
}
-
- foreach (var sichuanjieshoudtl1 in lists)
+ var document = parser.ParseDocument(res);
+ var sssdfsdfsd = document.All.Where(m => m.ClassName == "vT-srch-result-list").FirstOrDefault();
+ var contentList = sssdfsdfsd.QuerySelector("ul");
+ if (contentList != null)
{
- Thread.CurrentThread.Join(1000 * 5);//闃绘璁惧畾鏃堕棿
- try
+ var lists = contentList.QuerySelectorAll("li");
+ if (lists == null || lists.Length == 0)
{
- sichuanjieshoudtl aaaaaaaa = new sichuanjieshoudtl();
- aaaaaaaa.pageurl = sichuanjieshoudtl1.QuerySelector("a").GetAttribute("href");
-
- logg.WriteLog(aaaaaaaa.pageurl, "涓浗鏀块噰缃戝洓宸濈渷");
- aaaaaaaa.title = sichuanjieshoudtl1.QuerySelector("a").TextContent.Replace("\n", "").Trim();
- aaaaaaaa.shorttitle = aaaaaaaa.title;
-
-
-
- string[] sssssss = sichuanjieshoudtl1.QuerySelector("span").TextContent.Replace("|", "").Split("\n");
- string Purchaser = null;
- string Agency = null;
- var NoticeTime = sssssss[0].Trim();
- foreach (var ssss in sssssss)
+ break;
+ }
+ if (page != 1)
+ {
+ var list22 = lists.ToList();
+ var breakable = true;
+ if (list11.Count == list22.Count)
{
- if (ssss.IndexOf("閲囪喘浜�") >= 0)
+ foreach (var list11111 in list11)
{
- Purchaser = ssss.Replace("閲囪喘浜�", "").Replace("锛�", "").Replace("/", "").Trim();
+ var listcount = list22.Count(x => x.InnerHtml == list11111.InnerHtml);
+
+ if (listcount <= 0)
+ {
+ breakable = false;
+ break;
+ }
}
- if (ssss.IndexOf("浠g悊鏈烘瀯") >= 0)
+ if (breakable)
{
- Agency = ssss.Replace("浠g悊鏈烘瀯", "").Replace("锛�", "").Replace("/", "").Trim();
+ break;
}
}
- using (HttpClient clientdtl = new HttpClient())
+ list11 = list22;
+ }
+ else
+ {
+ list11 = lists.ToList();
+ }
+
+ string[] bbb = null;
+ var scriptElements = document.QuerySelectorAll("script");
+ foreach (var scriptElement in scriptElements)
+ {
+ var scriptText = scriptElement.TextContent;
+
+ if (scriptText.Contains("var ohtmlurls")) // 妫�鏌ユ枃鏈腑鏄惁鍖呭惈鐗瑰畾鍙橀噺
{
- var zhengfuProjectCount = _ccontext.ZhengfuProjects.Count(x => (x.RecStatus == "A" && x.Pageurl == aaaaaaaa.pageurl) || (x.RecStatus == "A" && x.Title == aaaaaaaa.title && x.Gglx == "04" && x.OpenTenderTime == DateTime.Parse(NoticeTime)));
- if (zhengfuProjectCount<=0)
+
+ int startIndex = scriptText.IndexOf('"') + 1;
+ int endIndex = scriptText.IndexOf('"', startIndex);
+ if (startIndex > 0 && endIndex > startIndex)
{
- HttpResponseMessage responsedtl = clientdtl.GetAsync(aaaaaaaa.pageurl).Result;
- var resdtl = responsedtl.Content.ReadAsStringAsync().Result;
- var documentdtl = parser.ParseDocument(resdtl);
-
- var dtl = documentdtl.All.Where(m => m.ClassName == "vF_detail_content_container").FirstOrDefault();
-
- var content = dtl.OuterHtml;
-
- var fujian = documentdtl.All.Where(m => m.ClassName == "main").FirstOrDefault();
-
- var fujians = documentdtl.All.Where(m => m.ClassName == "bizDownload").ToList();
- if (fujians != null && fujians.Count > 0)
+ var aaaa = scriptText.Substring(startIndex, endIndex - startIndex);
+ if (!string.IsNullOrEmpty(aaaa))
{
- var fujianhtml = "<div class=\"bid_attachtab\"><table width=\"700\" border=\"0\" cellspacing=\"1\" bgcolor=\"#efefef\" style=\"text-align: left; width: 100%; margin: 0px auto;\"><tbody>";
- foreach (var fujianya in fujians)
- {
- var ssss = fujianya.Id;
- var sssss = fujianya.InnerHtml;
- fujianhtml += "<tr><td class=\"bid_attachtab_content\">闄勪欢涓嬭浇锛�<a class=\"bizDownload\" target=\"_blank\" href =\"http://download.ccgp.gov.cn/oss/download?uuid=" + ssss + "\" id=\"0E1723104D34335C527765FF6CD28A\" title=\"鐐瑰嚮涓嬭浇\">" + sssss + "</a><br></td></tr>";
- }
- fujianhtml += "</tbody></table></div>";
-
- content += fujianhtml;
+ bbb = aaaa.Split(',');
}
-
-
-
- string OpenTenderCode = null;
- decimal? Budget = null;
- DateTime? OpenTenderTime = null;
-
- var td = dtl.QuerySelectorAll("p");
-
- if (td != null && td.Length > 0)
- {
-
-
- for (int i = 0; i < td.Length; i++)
- {
- if (td[i].TextContent.IndexOf("鍘熷叕鍛婄殑閲囪喘椤圭洰缂栧彿锛�") >= 0)
- {
-
- OpenTenderCode = td[i].TextContent.Replace("鍘熷叕鍛婄殑閲囪喘椤圭洰缂栧彿锛�", "").Trim();
-
- }
- if (td[i].TextContent.IndexOf("棰勭畻閲戦锛�") >= 0)
- {
- var yusuan = td[i].TextContent.Replace("棰勭畻閲戦锛�", "").Replace("锛�", "").Replace("锛�", "").Replace("涓囧厓", "").Replace("浜烘皯甯�", "").Trim();
- decimal a = 0;
- if (decimal.TryParse(yusuan, out a))
- {
- Budget = a * 10000;
- }
-
-
- }
-
- if (td[i].TextContent.IndexOf("寮�鏍囨椂闂达細") >= 0)
- {
-
-
- DateTime a;
- var sss = td[i].TextContent.Replace("寮�鏍囨椂闂达細", "").Replace("锛堝寳浜椂闂达級", "").Replace("骞�", "-").Replace("鏈�", "-").Replace("鏃�", " ").Replace("鐐�", ":").Replace("鍒�", "").Trim();
- if (DateTime.TryParse(sss, out a))
- {
- OpenTenderTime = a;
- }
-
-
- }
- }
- }
-
-
-
-
-
- var zhengfuProject = new ZhengfuProject();
- zhengfuProject.Id = Guid.NewGuid().ToString();
- zhengfuProject.Gglx = "04";
- string gglxName = "鏇存鍏憡";
- zhengfuProject.NoticeTime = DateTime.Parse(NoticeTime);
- zhengfuProject.OpenTenderTime = OpenTenderTime;
- zhengfuProject.RegionCode = null;
- //regionName = regionName;
- zhengfuProject.Sheng = "510000";
- string ShengName = "鍥涘窛鐪�";
- zhengfuProject.City = null;
- //CityName = CityName;
-
-
- zhengfuProject.Cgfs = null;
-
-
- zhengfuProject.OpenTenderCode = OpenTenderCode;
- zhengfuProject.Budget = Budget;
-
-
- zhengfuProject.Title = aaaaaaaa.title;
- zhengfuProject.Shorttitle = aaaaaaaa.shorttitle;
- zhengfuProject.Pageurl = aaaaaaaa.pageurl;
- zhengfuProject.Purchaser = Purchaser;
-
-
-
-
- zhengfuProject.Agency = Agency;
- zhengfuProject.AgencyCode = null;
- zhengfuProject.Content = content;
- zhengfuProject.RecStatus = "A";
- zhengfuProject.Creater = "1";
- zhengfuProject.Createtime = DateTime.Now;
- zhengfuProject.Modifier = "1";
- zhengfuProject.Modifytime = zhengfuProject.Createtime;
- _ccontext.ZhengfuProjects.Add(zhengfuProject);
-
-
- string url = $"{Program.api_domain}/webcrawler/_doc/" + zhengfuProject.Id;
- string result = string.Empty;
- Uri postUrl = new Uri(url);
- eswebcrawler eswebcrawler1 = new eswebcrawler();
- eswebcrawler1.Id = zhengfuProject.Id;
- eswebcrawler1.noticeTime = zhengfuProject.NoticeTime.ToString("yyyy-MM-dd HH:mm:ss");
- eswebcrawler1.openTenderTime = zhengfuProject.OpenTenderTime.HasValue ? zhengfuProject.OpenTenderTime.Value.ToString("yyyy-MM-dd HH:mm:ss") : null;
- eswebcrawler1.sheng = zhengfuProject.Sheng;
- eswebcrawler1.shengName = ShengName;
- eswebcrawler1.city = zhengfuProject.City;
- eswebcrawler1.cityName = null;
- eswebcrawler1.regionCode = zhengfuProject.RegionCode;
- eswebcrawler1.regionName = null;
- eswebcrawler1.cgfs = zhengfuProject.Cgfs;
- eswebcrawler1.cgfsName = null;
- eswebcrawler1.gglx = zhengfuProject.Gglx;
- eswebcrawler1.gglxName = gglxName;
- eswebcrawler1.openTenderCode = zhengfuProject.OpenTenderCode;
- eswebcrawler1.title = zhengfuProject.Title;
- eswebcrawler1.shorttitle = zhengfuProject.Shorttitle;
- eswebcrawler1.pageurl = zhengfuProject.Pageurl;
- eswebcrawler1.pingmu = zhengfuProject.Pingmu;
- eswebcrawler1.pingmuName = "";
- eswebcrawler1.purchaser = zhengfuProject.Purchaser;
- eswebcrawler1.budget = zhengfuProject.Budget.HasValue ? zhengfuProject.Budget.Value.ToString() : "";
- eswebcrawler1.agency = zhengfuProject.Agency;
- eswebcrawler1.agencyCode = zhengfuProject.AgencyCode;
- eswebcrawler1.modifyTime = zhengfuProject.Modifytime.ToString("yyyy-MM-dd HH:mm:ss");
-
- string requestJson = JsonConvert.SerializeObject(eswebcrawler1);
-
- using (HttpContent httpContent = new StringContent(requestJson))
- {
- httpContent.Headers.ContentType = new MediaTypeHeaderValue("application/json");
- using (HttpClient httpClient = new HttpClient())
- {
- httpClient.Timeout = TimeSpan.FromSeconds(60);
- HttpResponseMessage responseMessage = httpClient.PutAsync(postUrl, httpContent).Result;
- result = responseMessage.Content.ReadAsStringAsync().Result;
- }
-
- }
-
- Newtonsoft.Json.Linq.JObject jobject = (Newtonsoft.Json.Linq.JObject)JsonConvert.DeserializeObject(result);
-
- if (jobject["error"] == null && jobject["_shards"]["successful"].ToString() == "1")
- {
-
- }
- else
- {
- throw new Exception("淇濆瓨鏁版嵁搴撳嚭閿欙紒");
- }
-
-
- _ccontext.SaveChanges();
-
- Thread.CurrentThread.Join(1000 * 5);//闃绘璁惧畾鏃堕棿
}
}
}
- catch (Exception ex)
+
+
+
+
+ int ccc = 0;
+
+ foreach (var sichuanjieshoudtl1 in lists)
{
- logg.WriteLog(ex.Message, "涓浗鏀块噰缃戝洓宸濈渷");
+ Thread.CurrentThread.Join(1000 * 10);//闃绘璁惧畾鏃堕棿
+ try
+ {
+ sichuanjieshoudtl aaaaaaaa = new sichuanjieshoudtl();
+ if (bbb != null && bbb.Length >= ccc && !string.IsNullOrEmpty(bbb[ccc]))
+ {
+ aaaaaaaa.pageurl = bbb[ccc];
+ ccc = ccc + 1;
+ }
+ else
+ {
+ ccc = ccc + 1;
+ continue;
+ }
+ // aaaaaaaa.pageurl = sichuanjieshoudtl1.QuerySelector("a").GetAttribute("href");
- logg.WriteLog(ex.StackTrace, "涓浗鏀块噰缃戝洓宸濈渷");
+ logg.WriteLog(aaaaaaaa.pageurl, "涓浗鏀块噰缃戝洓宸濈渷");
+ aaaaaaaa.title = sichuanjieshoudtl1.QuerySelector("a").TextContent.Replace("\n", "").Trim();
+ aaaaaaaa.shorttitle = aaaaaaaa.title;
- logg.WriteLog(ex.ToString(), "涓浗鏀块噰缃戝洓宸濈渷");
+
+
+ string[] sssssss = sichuanjieshoudtl1.QuerySelector("span").TextContent.Replace("|", "").Split("\n");
+ string Purchaser = null;
+ string Agency = null;
+ var NoticeTime = sssssss[0].Trim();
+ foreach (var ssss in sssssss)
+ {
+ if (ssss.IndexOf("閲囪喘浜�") >= 0)
+ {
+ Purchaser = ssss.Replace("閲囪喘浜�", "").Replace("锛�", "").Replace("/", "").Trim();
+ }
+ if (ssss.IndexOf("浠g悊鏈烘瀯") >= 0)
+ {
+ Agency = ssss.Replace("浠g悊鏈烘瀯", "").Replace("锛�", "").Replace("/", "").Trim();
+ }
+ }
+ using (HttpClient clientdtl = new HttpClient())
+ {
+ var notime = DateTime.Parse(NoticeTime).ToString("yyyy-MM-dd");
+ var notimestart = DateTime.Parse(notime);
+ var notimeend = notimestart.AddDays(1);
+ var zhengfuProjectCount = _ccontext.ZhengfuProjects.Count(x => (x.RecStatus == "A" && x.Pageurl == aaaaaaaa.pageurl) || (x.RecStatus == "A" && x.Title == aaaaaaaa.title && x.Gglx == "04" && x.NoticeTime >= notimestart && x.NoticeTime < notimeend));
+ if (zhengfuProjectCount <= 0)
+ {
+ clientdtl.Timeout = TimeSpan.FromSeconds(60);
+ HttpResponseMessage responsedtl = clientdtl.GetAsync(aaaaaaaa.pageurl).Result;
+ var resdtl = responsedtl.Content.ReadAsStringAsync().Result;
+ var documentdtl = parser.ParseDocument(resdtl);
+
+ var dtl = documentdtl.All.Where(m => m.ClassName == "vF_detail_content_container").FirstOrDefault();
+
+ var content = dtl.OuterHtml;
+
+ var fujian = documentdtl.All.Where(m => m.ClassName == "main").FirstOrDefault();
+
+ var fujians = documentdtl.All.Where(m => m.ClassName == "bizDownload").ToList();
+ if (fujians != null && fujians.Count > 0)
+ {
+ var fujianhtml = "<div class=\"bid_attachtab\"><table width=\"700\" border=\"0\" cellspacing=\"1\" bgcolor=\"#efefef\" style=\"text-align: left; width: 100%; margin: 0px auto;\"><tbody>";
+ foreach (var fujianya in fujians)
+ {
+ var ssss = fujianya.Id;
+ var sssss = fujianya.InnerHtml;
+ fujianhtml += "<tr><td class=\"bid_attachtab_content\">闄勪欢涓嬭浇锛�<a class=\"bizDownload\" target=\"_blank\" href =\"https://download.ccgp.gov.cn/oss/download?uuid=" + ssss + "\" id=\"0E1723104D34335C527765FF6CD28A\" title=\"鐐瑰嚮涓嬭浇\">" + sssss + "</a><br></td></tr>";
+ }
+ fujianhtml += "</tbody></table></div>";
+
+ content += fujianhtml;
+ }
+
+
+
+ string OpenTenderCode = null;
+ decimal? Budget = null;
+ DateTime? OpenTenderTime = null;
+
+ var td = dtl.QuerySelectorAll("p");
+
+ if (td != null && td.Length > 0)
+ {
+
+
+ for (int i = 0; i < td.Length; i++)
+ {
+ if (td[i].TextContent.IndexOf("鍘熷叕鍛婄殑閲囪喘椤圭洰缂栧彿锛�") >= 0)
+ {
+ if (string.IsNullOrEmpty(OpenTenderCode))
+ OpenTenderCode = td[i].TextContent.Replace("鍘熷叕鍛婄殑閲囪喘椤圭洰缂栧彿锛�", "").Trim();
+
+ }
+ if (td[i].TextContent.IndexOf("棰勭畻閲戦锛�") >= 0)
+ {
+ var yusuan = td[i].TextContent.Replace("棰勭畻閲戦锛�", "").Replace("锛�", "").Replace("锛�", "").Replace("涓囧厓", "").Replace("浜烘皯甯�", "").Trim();
+ decimal a = 0;
+ if (decimal.TryParse(yusuan, out a))
+ {
+ Budget = a * 10000;
+ }
+
+
+ }
+
+ if (td[i].TextContent.IndexOf("寮�鏍囨椂闂达細") >= 0)
+ {
+
+
+ DateTime a;
+ var sss = td[i].TextContent.Replace("寮�鏍囨椂闂达細", "").Replace("锛堝寳浜椂闂达級", "").Replace("骞�", "-").Replace("鏈�", "-").Replace("鏃�", " ").Replace("鐐�", ":").Replace("鍒�", "").Trim();
+ if (DateTime.TryParse(sss, out a))
+ {
+ OpenTenderTime = a;
+ }
+
+
+ }
+ }
+ }
+
+
+
+
+
+ var zhengfuProject = new ZhengfuProject();
+ zhengfuProject.Id = Guid.NewGuid().ToString();
+ zhengfuProject.Gglx = "04";
+ string gglxName = "鏇存鍏憡";
+ zhengfuProject.NoticeTime = DateTime.Parse(NoticeTime);
+ zhengfuProject.OpenTenderTime = OpenTenderTime;
+ zhengfuProject.RegionCode = null;
+ //regionName = regionName;
+ zhengfuProject.Sheng = "510000";
+ string ShengName = "鍥涘窛鐪�";
+ zhengfuProject.City = null;
+ //CityName = CityName;
+
+
+ zhengfuProject.Cgfs = null;
+
+
+ zhengfuProject.OpenTenderCode = OpenTenderCode;
+ zhengfuProject.Budget = Budget;
+
+
+ zhengfuProject.Title = aaaaaaaa.title;
+ zhengfuProject.Shorttitle = aaaaaaaa.shorttitle;
+ zhengfuProject.Pageurl = aaaaaaaa.pageurl;
+ zhengfuProject.Purchaser = Purchaser;
+
+
+
+
+ zhengfuProject.Agency = Agency;
+ zhengfuProject.AgencyCode = null;
+ zhengfuProject.Content = content;
+ zhengfuProject.RecStatus = "A";
+ zhengfuProject.Creater = "1";
+ zhengfuProject.Createtime = DateTime.Now;
+ zhengfuProject.Modifier = "1";
+ zhengfuProject.Modifytime = zhengfuProject.Createtime;
+ _ccontext.ZhengfuProjects.Add(zhengfuProject);
+
+
+ string url = $"{Program.api_domain}/webcrawler/_doc/" + zhengfuProject.Id;
+ string result = string.Empty;
+ Uri postUrl = new Uri(url);
+ eswebcrawler eswebcrawler1 = new eswebcrawler();
+ eswebcrawler1.Id = zhengfuProject.Id;
+ eswebcrawler1.noticeTime = zhengfuProject.NoticeTime.ToString("yyyy-MM-dd HH:mm:ss");
+ eswebcrawler1.openTenderTime = zhengfuProject.OpenTenderTime.HasValue ? zhengfuProject.OpenTenderTime.Value.ToString("yyyy-MM-dd HH:mm:ss") : null;
+ eswebcrawler1.sheng = zhengfuProject.Sheng;
+ eswebcrawler1.shengName = ShengName;
+ eswebcrawler1.city = zhengfuProject.City;
+ eswebcrawler1.cityName = null;
+ eswebcrawler1.regionCode = zhengfuProject.RegionCode;
+ eswebcrawler1.regionName = null;
+ eswebcrawler1.cgfs = zhengfuProject.Cgfs;
+ eswebcrawler1.cgfsName = null;
+ eswebcrawler1.gglx = zhengfuProject.Gglx;
+ eswebcrawler1.gglxName = gglxName;
+ eswebcrawler1.openTenderCode = zhengfuProject.OpenTenderCode;
+ eswebcrawler1.title = zhengfuProject.Title;
+ eswebcrawler1.shorttitle = zhengfuProject.Shorttitle;
+ eswebcrawler1.pageurl = zhengfuProject.Pageurl;
+ eswebcrawler1.pingmu = zhengfuProject.Pingmu;
+ eswebcrawler1.pingmuName = "";
+ eswebcrawler1.purchaser = zhengfuProject.Purchaser;
+ eswebcrawler1.budget = zhengfuProject.Budget.HasValue ? zhengfuProject.Budget.Value.ToString() : "";
+ eswebcrawler1.agency = zhengfuProject.Agency;
+ eswebcrawler1.agencyCode = zhengfuProject.AgencyCode;
+ eswebcrawler1.modifyTime = zhengfuProject.Modifytime.ToString("yyyy-MM-dd HH:mm:ss");
+
+ string requestJson = JsonConvert.SerializeObject(eswebcrawler1);
+
+ using (HttpContent httpContent = new StringContent(requestJson))
+ {
+ httpContent.Headers.ContentType = new MediaTypeHeaderValue("application/json");
+ using (HttpClient httpClient = new HttpClient())
+ {
+ httpClient.Timeout = TimeSpan.FromSeconds(60);
+ HttpResponseMessage responseMessage = httpClient.PutAsync(postUrl, httpContent).Result;
+ result = responseMessage.Content.ReadAsStringAsync().Result;
+ }
+
+ }
+
+ Newtonsoft.Json.Linq.JObject jobject = (Newtonsoft.Json.Linq.JObject)JsonConvert.DeserializeObject(result);
+
+ if (jobject["error"] == null && jobject["_shards"]["successful"].ToString() == "1")
+ {
+
+ }
+ else
+ {
+ throw new Exception("淇濆瓨鏁版嵁搴撳嚭閿欙紒");
+ }
+
+
+ _ccontext.SaveChanges();
+
+ Thread.CurrentThread.Join(1000 * 10);//闃绘璁惧畾鏃堕棿
+ }
+ }
+ }
+ catch (Exception ex)
+ {
+ logg.WriteLog(ex.Message, "涓浗鏀块噰缃戝洓宸濈渷");
+
+ logg.WriteLog(ex.StackTrace, "涓浗鏀块噰缃戝洓宸濈渷");
+
+ logg.WriteLog(ex.ToString(), "涓浗鏀块噰缃戝洓宸濈渷");
+ }
+
+
}
-
-
}
}
+
+ page += 1;
}
- page += 1;
+ catch (Exception ex)
+ {
+ logg.WriteLog(ex.Message, "涓浗鏀块噰缃戝洓宸濈渷");
+
+ logg.WriteLog(ex.StackTrace, "涓浗鏀块噰缃戝洓宸濈渷");
+
+ logg.WriteLog(ex.ToString(), "涓浗鏀块噰缃戝洓宸濈渷");
+ Thread.CurrentThread.Join(1000 * 60 * 5);//闃绘璁惧畾鏃堕棿
+ }
+
+
+
}
@@ -1451,7 +1786,7 @@
//废标公告
- public static void feibiaogonggao(WebCrawlerContext _ccontext, string sichuanpageurll, string startTime, string endTime , int page)
+ public static async void feibiaogonggao(WebCrawlerContext _ccontext, string sichuanpageurll, string startTime, string endTime , int page)
{
sichuanpageurll = sichuanpageurll.Replace("start_time=", "start_time=" + startTime.Replace("-", "%3A"));
@@ -1460,279 +1795,361 @@
var list11 = new List<AngleSharp.Dom.IElement>();
while (true)
{
- Thread.CurrentThread.Join(1000 * 5);//闃绘璁惧畾鏃堕棿
+ Thread.CurrentThread.Join(1000 * 10);//闃绘璁惧畾鏃堕棿
logg.WriteLog(page.ToString(), "涓浗鏀块噰缃戝洓宸濈渷");
string sichuanpageurl2 = sichuanpageurll.Replace("page_index=1", "page_index=" + page);
- using (HttpClient client = new HttpClient())
+ try
{
- //List<sichuanjieshoudtl> data = new List<sichuanjieshoudtl>();
- HttpResponseMessage response = client.GetAsync(sichuanpageurl2).Result;
- var res = response.Content.ReadAsStringAsync().Result;
- var document = parser.ParseDocument(res);
- var sssdfsdfsd = document.All.Where(m => m.ClassName == "vT-srch-result-list").FirstOrDefault();
- var contentList = sssdfsdfsd.QuerySelector("ul");
- if (contentList != null)
+ HttpClientHandler handler = new HttpClientHandler();
+ handler.CookieContainer = new CookieContainer();
+ using (HttpClient client = new HttpClient(handler))
{
- var lists = contentList.QuerySelectorAll("li");
- if (lists == null || lists.Length == 0)
+ //List<sichuanjieshoudtl> data = new List<sichuanjieshoudtl>();
+ client.Timeout = TimeSpan.FromSeconds(60);
+ client.DefaultRequestHeaders.Add("Accept", "*/*");
+ client.DefaultRequestHeaders.Add("Accept-Encoding", "gzip, deflate, br");
+ client.DefaultRequestHeaders.Add("Host", "search.ccgp.gov.cn");
+ HttpResponseMessage response = client.GetAsync(sichuanpageurl2).Result;
+ //var res = response.Content.ReadAsStringAsync().Result;
+ string res = "";
+ if (response.IsSuccessStatusCode)
{
- break;
- }
- if (page != 1)
- {
- var list22 = lists.ToList();
- var breakable = true;
- if (list11.Count == list22.Count)
+ using (var responseStream = await response.Content.ReadAsStreamAsync())
{
- foreach (var list11111 in list11)
+ using (var decompressedStream = new GZipStream(responseStream, CompressionMode.Decompress))
{
- var listcount = list22.Count(x => x.InnerHtml == list11111.InnerHtml);
-
- if (listcount <= 0)
+ using (var reader = new StreamReader(decompressedStream))
{
- breakable = false;
- break;
+ res = await reader.ReadToEndAsync();
+ // 澶勭悊瑙e帇缂╁悗鐨勫搷搴斿唴瀹�
}
}
- if (breakable)
- {
- break;
- }
}
- list11 = list22;
}
else
{
- list11 = lists.ToList();
+ // 澶勭悊璇锋眰澶辫触鐨勬儏鍐�
}
-
- foreach (var sichuanjieshoudtl1 in lists)
+ var document = parser.ParseDocument(res);
+ var sssdfsdfsd = document.All.Where(m => m.ClassName == "vT-srch-result-list").FirstOrDefault();
+ var contentList = sssdfsdfsd.QuerySelector("ul");
+ if (contentList != null)
{
- Thread.CurrentThread.Join(1000 * 5);//闃绘璁惧畾鏃堕棿
- try
+ var lists = contentList.QuerySelectorAll("li");
+ if (lists == null || lists.Length == 0)
{
- sichuanjieshoudtl aaaaaaaa = new sichuanjieshoudtl();
- aaaaaaaa.pageurl = sichuanjieshoudtl1.QuerySelector("a").GetAttribute("href");
-
- logg.WriteLog(aaaaaaaa.pageurl, "涓浗鏀块噰缃戝洓宸濈渷");
- aaaaaaaa.title = sichuanjieshoudtl1.QuerySelector("a").TextContent.Replace("\n", "").Trim();
- aaaaaaaa.shorttitle = aaaaaaaa.title;
-
-
-
- string[] sssssss = sichuanjieshoudtl1.QuerySelector("span").TextContent.Replace("|", "").Split("\n");
- string Purchaser = null;
- string Agency = null;
- var NoticeTime = sssssss[0].Trim();
- foreach (var ssss in sssssss)
- {
- if (ssss.IndexOf("閲囪喘浜�") >= 0)
- {
- Purchaser = ssss.Replace("閲囪喘浜�", "").Replace("锛�", "").Replace("/", "").Trim();
- }
- if (ssss.IndexOf("浠g悊鏈烘瀯") >= 0)
- {
- Agency = ssss.Replace("浠g悊鏈烘瀯", "").Replace("锛�", "").Replace("/", "").Trim();
- }
- }
- using (HttpClient clientdtl = new HttpClient())
- {
- var zhengfuProjectCount = _ccontext.ZhengfuProjects.Count(x => (x.RecStatus == "A" && x.Pageurl == aaaaaaaa.pageurl) || (x.RecStatus == "A" && x.Title == aaaaaaaa.title && x.Gglx == "11" && x.OpenTenderTime == DateTime.Parse(NoticeTime)));
- if (zhengfuProjectCount <=0)
- {
- HttpResponseMessage responsedtl = clientdtl.GetAsync(aaaaaaaa.pageurl).Result;
- var resdtl = responsedtl.Content.ReadAsStringAsync().Result;
- var documentdtl = parser.ParseDocument(resdtl);
-
- var dtl = documentdtl.All.Where(m => m.ClassName == "vF_detail_content_container").FirstOrDefault();
-
- var content = dtl.OuterHtml;
-
- var fujian = documentdtl.All.Where(m => m.ClassName == "main").FirstOrDefault();
-
- var fujians = documentdtl.All.Where(m => m.ClassName == "bizDownload").ToList();
- if (fujians != null && fujians.Count > 0)
- {
- var fujianhtml = "<div class=\"bid_attachtab\"><table width=\"700\" border=\"0\" cellspacing=\"1\" bgcolor=\"#efefef\" style=\"text-align: left; width: 100%; margin: 0px auto;\"><tbody>";
- foreach (var fujianya in fujians)
- {
- var ssss = fujianya.Id;
- var sssss = fujianya.InnerHtml;
- fujianhtml += "<tr><td class=\"bid_attachtab_content\">闄勪欢涓嬭浇锛�<a class=\"bizDownload\" target=\"_blank\" href =\"http://download.ccgp.gov.cn/oss/download?uuid=" + ssss + "\" id=\"0E1723104D34335C527765FF6CD28A\" title=\"鐐瑰嚮涓嬭浇\">" + sssss + "</a><br></td></tr>";
- }
- fujianhtml += "</tbody></table></div>";
-
- content += fujianhtml;
- }
-
-
-
- string OpenTenderCode = null;
- decimal? Budget = null;
- DateTime? OpenTenderTime = null;
-
- var td = dtl.QuerySelectorAll("p");
-
- if (td != null && td.Length > 0)
- {
-
-
- for (int i = 0; i < td.Length; i++)
- {
- if (td[i].TextContent.IndexOf("閲囪喘椤圭洰缂栧彿锛�") >= 0)
- {
-
- OpenTenderCode = td[i].TextContent.Replace("閲囪喘椤圭洰缂栧彿锛�", "").Trim();
-
- }
- if (td[i].TextContent.IndexOf("棰勭畻閲戦锛�") >= 0)
- {
- var yusuan = td[i].TextContent.Replace("棰勭畻閲戦锛�", "").Replace("锛�", "").Replace("锛�", "").Replace("涓囧厓", "").Replace("浜烘皯甯�", "").Trim();
- decimal a = 0;
- if (decimal.TryParse(yusuan, out a))
- {
- Budget = a * 10000;
- }
-
-
- }
-
- if (td[i].TextContent.IndexOf("寮�鏍囨椂闂达細") >= 0)
- {
-
-
- DateTime a;
- var sss = td[i].TextContent.Replace("寮�鏍囨椂闂达細", "").Replace("锛堝寳浜椂闂达級", "").Replace("骞�", "-").Replace("鏈�", "-").Replace("鏃�", " ").Replace("鐐�", ":").Replace("鍒�", "").Trim();
- if (DateTime.TryParse(sss, out a))
- {
- OpenTenderTime = a;
- }
-
-
- }
- }
- }
-
-
-
-
-
-
- var zhengfuProject = new ZhengfuProject();
- zhengfuProject.Id = Guid.NewGuid().ToString();
- zhengfuProject.Gglx = "11";
- string gglxName = "搴熸爣鍏憡";
- zhengfuProject.NoticeTime = DateTime.Parse(NoticeTime);
- zhengfuProject.OpenTenderTime = OpenTenderTime;
- zhengfuProject.RegionCode = null;
- //regionName = regionName;
- zhengfuProject.Sheng = "510000";
- string ShengName = "鍥涘窛鐪�";
- zhengfuProject.City = null;
- //CityName = CityName;
-
-
- zhengfuProject.Cgfs = null;
-
-
- zhengfuProject.OpenTenderCode = OpenTenderCode;
- zhengfuProject.Budget = Budget;
-
-
- zhengfuProject.Title = aaaaaaaa.title;
- zhengfuProject.Shorttitle = aaaaaaaa.shorttitle;
- zhengfuProject.Pageurl = aaaaaaaa.pageurl;
- zhengfuProject.Purchaser = Purchaser;
-
-
-
-
- zhengfuProject.Agency = Agency;
- zhengfuProject.AgencyCode = null;
- zhengfuProject.Content = content;
- zhengfuProject.RecStatus = "A";
- zhengfuProject.Creater = "1";
- zhengfuProject.Createtime = DateTime.Now;
- zhengfuProject.Modifier = "1";
- zhengfuProject.Modifytime = zhengfuProject.Createtime;
- _ccontext.ZhengfuProjects.Add(zhengfuProject);
-
-
- string url = $"{Program.api_domain}/webcrawler/_doc/" + zhengfuProject.Id;
- string result = string.Empty;
- Uri postUrl = new Uri(url);
- eswebcrawler eswebcrawler1 = new eswebcrawler();
- eswebcrawler1.Id = zhengfuProject.Id;
- eswebcrawler1.noticeTime = zhengfuProject.NoticeTime.ToString("yyyy-MM-dd HH:mm:ss");
- eswebcrawler1.openTenderTime = zhengfuProject.OpenTenderTime.HasValue ? zhengfuProject.OpenTenderTime.Value.ToString("yyyy-MM-dd HH:mm:ss") : null;
- eswebcrawler1.sheng = zhengfuProject.Sheng;
- eswebcrawler1.shengName = ShengName;
- eswebcrawler1.city = zhengfuProject.City;
- eswebcrawler1.cityName = null;
- eswebcrawler1.regionCode = zhengfuProject.RegionCode;
- eswebcrawler1.regionName = null;
- eswebcrawler1.cgfs = zhengfuProject.Cgfs;
- eswebcrawler1.cgfsName = null;
- eswebcrawler1.gglx = zhengfuProject.Gglx;
- eswebcrawler1.gglxName = gglxName;
- eswebcrawler1.openTenderCode = zhengfuProject.OpenTenderCode;
- eswebcrawler1.title = zhengfuProject.Title;
- eswebcrawler1.shorttitle = zhengfuProject.Shorttitle;
- eswebcrawler1.pageurl = zhengfuProject.Pageurl;
- eswebcrawler1.pingmu = zhengfuProject.Pingmu;
- eswebcrawler1.pingmuName = "";
- eswebcrawler1.purchaser = zhengfuProject.Purchaser;
- eswebcrawler1.budget = zhengfuProject.Budget.HasValue ? zhengfuProject.Budget.Value.ToString() : "";
- eswebcrawler1.agency = zhengfuProject.Agency;
- eswebcrawler1.agencyCode = zhengfuProject.AgencyCode;
- eswebcrawler1.modifyTime = zhengfuProject.Modifytime.ToString("yyyy-MM-dd HH:mm:ss");
-
- string requestJson = JsonConvert.SerializeObject(eswebcrawler1);
-
- using (HttpContent httpContent = new StringContent(requestJson))
- {
- httpContent.Headers.ContentType = new MediaTypeHeaderValue("application/json");
- using (HttpClient httpClient = new HttpClient())
- {
- httpClient.Timeout = TimeSpan.FromSeconds(60);
- HttpResponseMessage responseMessage = httpClient.PutAsync(postUrl, httpContent).Result;
- result = responseMessage.Content.ReadAsStringAsync().Result;
- }
-
- }
-
- Newtonsoft.Json.Linq.JObject jobject = (Newtonsoft.Json.Linq.JObject)JsonConvert.DeserializeObject(result);
-
- if (jobject["error"] == null && jobject["_shards"]["successful"].ToString() == "1")
- {
-
- }
- else
- {
- throw new Exception("淇濆瓨鏁版嵁搴撳嚭閿欙紒");
- }
-
-
- _ccontext.SaveChanges();
-
- Thread.CurrentThread.Join(1000 * 5);//闃绘璁惧畾鏃堕棿
- }
- }
+ break;
}
- catch (Exception ex)
+ if (page != 1)
{
- logg.WriteLog(ex.Message, "涓浗鏀块噰缃戝洓宸濈渷");
+ var list22 = lists.ToList();
+ var breakable = true;
+ if (list11.Count == list22.Count)
+ {
+ foreach (var list11111 in list11)
+ {
+ var listcount = list22.Count(x => x.InnerHtml == list11111.InnerHtml);
- logg.WriteLog(ex.StackTrace, "涓浗鏀块噰缃戝洓宸濈渷");
-
- logg.WriteLog(ex.ToString(), "涓浗鏀块噰缃戝洓宸濈渷");
+ if (listcount <= 0)
+ {
+ breakable = false;
+ break;
+ }
+ }
+ if (breakable)
+ {
+ break;
+ }
+ }
+ list11 = list22;
+ }
+ else
+ {
+ list11 = lists.ToList();
}
+ string[] bbb = null;
+ var scriptElements = document.QuerySelectorAll("script");
+ foreach (var scriptElement in scriptElements)
+ {
+ var scriptText = scriptElement.TextContent;
+
+ if (scriptText.Contains("var ohtmlurls")) // 妫�鏌ユ枃鏈腑鏄惁鍖呭惈鐗瑰畾鍙橀噺
+ {
+
+ int startIndex = scriptText.IndexOf('"') + 1;
+ int endIndex = scriptText.IndexOf('"', startIndex);
+ if (startIndex > 0 && endIndex > startIndex)
+ {
+ var aaaa = scriptText.Substring(startIndex, endIndex - startIndex);
+ if (!string.IsNullOrEmpty(aaaa))
+ {
+ bbb = aaaa.Split(',');
+ }
+ }
+ }
+ }
+
+
+
+
+ int ccc = 0;
+
+ foreach (var sichuanjieshoudtl1 in lists)
+ {
+ Thread.CurrentThread.Join(1000 * 10);//闃绘璁惧畾鏃堕棿
+ try
+ {
+ sichuanjieshoudtl aaaaaaaa = new sichuanjieshoudtl();
+ if (bbb != null && bbb.Length >= ccc && !string.IsNullOrEmpty(bbb[ccc]))
+ {
+ aaaaaaaa.pageurl = bbb[ccc];
+ ccc = ccc + 1;
+ }
+ else
+ {
+ ccc = ccc + 1;
+ continue;
+ }
+ // aaaaaaaa.pageurl = sichuanjieshoudtl1.QuerySelector("a").GetAttribute("href");
+
+ logg.WriteLog(aaaaaaaa.pageurl, "涓浗鏀块噰缃戝洓宸濈渷");
+ aaaaaaaa.title = sichuanjieshoudtl1.QuerySelector("a").TextContent.Replace("\n", "").Trim();
+ aaaaaaaa.shorttitle = aaaaaaaa.title;
+
+
+
+ string[] sssssss = sichuanjieshoudtl1.QuerySelector("span").TextContent.Replace("|", "").Split("\n");
+ string Purchaser = null;
+ string Agency = null;
+ var NoticeTime = sssssss[0].Trim();
+ foreach (var ssss in sssssss)
+ {
+ if (ssss.IndexOf("閲囪喘浜�") >= 0)
+ {
+ Purchaser = ssss.Replace("閲囪喘浜�", "").Replace("锛�", "").Replace("/", "").Trim();
+ }
+ if (ssss.IndexOf("浠g悊鏈烘瀯") >= 0)
+ {
+ Agency = ssss.Replace("浠g悊鏈烘瀯", "").Replace("锛�", "").Replace("/", "").Trim();
+ }
+ }
+ using (HttpClient clientdtl = new HttpClient())
+ {
+ var notime = DateTime.Parse(NoticeTime).ToString("yyyy-MM-dd");
+ var notimestart = DateTime.Parse(notime);
+ var notimeend = notimestart.AddDays(1);
+ var zhengfuProjectCount = _ccontext.ZhengfuProjects.Count(x => (x.RecStatus == "A" && x.Pageurl == aaaaaaaa.pageurl) || (x.RecStatus == "A" && x.Title == aaaaaaaa.title && x.Gglx == "11" && x.NoticeTime >= notimestart && x.NoticeTime < notimeend));
+ if (zhengfuProjectCount <= 0)
+ {
+ clientdtl.Timeout = TimeSpan.FromSeconds(60);
+ HttpResponseMessage responsedtl = clientdtl.GetAsync(aaaaaaaa.pageurl).Result;
+ var resdtl = responsedtl.Content.ReadAsStringAsync().Result;
+ var documentdtl = parser.ParseDocument(resdtl);
+
+ var dtl = documentdtl.All.Where(m => m.ClassName == "vF_detail_content_container").FirstOrDefault();
+
+ var content = dtl.OuterHtml;
+
+ var fujian = documentdtl.All.Where(m => m.ClassName == "main").FirstOrDefault();
+
+ var fujians = documentdtl.All.Where(m => m.ClassName == "bizDownload").ToList();
+ if (fujians != null && fujians.Count > 0)
+ {
+ var fujianhtml = "<div class=\"bid_attachtab\"><table width=\"700\" border=\"0\" cellspacing=\"1\" bgcolor=\"#efefef\" style=\"text-align: left; width: 100%; margin: 0px auto;\"><tbody>";
+ foreach (var fujianya in fujians)
+ {
+ var ssss = fujianya.Id;
+ var sssss = fujianya.InnerHtml;
+ fujianhtml += "<tr><td class=\"bid_attachtab_content\">闄勪欢涓嬭浇锛�<a class=\"bizDownload\" target=\"_blank\" href =\"https://download.ccgp.gov.cn/oss/download?uuid=" + ssss + "\" id=\"0E1723104D34335C527765FF6CD28A\" title=\"鐐瑰嚮涓嬭浇\">" + sssss + "</a><br></td></tr>";
+ }
+ fujianhtml += "</tbody></table></div>";
+
+ content += fujianhtml;
+ }
+
+
+
+ string OpenTenderCode = null;
+ decimal? Budget = null;
+ DateTime? OpenTenderTime = null;
+
+ var td = dtl.QuerySelectorAll("p");
+
+ if (td != null && td.Length > 0)
+ {
+
+
+ for (int i = 0; i < td.Length; i++)
+ {
+ if (td[i].TextContent.IndexOf("閲囪喘椤圭洰缂栧彿锛�") >= 0)
+ {
+ if (string.IsNullOrEmpty(OpenTenderCode))
+ OpenTenderCode = td[i].TextContent.Replace("閲囪喘椤圭洰缂栧彿锛�", "").Trim();
+
+ }
+ if (td[i].TextContent.IndexOf("棰勭畻閲戦锛�") >= 0)
+ {
+ var yusuan = td[i].TextContent.Replace("棰勭畻閲戦锛�", "").Replace("锛�", "").Replace("锛�", "").Replace("涓囧厓", "").Replace("浜烘皯甯�", "").Trim();
+ decimal a = 0;
+ if (decimal.TryParse(yusuan, out a))
+ {
+ Budget = a * 10000;
+ }
+
+
+ }
+
+ if (td[i].TextContent.IndexOf("寮�鏍囨椂闂达細") >= 0)
+ {
+
+
+ DateTime a;
+ var sss = td[i].TextContent.Replace("寮�鏍囨椂闂达細", "").Replace("锛堝寳浜椂闂达級", "").Replace("骞�", "-").Replace("鏈�", "-").Replace("鏃�", " ").Replace("鐐�", ":").Replace("鍒�", "").Trim();
+ if (DateTime.TryParse(sss, out a))
+ {
+ OpenTenderTime = a;
+ }
+
+
+ }
+ }
+ }
+
+
+
+
+
+
+ var zhengfuProject = new ZhengfuProject();
+ zhengfuProject.Id = Guid.NewGuid().ToString();
+ zhengfuProject.Gglx = "11";
+ string gglxName = "搴熸爣鍏憡";
+ zhengfuProject.NoticeTime = DateTime.Parse(NoticeTime);
+ zhengfuProject.OpenTenderTime = OpenTenderTime;
+ zhengfuProject.RegionCode = null;
+ //regionName = regionName;
+ zhengfuProject.Sheng = "510000";
+ string ShengName = "鍥涘窛鐪�";
+ zhengfuProject.City = null;
+ //CityName = CityName;
+
+
+ zhengfuProject.Cgfs = null;
+
+
+ zhengfuProject.OpenTenderCode = OpenTenderCode;
+ zhengfuProject.Budget = Budget;
+
+
+ zhengfuProject.Title = aaaaaaaa.title;
+ zhengfuProject.Shorttitle = aaaaaaaa.shorttitle;
+ zhengfuProject.Pageurl = aaaaaaaa.pageurl;
+ zhengfuProject.Purchaser = Purchaser;
+
+
+
+
+ zhengfuProject.Agency = Agency;
+ zhengfuProject.AgencyCode = null;
+ zhengfuProject.Content = content;
+ zhengfuProject.RecStatus = "A";
+ zhengfuProject.Creater = "1";
+ zhengfuProject.Createtime = DateTime.Now;
+ zhengfuProject.Modifier = "1";
+ zhengfuProject.Modifytime = zhengfuProject.Createtime;
+ _ccontext.ZhengfuProjects.Add(zhengfuProject);
+
+
+ string url = $"{Program.api_domain}/webcrawler/_doc/" + zhengfuProject.Id;
+ string result = string.Empty;
+ Uri postUrl = new Uri(url);
+ eswebcrawler eswebcrawler1 = new eswebcrawler();
+ eswebcrawler1.Id = zhengfuProject.Id;
+ eswebcrawler1.noticeTime = zhengfuProject.NoticeTime.ToString("yyyy-MM-dd HH:mm:ss");
+ eswebcrawler1.openTenderTime = zhengfuProject.OpenTenderTime.HasValue ? zhengfuProject.OpenTenderTime.Value.ToString("yyyy-MM-dd HH:mm:ss") : null;
+ eswebcrawler1.sheng = zhengfuProject.Sheng;
+ eswebcrawler1.shengName = ShengName;
+ eswebcrawler1.city = zhengfuProject.City;
+ eswebcrawler1.cityName = null;
+ eswebcrawler1.regionCode = zhengfuProject.RegionCode;
+ eswebcrawler1.regionName = null;
+ eswebcrawler1.cgfs = zhengfuProject.Cgfs;
+ eswebcrawler1.cgfsName = null;
+ eswebcrawler1.gglx = zhengfuProject.Gglx;
+ eswebcrawler1.gglxName = gglxName;
+ eswebcrawler1.openTenderCode = zhengfuProject.OpenTenderCode;
+ eswebcrawler1.title = zhengfuProject.Title;
+ eswebcrawler1.shorttitle = zhengfuProject.Shorttitle;
+ eswebcrawler1.pageurl = zhengfuProject.Pageurl;
+ eswebcrawler1.pingmu = zhengfuProject.Pingmu;
+ eswebcrawler1.pingmuName = "";
+ eswebcrawler1.purchaser = zhengfuProject.Purchaser;
+ eswebcrawler1.budget = zhengfuProject.Budget.HasValue ? zhengfuProject.Budget.Value.ToString() : "";
+ eswebcrawler1.agency = zhengfuProject.Agency;
+ eswebcrawler1.agencyCode = zhengfuProject.AgencyCode;
+ eswebcrawler1.modifyTime = zhengfuProject.Modifytime.ToString("yyyy-MM-dd HH:mm:ss");
+
+ string requestJson = JsonConvert.SerializeObject(eswebcrawler1);
+
+ using (HttpContent httpContent = new StringContent(requestJson))
+ {
+ httpContent.Headers.ContentType = new MediaTypeHeaderValue("application/json");
+ using (HttpClient httpClient = new HttpClient())
+ {
+ httpClient.Timeout = TimeSpan.FromSeconds(60);
+ HttpResponseMessage responseMessage = httpClient.PutAsync(postUrl, httpContent).Result;
+ result = responseMessage.Content.ReadAsStringAsync().Result;
+ }
+
+ }
+
+ Newtonsoft.Json.Linq.JObject jobject = (Newtonsoft.Json.Linq.JObject)JsonConvert.DeserializeObject(result);
+
+ if (jobject["error"] == null && jobject["_shards"]["successful"].ToString() == "1")
+ {
+
+ }
+ else
+ {
+ throw new Exception("淇濆瓨鏁版嵁搴撳嚭閿欙紒");
+ }
+
+
+ _ccontext.SaveChanges();
+
+ Thread.CurrentThread.Join(1000 * 10);//闃绘璁惧畾鏃堕棿
+ }
+ }
+ }
+ catch (Exception ex)
+ {
+ logg.WriteLog(ex.Message, "涓浗鏀块噰缃戝洓宸濈渷");
+
+ logg.WriteLog(ex.StackTrace, "涓浗鏀块噰缃戝洓宸濈渷");
+
+ logg.WriteLog(ex.ToString(), "涓浗鏀块噰缃戝洓宸濈渷");
+ }
+
+
+ }
}
}
+ page += 1;
}
- page += 1;
+ catch (Exception ex)
+ {
+ logg.WriteLog(ex.Message, "涓浗鏀块噰缃戝洓宸濈渷");
+
+ logg.WriteLog(ex.StackTrace, "涓浗鏀块噰缃戝洓宸濈渷");
+
+ logg.WriteLog(ex.ToString(), "涓浗鏀块噰缃戝洓宸濈渷");
+ Thread.CurrentThread.Join(1000 * 60 * 5);//闃绘璁惧畾鏃堕棿
+ }
+
+
+
}
--
Gitblit v1.9.1