feat: 新增中广核电子商务平台爬虫
- 新增 CgnpcCrawler 爬虫类,支持中广核电子商务平台招标信息抓取
- 目标平台:https://ecp.cgnpc.com.cn,专注于核电行业采购招标
- 使用正则表达式提取招标公告,支持标题、URL 和发布日期解析
- 添加完整的人类行为模拟功能(鼠标移动、滚动),降低检测风险
- 支持分页抓取,最多抓取 5 页数据,使用中广核特有的 btn-prev 按钮翻页
- 添加完整的单元测试文件,确保爬虫功能正常
- 统一的错误处理机制,单个爬虫失败不影响整体任务执行
This commit is contained in:
@@ -8,6 +8,7 @@ import { SzecpCrawler } from './szecp_target';
|
||||
import { CdtCrawler } from './cdt_target';
|
||||
import { EpsCrawler } from './eps_target';
|
||||
import { CnncecpCrawler } from './cnncecp_target';
|
||||
import { CgnpcCrawler } from './cgnpc_target';
|
||||
|
||||
@Injectable()
|
||||
export class BidCrawlerService {
|
||||
@@ -51,7 +52,7 @@ export class BidCrawlerService {
|
||||
args,
|
||||
});
|
||||
|
||||
- const crawlers = [ChdtpCrawler, ChngCrawler, SzecpCrawler, CdtCrawler, EpsCrawler, CnncecpCrawler];
|
||||
+ const crawlers = [ChdtpCrawler, ChngCrawler, SzecpCrawler, CdtCrawler, EpsCrawler, CnncecpCrawler, CgnpcCrawler];
|
||||
|
||||
try {
|
||||
for (const crawler of crawlers) {
|
||||
|
||||
Reference in New Issue
Block a user