feat: 新增中核集团电子采购平台爬虫

- 新增 CnncecpCrawler 爬虫类,支持中核集团电子采购平台招标信息抓取
- 目标平台:https://www.cnncecp.com ,专注于核工业领域采购招标
- 使用正则表达式提取招标公告,支持标题、URL 和发布日期解析
- 添加完整的人类行为模拟功能(鼠标移动、滚动),以降低检测风险
- 支持分页抓取,最多抓取 5 页数据,使用 index_ 模式进行翻页
- 添加完整的单元测试文件,确保爬虫功能正常
- 采用统一的错误处理机制,单个爬虫失败不影响整体任务执行
This commit is contained in:
dmy
2026-01-11 23:32:09 +08:00
parent 74a4aec363
commit bf17587bd3
3 changed files with 247 additions and 1 deletion

View File

@@ -7,6 +7,7 @@ import { ChngCrawler } from './chng_target';
import { SzecpCrawler } from './szecp_target';
import { CdtCrawler } from './cdt_target';
import { EpsCrawler } from './eps_target';
import { CnncecpCrawler } from './cnncecp_target';
@Injectable()
export class BidCrawlerService {
@@ -50,7 +51,7 @@ export class BidCrawlerService {
args,
});
const crawlers = [ChdtpCrawler, ChngCrawler, SzecpCrawler, CdtCrawler, EpsCrawler];
const crawlers = [ChdtpCrawler, ChngCrawler, SzecpCrawler, CdtCrawler, EpsCrawler, CnncecpCrawler];
try {
for (const crawler of crawlers) {