feat: 添加爬虫统计信息页面和功能

新增爬虫统计信息页面，展示各来源的爬取数量、最新更新时间、错误信息等统计指标
后端添加爬虫统计信息存储和查询接口,记录每次爬取的结果
支持按关键词过滤招标信息查询
This commit is contained in:
dmy
2026-01-12 22:00:39 +08:00
parent 533d7b60fb
commit bfac194c14
10 changed files with 278 additions and 7 deletions

View File

@@ -1,7 +1,10 @@
import { Injectable, Logger } from '@nestjs/common';
import { ConfigService } from '@nestjs/config';
import { InjectRepository } from '@nestjs/typeorm';
import { Repository } from 'typeorm';
import * as puppeteer from 'puppeteer';
import { BidsService } from '../../bids/services/bid.service';
import { CrawlInfoAdd } from '../entities/crawl-info-add.entity';
import { ChdtpCrawler } from './chdtp_target';
import { ChngCrawler } from './chng_target';
import { SzecpCrawler } from './szecp_target';
@@ -22,6 +25,8 @@ export class BidCrawlerService {
/**
 * Declares the service's collaborators as private parameter properties.
 *
 * - bidsService: used by the crawl loop to persist each crawled item
 *   through `createOrUpdate`.
 * - configService: not referenced in the visible part of this file;
 *   presumably read elsewhere in the class — TODO confirm.
 * - crawlInfoRepository: TypeORM repository for `CrawlInfoAdd`, injected
 *   via `@InjectRepository`; `saveCrawlInfo` writes through it.
 */
constructor(
private bidsService: BidsService,
private configService: ConfigService,
@InjectRepository(CrawlInfoAdd)
private crawlInfoRepository: Repository<CrawlInfoAdd>,
) {}
async crawlAll() {
@@ -93,6 +98,14 @@ export class BidCrawlerService {
zeroDataCrawlers.push(crawler);
}
// 获取最新的发布日期
const latestPublishDate = results.length > 0
? results.reduce((latest, item) => {
const itemDate = new Date(item.publishDate);
return itemDate > latest ? itemDate : latest;
}, new Date(0))
: null;
for (const item of results) {
await this.bidsService.createOrUpdate({
title: item.title,
@@ -102,10 +115,16 @@ export class BidCrawlerService {
unit: '',
});
}
// 保存爬虫统计信息到数据库
await this.saveCrawlInfo(crawler.name, results.length, latestPublishDate);
} catch (err) {
this.logger.error(`Error crawling ${crawler.name}: ${err.message}`);
// 记录错误信息
crawlResults[crawler.name] = { success: 0, error: err.message };
// 保存错误信息到数据库
await this.saveCrawlInfo(crawler.name, 0, null, err.message);
}
}
@@ -131,6 +150,14 @@ export class BidCrawlerService {
// 更新统计结果
crawlResults[crawler.name] = { success: results.length };
// 获取最新的发布日期
const latestPublishDate = results.length > 0
? results.reduce((latest, item) => {
const itemDate = new Date(item.publishDate);
return itemDate > latest ? itemDate : latest;
}, new Date(0))
: null;
for (const item of results) {
await this.bidsService.createOrUpdate({
title: item.title,
@@ -140,10 +167,16 @@ export class BidCrawlerService {
unit: '',
});
}
// 更新爬虫统计信息到数据库
await this.saveCrawlInfo(crawler.name, results.length, latestPublishDate);
} catch (err) {
this.logger.error(`Error retrying ${crawler.name}: ${err.message}`);
// 记录错误信息
crawlResults[crawler.name] = { success: 0, error: err.message };
// 更新错误信息到数据库
await this.saveCrawlInfo(crawler.name, 0, null, err.message);
}
}
}
@@ -184,4 +217,24 @@ export class BidCrawlerService {
this.logger.log('='.repeat(50));
}
}
/**
 * Persists one crawl-statistics record for a single source.
 *
 * Builds a `CrawlInfoAdd` entity from the arguments and saves it through the
 * injected repository. The operation is best-effort: a failure to save is
 * logged and swallowed so it never interrupts the crawl loop that calls it.
 *
 * @param source - Name of the crawler the record belongs to.
 * @param count - Number of items the crawl produced (callers pass 0 on failure).
 * @param latestPublishDate - Most recent publish date among the results, or
 *   `null` when there is none.
 * @param error - Error message of a failed crawl; omitted on success.
 * @returns A promise that settles once the save attempt has finished.
 */
private async saveCrawlInfo(
  source: string,
  count: number,
  latestPublishDate: Date | null,
  error?: string,
): Promise<void> {
  try {
    const crawlInfo = this.crawlInfoRepository.create({
      source,
      count,
      latestPublishDate,
      error,
    });
    await this.crawlInfoRepository.save(crawlInfo);
    this.logger.log(`Saved crawl info for ${source}: ${count} items`);
  } catch (err: unknown) {
    // Anything can be thrown, not only Error instances; narrow before reading
    // `.message` so the log never ends with "undefined".
    const reason = err instanceof Error ? err.message : String(err);
    this.logger.error(`Failed to save crawl info for ${source}: ${reason}`);
  }
}
}