From bfac194c14e8b9b174a6e9d15fc282db5f1f11f9 Mon Sep 17 00:00:00 2001
From: dmy
Date: Mon, 12 Jan 2026 22:00:39 +0800
Subject: [PATCH] feat: add crawler statistics page and functionality
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add a crawler statistics page showing, for each source, the number of
items crawled, the latest update time, error messages, and related
metrics.

On the backend, add storage and a query endpoint for crawl statistics,
recording the outcome of every crawl run.

Support filtering bid queries by keyword.
---
 frontend/package.json                         |   2 +-
 frontend/src/App.vue                          |  11 +-
 frontend/src/components/CrawlInfo.vue         | 135 ++++++++++++++++++
 package.json                                  |   1 +
 src/app.module.ts                             |   2 +-
 src/bids/controllers/bid.controller.ts        |  10 +-
 src/bids/services/bid.service.ts              |  41 ++++-
 src/crawler/crawler.module.ts                 |   4 +-
 src/crawler/entities/crawl-info-add.entity.ts |  22 +++
 src/crawler/services/bid-crawler.service.ts   |  53 +++++++
 10 files changed, 274 insertions(+), 7 deletions(-)
 create mode 100644 frontend/src/components/CrawlInfo.vue
 create mode 100644 src/crawler/entities/crawl-info-add.entity.ts

diff --git a/frontend/package.json b/frontend/package.json
index a23afe8..32f7de3 100644
--- a/frontend/package.json
+++ b/frontend/package.json
@@ -6,7 +6,7 @@
   "scripts": {
     "dev": "vite",
     "build": "vue-tsc -b && vite build",
-    "build:watch": "vue-tsc -b && vite build --watch",
+    "build:watch": "concurrently \"vue-tsc -b --watch\" \"vite build --watch\"",
     "preview": "vite preview"
   },
   "dependencies": {
diff --git a/frontend/src/App.vue b/frontend/src/App.vue
index d9ffc94..c161c56 100644
--- a/frontend/src/App.vue
+++ b/frontend/src/App.vue
@@ -26,6 +26,10 @@
           Keywords
+
+
+          Crawl Info
+
@@ -66,6 +70,10 @@
         :loading="loading"
         @refresh="fetchData"
       />
+
+
+
+
@@ -75,11 +83,12 @@
 import { ref, onMounted } from 'vue'
 import axios from 'axios'
 import { ElMessage } from 'element-plus'
-import { DataBoard, Document, Setting, MagicStick } from '@element-plus/icons-vue'
+import { DataBoard, Document, Setting, MagicStick, Connection } from '@element-plus/icons-vue'
 import Dashboard from './components/Dashboard.vue'
 import DashboardAI from './components/Dashboard-AI.vue'
 import Bids from './components/Bids.vue'
 import Keywords from './components/Keywords.vue'
+import CrawlInfo from './components/CrawlInfo.vue'
 
 const activeIndex = ref('1')
 const bids = ref([])
diff --git a/frontend/src/components/CrawlInfo.vue b/frontend/src/components/CrawlInfo.vue
new file mode 100644
index 0000000..0830bc6
--- /dev/null
+++ b/frontend/src/components/CrawlInfo.vue
@@ -0,0 +1,135 @@
+
+
+
+
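The 135-line body of CrawlInfo.vue is elided above. For orientation only, here is a minimal sketch of the data-loading logic such a component plausibly carries, assuming axios (already used by App.vue) and the GET endpoint added further down in this patch; the interface shape is inferred from getCrawlInfoAddStats() below, and the '/api' prefix, ref names, and fetchStats helper are illustrative assumptions, not the shipped component.

    // Sketch of a possible <script setup lang="ts"> block for CrawlInfo.vue
    // (hypothetical; the actual component also ships a 135-line template).
    import { ref, onMounted } from 'vue'
    import axios from 'axios'

    // Shape of one row returned by GET /api/bids/crawl-info-stats,
    // inferred from getCrawlInfoAddStats() in bid.service.ts below.
    interface CrawlStat {
      source: string
      count: number
      latestUpdate: string
      latestPublishDate: string | null
      error: string | null
    }

    const stats = ref<CrawlStat[]>([]) // rows for an <el-table>
    const loading = ref(false)

    async function fetchStats() {
      loading.value = true
      try {
        const { data } = await axios.get<CrawlStat[]>('/api/bids/crawl-info-stats')
        stats.value = data
      } finally {
        loading.value = false
      }
    }

    onMounted(fetchStats)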
diff --git a/package.json b/package.json
index e7bd0ee..e761fe2 100644
--- a/package.json
+++ b/package.json
@@ -55,6 +55,7 @@
   "@types/jest": "^30.0.0",
   "@types/node": "^22.10.7",
   "@types/supertest": "^6.0.2",
+  "concurrently": "^9.2.1",
   "eslint": "^9.18.0",
   "eslint-config-prettier": "^10.0.1",
   "eslint-plugin-prettier": "^5.2.2",
diff --git a/src/app.module.ts b/src/app.module.ts
index 85840be..d2ae4d6 100644
--- a/src/app.module.ts
+++ b/src/app.module.ts
@@ -17,7 +17,7 @@ import { AiModule } from './ai/ai.module';
     ScheduleModule.forRoot(),
     ServeStaticModule.forRoot({
       rootPath: join(__dirname, '..', 'frontend', 'dist'),
-      exclude: ['/api/(.*)'],
+      exclude: ['/api/:path(*)'],
     }),
     LoggerModule,
     DatabaseModule,
diff --git a/src/bids/controllers/bid.controller.ts b/src/bids/controllers/bid.controller.ts
index e7ca920..87f63c7 100644
--- a/src/bids/controllers/bid.controller.ts
+++ b/src/bids/controllers/bid.controller.ts
@@ -26,7 +26,13 @@ export class BidsController {
   }
 
   @Get('by-date-range')
-  getByDateRange(@Query('startDate') startDate: string, @Query('endDate') endDate: string) {
-    return this.bidsService.getBidsByDateRange(startDate, endDate);
+  getByDateRange(@Query('startDate') startDate: string, @Query('endDate') endDate?: string, @Query('keywords') keywords?: string) {
+    const keywordsArray = keywords ? keywords.split(',') : undefined;
+    return this.bidsService.getBidsByDateRange(startDate, endDate, keywordsArray);
+  }
+
+  @Get('crawl-info-stats')
+  getCrawlInfoStats() {
+    return this.bidsService.getCrawlInfoAddStats();
   }
 }
diff --git a/src/bids/services/bid.service.ts b/src/bids/services/bid.service.ts
index 7bf8262..1a81ae5 100644
--- a/src/bids/services/bid.service.ts
+++ b/src/bids/services/bid.service.ts
@@ -88,7 +88,7 @@ export class BidsService {
       .getMany();
   }
 
-  async getBidsByDateRange(startDate?: string, endDate?: string) {
+  async getBidsByDateRange(startDate?: string, endDate?: string, keywords?: string[]) {
     const qb = this.bidRepository.createQueryBuilder('bid');
 
     if (startDate) {
@@ -103,6 +103,45 @@
       qb.andWhere('bid.publishDate <= :endDate', { endDate: end });
     }
 
+    if (keywords && keywords.length > 0) {
+      const keywordConditions = keywords.map((keyword, index) => {
+        return `bid.title LIKE :keyword${index}`;
+      }).join(' OR ');
+      qb.andWhere(`(${keywordConditions})`, keywords.reduce((params, keyword, index) => {
+        params[`keyword${index}`] = `%${keyword}%`;
+        return params;
+      }, {}));
+    }
+
     return qb.orderBy('bid.publishDate', 'DESC').getMany();
   }
+
+  async getCrawlInfoAddStats() {
+    // Fetch the most recent crawl record (by createdAt) for each source.
+    const query = `
+      SELECT
+        source,
+        count,
+        latestPublishDate,
+        error,
+        createdAt as latestUpdate
+      FROM crawl_info_add
+      WHERE createdAt = (
+        SELECT MAX(c2.createdAt)
+        FROM crawl_info_add c2
+        WHERE c2.source = crawl_info_add.source
+      )
+      ORDER BY source ASC
+    `;
+
+    const results = await this.bidRepository.query(query);
+
+    return results.map((item: any) => ({
+      source: item.source,
+      count: item.count,
+      latestUpdate: item.latestUpdate,
+      latestPublishDate: item.latestPublishDate,
+      error: item.error,
+    }));
+  }
 }
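Note the raw SQL selects each source's newest row by createdAt rather than by MAX(id), since the entity's id (defined below) is a random UUID and its lexicographic maximum says nothing about recency. For reference, the same "latest record per source" lookup can be written with TypeORM's documented subquery support instead of raw SQL; this is a sketch, not part of the patch, and it assumes access to a Repository<CrawlInfoAdd> (e.g. injected via @InjectRepository as the crawler service does below) and a hypothetical import path for the entity.

    import { Repository } from 'typeorm';
    import { CrawlInfoAdd } from '../../crawler/entities/crawl-info-add.entity';

    // Equivalent of the raw SQL above: for every source, select the row whose
    // createdAt equals the newest createdAt recorded for that source.
    async function latestCrawlInfoPerSource(
      repo: Repository<CrawlInfoAdd>,
    ): Promise<CrawlInfoAdd[]> {
      return repo
        .createQueryBuilder('info')
        .where((qb) => {
          // Correlated subquery: the newest createdAt for this row's source.
          const sub = qb
            .subQuery()
            .select('MAX(c2.createdAt)')
            .from(CrawlInfoAdd, 'c2')
            .where('c2.source = info.source')
            .getQuery();
          return `info.createdAt = ${sub}`;
        })
        .orderBy('info.source', 'ASC')
        .getMany();
    }

Keeping the lookup in the query builder lets TypeORM handle aliasing and escaping, at the cost of a less obvious SQL shape than the inline string above.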
diff --git a/src/crawler/crawler.module.ts b/src/crawler/crawler.module.ts
index 0f16eec..759a8aa 100644
--- a/src/crawler/crawler.module.ts
+++ b/src/crawler/crawler.module.ts
@@ -1,10 +1,12 @@
 import { Module } from '@nestjs/common';
+import { TypeOrmModule } from '@nestjs/typeorm';
 import { BidCrawlerService } from './services/bid-crawler.service';
 import { CrawlerController } from './crawler.controller';
 import { BidsModule } from '../bids/bids.module';
+import { CrawlInfoAdd } from './entities/crawl-info-add.entity';
 
 @Module({
-  imports: [BidsModule],
+  imports: [BidsModule, TypeOrmModule.forFeature([CrawlInfoAdd])],
   controllers: [CrawlerController],
   providers: [BidCrawlerService],
   exports: [BidCrawlerService],
diff --git a/src/crawler/entities/crawl-info-add.entity.ts b/src/crawler/entities/crawl-info-add.entity.ts
new file mode 100644
index 0000000..e44695e
--- /dev/null
+++ b/src/crawler/entities/crawl-info-add.entity.ts
@@ -0,0 +1,22 @@
+import { Entity, PrimaryGeneratedColumn, Column, CreateDateColumn } from 'typeorm';
+
+@Entity('crawl_info_add')
+export class CrawlInfoAdd {
+  @PrimaryGeneratedColumn('uuid')
+  id: string;
+
+  @Column()
+  source: string;
+
+  @Column()
+  count: number;
+
+  @Column({ type: 'datetime', nullable: true })
+  latestPublishDate: Date | null;
+
+  @Column({ type: 'text', nullable: true })
+  error: string;
+
+  @CreateDateColumn()
+  createdAt: Date;
+}
diff --git a/src/crawler/services/bid-crawler.service.ts b/src/crawler/services/bid-crawler.service.ts
index 65aa2ca..2673657 100644
--- a/src/crawler/services/bid-crawler.service.ts
+++ b/src/crawler/services/bid-crawler.service.ts
@@ -1,7 +1,10 @@
 import { Injectable, Logger } from '@nestjs/common';
 import { ConfigService } from '@nestjs/config';
+import { InjectRepository } from '@nestjs/typeorm';
+import { Repository } from 'typeorm';
 import * as puppeteer from 'puppeteer';
 import { BidsService } from '../../bids/services/bid.service';
+import { CrawlInfoAdd } from '../entities/crawl-info-add.entity';
 import { ChdtpCrawler } from './chdtp_target';
 import { ChngCrawler } from './chng_target';
 import { SzecpCrawler } from './szecp_target';
@@ -22,6 +25,8 @@ export class BidCrawlerService {
   constructor(
     private bidsService: BidsService,
     private configService: ConfigService,
+    @InjectRepository(CrawlInfoAdd)
+    private crawlInfoRepository: Repository<CrawlInfoAdd>,
   ) {}
 
   async crawlAll() {
@@ -93,6 +98,14 @@
         zeroDataCrawlers.push(crawler);
       }
 
+      // Determine the most recent publish date among this crawl's results.
+      const latestPublishDate = results.length > 0
+        ? results.reduce((latest, item) => {
+          const itemDate = new Date(item.publishDate);
+          return itemDate > latest ? itemDate : latest;
+        }, new Date(0))
+        : null;
+
       for (const item of results) {
         await this.bidsService.createOrUpdate({
           title: item.title,
@@ -102,10 +115,16 @@
           unit: '',
         });
       }
+
+      // Persist this run's crawl statistics.
+      await this.saveCrawlInfo(crawler.name, results.length, latestPublishDate);
     } catch (err) {
       this.logger.error(`Error crawling ${crawler.name}: ${err.message}`);
       // Record the failure in the in-memory summary
       crawlResults[crawler.name] = { success: 0, error: err.message };
+
+      // Persist the failure to the database as well
+      await this.saveCrawlInfo(crawler.name, 0, null, err.message);
     }
   }
 
@@ -131,6 +150,14 @@
       // Update the in-memory summary
       crawlResults[crawler.name] = { success: results.length };
 
+      // Determine the most recent publish date among this retry's results.
+      const latestPublishDate = results.length > 0
+        ? results.reduce((latest, item) => {
+          const itemDate = new Date(item.publishDate);
+          return itemDate > latest ? itemDate : latest;
+        }, new Date(0))
+        : null;
+
       for (const item of results) {
         await this.bidsService.createOrUpdate({
           title: item.title,
@@ -140,10 +167,16 @@
           unit: '',
         });
       }
+
+      // Persist this retry's crawl statistics.
+      await this.saveCrawlInfo(crawler.name, results.length, latestPublishDate);
     } catch (err) {
       this.logger.error(`Error retrying ${crawler.name}: ${err.message}`);
       // Record the failure in the in-memory summary
       crawlResults[crawler.name] = { success: 0, error: err.message };
+
+      // Persist the failure to the database as well
+      await this.saveCrawlInfo(crawler.name, 0, null, err.message);
     }
   }
 }
@@ -184,4 +217,24 @@
     this.logger.log('='.repeat(50));
     }
   }
+
+  private async saveCrawlInfo(
+    source: string,
+    count: number,
+    latestPublishDate: Date | null,
+    error?: string,
+  ) {
+    try {
+      const crawlInfo = this.crawlInfoRepository.create({
+        source,
+        count,
+        latestPublishDate,
+        error,
+      });
+      await this.crawlInfoRepository.save(crawlInfo);
+      this.logger.log(`Saved crawl info for ${source}: ${count} items`);
+    } catch (err) {
+      this.logger.error(`Failed to save crawl info for ${source}: ${err.message}`);
+    }
+  }
 }
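To exercise the new endpoints end to end, a small client sketch follows. It assumes the app applies a global '/api' prefix (consistent with the ServeStaticModule exclude above), that BidsController is mounted at /api/bids, and that the server listens on localhost:3000; the host, port, and sample keyword values are placeholders. Keywords are passed comma-separated, matching the split(',') in the controller.

    import axios from 'axios';

    const base = 'http://localhost:3000/api/bids'; // host/port/prefix assumed

    async function demo() {
      // Bids in a date range whose titles match any of the given keywords.
      const bids = await axios.get(`${base}/by-date-range`, {
        params: { startDate: '2026-01-01', endDate: '2026-01-12', keywords: 'wind,solar' },
      });
      console.log(`${bids.data.length} bids matched`);

      // Latest crawl statistics, one row per source.
      const stats = await axios.get(`${base}/crawl-info-stats`);
      for (const s of stats.data) {
        console.log(s.source, s.count, s.latestUpdate, s.error ?? 'ok');
      }
    }

    demo().catch((err) => console.error(err.message));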