feat: add crawler statistics page and backend support

Add a crawler statistics page showing per-source crawl counts, latest update times, error messages, and related metrics. On the backend, add storage and a query endpoint for crawl statistics, recording the outcome of every crawl run. Also support filtering bid queries by keyword.
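For orientation before the diff: the commit's API surface can be exercised as below. A minimal sketch, assuming the server listens on http://localhost:3000; the host, port, and keyword values are placeholders, and only the two routes themselves come from this commit.

import axios from 'axios'

async function demo() {
  // Filtered bid query: `keywords` is a comma-separated list that the
  // controller splits before passing it to BidsService (see below).
  const bids = await axios.get('http://localhost:3000/api/bids/by-date-range', {
    params: { startDate: '2025-01-01', endDate: '2025-01-31', keywords: '光伏,风电' },
  })

  // Per-source crawl statistics; the response shape matches the CrawlStat
  // interface defined in CrawlInfo.vue.
  const stats = await axios.get('http://localhost:3000/api/bids/crawl-info-stats')
  console.log(bids.data.length, stats.data.length)
}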
@@ -6,7 +6,7 @@
   "scripts": {
     "dev": "vite",
     "build": "vue-tsc -b && vite build",
-    "build:watch": "vue-tsc -b && vite build --watch",
+    "build:watch": "concurrently \"vue-tsc -b --watch\" \"vite build --watch\"",
     "preview": "vite preview"
   },
   "dependencies": {
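Rationale for the `build:watch` change: `vue-tsc -b && vite build --watch` runs the type-checker exactly once before handing off, so later edits are rebundled without being re-type-checked; running `vue-tsc -b --watch` and `vite build --watch` under `concurrently` keeps both processes alive side by side. This pairs with the `concurrently` devDependency added further down.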
@@ -26,6 +26,10 @@
         <el-icon><Setting /></el-icon>
         <span>Keywords</span>
       </el-menu-item>
+      <el-menu-item index="5">
+        <el-icon><Connection /></el-icon>
+        <span>Crawl Info</span>
+      </el-menu-item>
     </el-menu>
   </el-aside>
@@ -66,6 +70,10 @@
         :loading="loading"
         @refresh="fetchData"
       />
+
+      <CrawlInfo
+        v-if="activeIndex === '5'"
+      />
     </el-main>
   </el-container>
 </el-container>
@@ -75,11 +83,12 @@
 import { ref, onMounted } from 'vue'
 import axios from 'axios'
 import { ElMessage } from 'element-plus'
-import { DataBoard, Document, Setting, MagicStick } from '@element-plus/icons-vue'
+import { DataBoard, Document, Setting, MagicStick, Connection } from '@element-plus/icons-vue'
 import Dashboard from './components/Dashboard.vue'
 import DashboardAI from './components/Dashboard-AI.vue'
 import Bids from './components/Bids.vue'
 import Keywords from './components/Keywords.vue'
+import CrawlInfo from './components/CrawlInfo.vue'
 
 const activeIndex = ref('1')
 const bids = ref<any[]>([])
frontend/src/components/CrawlInfo.vue (new file, 135 lines)
@@ -0,0 +1,135 @@
+<template>
+  <div class="crawl-info">
+    <el-card>
+      <template #header>
+        <div class="card-header">
+          <span>爬虫统计信息</span>
+          <el-button type="primary" size="small" @click="fetchCrawlStats" :loading="loading">
+            <el-icon><Refresh /></el-icon>
+            刷新
+          </el-button>
+        </div>
+      </template>
+
+      <el-table :data="crawlStats" stripe style="width: 100%" v-loading="loading">
+        <el-table-column prop="source" label="爬虫来源" width="200" />
+        <el-table-column prop="count" label="本次获取数量" width="120" sortable />
+        <el-table-column label="最近更新时间" width="180">
+          <template #default="{ row }">
+            {{ formatDate(row.latestUpdate) }}
+          </template>
+        </el-table-column>
+        <el-table-column label="最新工程时间" width="180">
+          <template #default="{ row }">
+            {{ formatDate(row.latestPublishDate) }}
+          </template>
+        </el-table-column>
+        <el-table-column label="状态" width="100">
+          <template #default="{ row }">
+            <el-tag :type="row.error ? 'danger' : (row.count > 0 ? 'success' : 'info')">
+              {{ row.error ? '出错' : (row.count > 0 ? '正常' : '无数据') }}
+            </el-tag>
+          </template>
+        </el-table-column>
+        <el-table-column label="错误信息" min-width="200">
+          <template #default="{ row }">
+            <span v-if="row.error" style="color: #f56c6c">{{ row.error }}</span>
+            <span v-else>-</span>
+          </template>
+        </el-table-column>
+      </el-table>
+
+      <div class="summary" v-if="crawlStats.length > 0">
+        <el-descriptions :column="3" border>
+          <el-descriptions-item label="爬虫来源总数">
+            {{ crawlStats.length }}
+          </el-descriptions-item>
+          <el-descriptions-item label="本次获取总数">
+            {{ totalCount }}
+          </el-descriptions-item>
+          <el-descriptions-item label="有数据来源">
+            {{ activeSources }}
+          </el-descriptions-item>
+          <el-descriptions-item label="出错来源">
+            {{ errorSources }}
+          </el-descriptions-item>
+        </el-descriptions>
+      </div>
+    </el-card>
+  </div>
+</template>
+
+<script setup lang="ts">
+import { ref, computed, onMounted } from 'vue'
+import axios from 'axios'
+import { ElMessage } from 'element-plus'
+import { Refresh } from '@element-plus/icons-vue'
+
+interface CrawlStat {
+  source: string
+  count: number
+  latestUpdate: string | null
+  latestPublishDate: string | null
+  error: string | null
+}
+
+const crawlStats = ref<CrawlStat[]>([])
+const loading = ref(false)
+
+const totalCount = computed(() => {
+  return crawlStats.value.reduce((sum, item) => sum + item.count, 0)
+})
+
+const activeSources = computed(() => {
+  return crawlStats.value.filter(item => item.count > 0).length
+})
+
+const errorSources = computed(() => {
+  return crawlStats.value.filter(item => item.error).length
+})
+
+const formatDate = (dateStr: string | null) => {
+  if (!dateStr) return '-'
+  const date = new Date(dateStr)
+  return date.toLocaleString('zh-CN', {
+    year: 'numeric',
+    month: '2-digit',
+    day: '2-digit',
+    hour: '2-digit',
+    minute: '2-digit'
+  })
+}
+
+const fetchCrawlStats = async () => {
+  loading.value = true
+  try {
+    const res = await axios.get('/api/bids/crawl-info-stats')
+    crawlStats.value = res.data
+  } catch (error) {
+    console.error('Failed to fetch crawl stats:', error)
+    ElMessage.error('获取爬虫统计信息失败')
+  } finally {
+    loading.value = false
+  }
+}
+
+onMounted(() => {
+  fetchCrawlStats()
+})
+</script>
+
+<style scoped>
+.crawl-info {
+  padding: 20px;
+}
+
+.card-header {
+  display: flex;
+  justify-content: space-between;
+  align-items: center;
+}
+
+.summary {
+  margin-top: 20px;
+}
+</style>
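The component assumes `GET /api/bids/crawl-info-stats` returns a JSON array matching `CrawlStat`. A hypothetical example payload for illustration only; the source names and values are invented, not taken from the commit:

// What CrawlInfo.vue expects back from the stats endpoint (hypothetical data).
const example: CrawlStat[] = [
  {
    source: 'chdtp',
    count: 12,
    latestUpdate: '2025-01-15T08:30:00.000Z',
    latestPublishDate: '2025-01-14T16:00:00.000Z',
    error: null,
  },
  {
    source: 'chng',
    count: 0,
    latestUpdate: '2025-01-15T08:31:00.000Z',
    latestPublishDate: null,
    error: 'net::ERR_TIMED_OUT',
  },
]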
@@ -55,6 +55,7 @@
     "@types/jest": "^30.0.0",
     "@types/node": "^22.10.7",
     "@types/supertest": "^6.0.2",
+    "concurrently": "^9.2.1",
     "eslint": "^9.18.0",
     "eslint-config-prettier": "^10.0.1",
     "eslint-plugin-prettier": "^5.2.2",
@@ -17,7 +17,7 @@ import { AiModule } from './ai/ai.module';
     ScheduleModule.forRoot(),
     ServeStaticModule.forRoot({
       rootPath: join(__dirname, '..', 'frontend', 'dist'),
-      exclude: ['/api/(.*)'],
+      exclude: ['/api/:path(*)'],
     }),
     LoggerModule,
     DatabaseModule,
@@ -26,7 +26,13 @@ export class BidsController {
   }
 
   @Get('by-date-range')
-  getByDateRange(@Query('startDate') startDate: string, @Query('endDate') endDate: string) {
-    return this.bidsService.getBidsByDateRange(startDate, endDate);
+  getByDateRange(@Query('startDate') startDate: string, @Query('endDate') endDate?: string, @Query('keywords') keywords?: string) {
+    const keywordsArray = keywords ? keywords.split(',') : undefined;
+    return this.bidsService.getBidsByDateRange(startDate, endDate, keywordsArray);
+  }
+
+  @Get('crawl-info-stats')
+  getCrawlInfoStats() {
+    return this.bidsService.getCrawlInfoAddStats();
   }
 }
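Note that `keywords.split(',')` forwards empty and whitespace-padded entries, so `?keywords=a,,b` produces a `LIKE '%%'` condition that matches every row. A hypothetical hardening helper, not part of the commit:

// Hypothetical: trim keyword entries and drop blanks before querying.
function parseKeywords(raw?: string): string[] | undefined {
  const parsed = raw
    ?.split(',')
    .map((k) => k.trim())
    .filter((k) => k.length > 0)
  return parsed && parsed.length > 0 ? parsed : undefined
}

parseKeywords(' 光伏, ,风电 ') // ['光伏', '风电']
parseKeywords('')              // undefined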
@@ -88,7 +88,7 @@ export class BidsService {
       .getMany();
   }
 
-  async getBidsByDateRange(startDate?: string, endDate?: string) {
+  async getBidsByDateRange(startDate?: string, endDate?: string, keywords?: string[]) {
     const qb = this.bidRepository.createQueryBuilder('bid');
 
     if (startDate) {
@@ -103,6 +103,49 @@ export class BidsService {
       qb.andWhere('bid.publishDate <= :endDate', { endDate: end });
     }
 
+    if (keywords && keywords.length > 0) {
+      const keywordConditions = keywords.map((keyword, index) => {
+        return `bid.title LIKE :keyword${index}`;
+      }).join(' OR ');
+      qb.andWhere(`(${keywordConditions})`, keywords.reduce((params, keyword, index) => {
+        params[`keyword${index}`] = `%${keyword}%`;
+        return params;
+      }, {}));
+    }
+
     return qb.orderBy('bid.publishDate', 'DESC').getMany();
   }
+
+  async getCrawlInfoAddStats() {
+    // Fetch each source's most recent crawl record
+    const query = `
+      SELECT
+        source,
+        count,
+        latestPublishDate,
+        error,
+        createdAt as latestUpdate
+      FROM crawl_info_add
+      WHERE id IN (
+        SELECT MAX(id)
+        FROM crawl_info_add
+        GROUP BY source
+      )
+      ORDER BY source ASC
+    `;
+
+    const results = await this.bidRepository.query(query);
+
+    return results.map((item: any) => ({
+      source: item.source,
+      count: item.count,
+      latestUpdate: item.latestUpdate,
+      latestPublishDate: item.latestPublishDate,
+      error: item.error,
+    }));
+  }
 }
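One caveat in `getCrawlInfoAddStats`: `id` is a generated uuid (see the entity below), so `MAX(id)` selects the lexicographically greatest uuid per source, which is not necessarily the newest record. A sketch of a variant keyed on `createdAt` instead, assuming per-source timestamps are unique; this is an alternative, not what the commit ships:

// Sketch (inside BidsService): pick each source's newest row by createdAt.
const query = `
  SELECT c.source, c.count, c.latestPublishDate, c.error, c.createdAt AS latestUpdate
  FROM crawl_info_add c
  JOIN (
    SELECT source, MAX(createdAt) AS maxCreatedAt
    FROM crawl_info_add
    GROUP BY source
  ) latest
    ON latest.source = c.source AND latest.maxCreatedAt = c.createdAt
  ORDER BY c.source ASC
`;
const results = await this.bidRepository.query(query);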
@@ -1,10 +1,12 @@
 import { Module } from '@nestjs/common';
+import { TypeOrmModule } from '@nestjs/typeorm';
 import { BidCrawlerService } from './services/bid-crawler.service';
 import { CrawlerController } from './crawler.controller';
 import { BidsModule } from '../bids/bids.module';
+import { CrawlInfoAdd } from './entities/crawl-info-add.entity';
 
 @Module({
-  imports: [BidsModule],
+  imports: [BidsModule, TypeOrmModule.forFeature([CrawlInfoAdd])],
   controllers: [CrawlerController],
   providers: [BidCrawlerService],
   exports: [BidCrawlerService],
src/crawler/entities/crawl-info-add.entity.ts (new file, 22 lines)
@@ -0,0 +1,22 @@
+import { Entity, PrimaryGeneratedColumn, Column, CreateDateColumn } from 'typeorm';
+
+@Entity('crawl_info_add')
+export class CrawlInfoAdd {
+  @PrimaryGeneratedColumn('uuid')
+  id: string;
+
+  @Column()
+  source: string;
+
+  @Column()
+  count: number;
+
+  @Column({ type: 'datetime', nullable: true })
+  latestPublishDate: Date | null;
+
+  @Column({ type: 'text', nullable: true })
+  error: string;
+
+  @CreateDateColumn()
+  createdAt: Date;
+}
@@ -1,7 +1,10 @@
 import { Injectable, Logger } from '@nestjs/common';
 import { ConfigService } from '@nestjs/config';
+import { InjectRepository } from '@nestjs/typeorm';
+import { Repository } from 'typeorm';
 import * as puppeteer from 'puppeteer';
 import { BidsService } from '../../bids/services/bid.service';
+import { CrawlInfoAdd } from '../entities/crawl-info-add.entity';
 import { ChdtpCrawler } from './chdtp_target';
 import { ChngCrawler } from './chng_target';
 import { SzecpCrawler } from './szecp_target';
@@ -22,6 +25,8 @@ export class BidCrawlerService {
   constructor(
     private bidsService: BidsService,
     private configService: ConfigService,
+    @InjectRepository(CrawlInfoAdd)
+    private crawlInfoRepository: Repository<CrawlInfoAdd>,
   ) {}
 
   async crawlAll() {
@@ -93,6 +98,14 @@ export class BidCrawlerService {
         zeroDataCrawlers.push(crawler);
       }
+
+      // Determine the latest publish date in this batch
+      const latestPublishDate = results.length > 0
+        ? results.reduce((latest, item) => {
+            const itemDate = new Date(item.publishDate);
+            return itemDate > latest ? itemDate : latest;
+          }, new Date(0))
+        : null;
 
       for (const item of results) {
         await this.bidsService.createOrUpdate({
           title: item.title,
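Because the reduce is seeded with `new Date(0)`, a non-empty batch whose `publishDate` strings all fail to parse records the Unix epoch as `latestPublishDate` (an Invalid Date never compares greater than the seed). A hypothetical guard that filters unparsable dates first:

// Hypothetical variant: skip unparsable dates; null when none are valid.
function maxPublishDate(results: { publishDate: string }[]): Date | null {
  const dates = results
    .map((item) => new Date(item.publishDate))
    .filter((d) => !Number.isNaN(d.getTime()))
  if (dates.length === 0) return null
  return dates.reduce((latest, d) => (d > latest ? d : latest))
}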
@@ -102,10 +115,16 @@ export class BidCrawlerService {
           unit: '',
         });
       }
+
+      // Persist crawl statistics to the database
+      await this.saveCrawlInfo(crawler.name, results.length, latestPublishDate);
     } catch (err) {
       this.logger.error(`Error crawling ${crawler.name}: ${err.message}`);
       // Record the error
       crawlResults[crawler.name] = { success: 0, error: err.message };
+
+      // Persist the error to the database
+      await this.saveCrawlInfo(crawler.name, 0, null, err.message);
     }
   }
 
@@ -131,6 +150,14 @@ export class BidCrawlerService {
       // Update the aggregate results
       crawlResults[crawler.name] = { success: results.length };
+
+      // Determine the latest publish date in this batch
+      const latestPublishDate = results.length > 0
+        ? results.reduce((latest, item) => {
+            const itemDate = new Date(item.publishDate);
+            return itemDate > latest ? itemDate : latest;
+          }, new Date(0))
+        : null;
 
       for (const item of results) {
         await this.bidsService.createOrUpdate({
           title: item.title,
@@ -140,10 +167,16 @@ export class BidCrawlerService {
           unit: '',
         });
       }
+
+      // Update crawl statistics in the database
+      await this.saveCrawlInfo(crawler.name, results.length, latestPublishDate);
     } catch (err) {
       this.logger.error(`Error retrying ${crawler.name}: ${err.message}`);
       // Record the error
       crawlResults[crawler.name] = { success: 0, error: err.message };
+
+      // Update the error record in the database
+      await this.saveCrawlInfo(crawler.name, 0, null, err.message);
     }
   }
 }
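The `latestPublishDate` computation and both `saveCrawlInfo` call patterns are now duplicated between the first-pass and retry loops. A hypothetical consolidation using the `maxPublishDate` sketch above; names are illustrative, not from the commit:

// Hypothetical shared tail for both crawl loops:
try {
  // ...run the crawler and upsert `results` as in the diff above...
  await this.saveCrawlInfo(crawler.name, results.length, maxPublishDate(results));
} catch (err) {
  this.logger.error(`Error crawling ${crawler.name}: ${err.message}`);
  crawlResults[crawler.name] = { success: 0, error: err.message };
  await this.saveCrawlInfo(crawler.name, 0, null, err.message);
}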
@@ -184,4 +217,24 @@ export class BidCrawlerService {
     this.logger.log('='.repeat(50));
   }
+
+  private async saveCrawlInfo(
+    source: string,
+    count: number,
+    latestPublishDate: Date | null,
+    error?: string,
+  ) {
+    try {
+      const crawlInfo = this.crawlInfoRepository.create({
+        source,
+        count,
+        latestPublishDate,
+        error,
+      });
+      await this.crawlInfoRepository.save(crawlInfo);
+      this.logger.log(`Saved crawl info for ${source}: ${count} items`);
+    } catch (err) {
+      this.logger.error(`Failed to save crawl info for ${source}: ${err.message}`);
+    }
+  }
 }
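Since `saveCrawlInfo` deliberately swallows persistence failures, that contract is worth pinning in a test. A Jest sketch (Jest is already in devDependencies); the mock shapes, the stubbed constructor arguments, and the any-casts are assumptions for illustration:

// Hypothetical spec: saveCrawlInfo logs but never rethrows.
import { BidCrawlerService } from './bid-crawler.service';

it('does not rethrow when persisting crawl info fails', async () => {
  const repo = {
    create: jest.fn((v) => v),
    save: jest.fn().mockRejectedValue(new Error('db down')),
  };
  // Dependencies other than the repository are stubbed out.
  const service = new BidCrawlerService({} as any, {} as any, repo as any);

  // Private method, invoked via an any-cast purely for this sketch.
  await expect(
    (service as any).saveCrawlInfo('chdtp', 3, new Date()),
  ).resolves.toBeUndefined();
  expect(repo.save).toHaveBeenCalledWith(
    expect.objectContaining({ source: 'chdtp', count: 3 }),
  );
});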