Files
bidding_watcher/src/bids/services/bid.service.ts
dmy bfac194c14 feat: 添加爬虫统计信息页面和功能
新增爬虫统计信息页面,展示各来源的爬取数量、最新更新时间、错误信息等统计指标
后端添加爬虫统计信息存储和查询接口,记录每次爬取的结果
支持按关键词过滤招标信息查询
2026-01-12 22:00:39 +08:00

152 lines
4.3 KiB
TypeScript

import { Injectable } from '@nestjs/common';
import { InjectRepository } from '@nestjs/typeorm';
import { Repository, LessThan, MoreThanOrEqual } from 'typeorm';
import { BidItem } from '../entities/bid-item.entity';
/**
 * Data-access service for bid items.
 *
 * Wraps the `BidItem` repository with paginated listing, priority and
 * date-range queries, create-or-update keyed by URL, clean-up of old rows,
 * and a read-only summary of the latest crawl record per source.
 */
@Injectable()
export class BidsService {
  constructor(
    @InjectRepository(BidItem)
    private readonly bidRepository: Repository<BidItem>,
  ) {}

  /**
   * Coerces a loosely typed value (e.g. an HTTP query-string field) into an
   * integer >= 1, returning `fallback` when that is not possible.
   */
  private static toPositiveInt(value: unknown, fallback: number): number {
    const parsed = Math.floor(Number(value));
    return Number.isFinite(parsed) && parsed >= 1 ? parsed : fallback;
  }

  /** Returns the current instant moved back by `days` calendar days. */
  private static daysAgo(days: number): Date {
    const date = new Date();
    date.setDate(date.getDate() - days);
    return date;
  }

  /**
   * Parses a date string for use as a local-day boundary.
   *
   * A bare `YYYY-MM-DD` string is interpreted by `new Date(str)` as UTC
   * midnight; applying local `setHours` afterwards can then land on the
   * previous local day when the server runs west of UTC. Such strings are
   * therefore built from their components in local time. Any other format
   * (or an out-of-range month/day) falls back to `new Date(value)`.
   */
  private static parseLocalDay(value: string): Date {
    const match = /^(\d{4})-(\d{2})-(\d{2})$/.exec(value.trim());
    if (!match) {
      return new Date(value);
    }
    const year = Number(match[1]);
    const monthIndex = Number(match[2]) - 1;
    const day = Number(match[3]);
    const local = new Date(year, monthIndex, day);
    // The Date constructor silently rolls over values such as month 13 or
    // day 32; defer to the native parser instead of accepting a shifted date.
    const roundTrips = local.getMonth() === monthIndex && local.getDate() === day;
    return roundTrips ? local : new Date(value);
  }

  /**
   * Lists bids ordered by publish date (newest first) with pagination.
   *
   * @param query Optional object that may carry `page` (default 1), `limit`
   *   (default 10), `source` (exact match) and `keyword` (substring match on
   *   the title). `page` and `limit` that are missing, non-numeric or below 1
   *   fall back to their defaults instead of producing a negative offset.
   * @returns The requested page of items and the total number of matches.
   */
  async findAll(query?: unknown): Promise<{ items: BidItem[]; total: number }> {
    const { page, limit, source, keyword } = (query ?? {}) as {
      page?: unknown;
      limit?: unknown;
      source?: unknown;
      keyword?: unknown;
    };
    const pageNumber = BidsService.toPositiveInt(page, 1);
    const pageSize = BidsService.toPositiveInt(limit, 10);

    const qb = this.bidRepository.createQueryBuilder('bid');
    if (source) {
      qb.andWhere('bid.source = :source', { source });
    }
    if (keyword) {
      qb.andWhere('bid.title LIKE :keyword', { keyword: `%${String(keyword)}%` });
    }
    qb.orderBy('bid.publishDate', 'DESC')
      .skip((pageNumber - 1) * pageSize)
      .take(pageSize);

    const [items, total] = await qb.getManyAndCount();
    return { items, total };
  }

  /**
   * Returns up to 10 bids with a positive priority, highest priority first
   * and newest first within the same priority.
   *
   * This method previously filtered on `priority < 0`, which its own comment
   * described as placeholder logic; it now shares the query used by
   * {@link BidsService.getHighPriorityCorrected}.
   */
  getHighPriority(): Promise<BidItem[]> {
    return this.getHighPriorityCorrected();
  }

  /**
   * Returns up to 10 bids whose priority is greater than 0, ordered by
   * priority (descending) and then by publish date (descending).
   */
  async getHighPriorityCorrected(): Promise<BidItem[]> {
    return this.bidRepository
      .createQueryBuilder('bid')
      .where('bid.priority > 0')
      .orderBy('bid.priority', 'DESC')
      .addOrderBy('bid.publishDate', 'DESC')
      .limit(10)
      .getMany();
  }

  /**
   * Saves a bid, updating the existing row when one with the same URL exists.
   *
   * The lookup and the save are two separate statements, so concurrent calls
   * with the same URL are not guarded against here.
   *
   * @param data Fields of the bid; `url` is the de-duplication key.
   * @returns The saved entity.
   */
  async createOrUpdate(data: Partial<BidItem>) {
    // Only look up when a URL is actually present: TypeORM skips undefined /
    // null values in `where` by default, so an unguarded lookup could match
    // an unrelated row and overwrite it.
    if (data.url != null) {
      const existing = await this.bidRepository.findOne({ where: { url: data.url } });
      if (existing) {
        Object.assign(existing, data);
        return this.bidRepository.save(existing);
      }
    }
    return this.bidRepository.save(data);
  }

  /**
   * Deletes bids whose `createdAt` is more than 30 days before now.
   *
   * @returns The delete result reported by the repository.
   */
  async cleanOldData() {
    const cutoff = BidsService.daysAgo(30);
    return this.bidRepository.delete({
      createdAt: LessThan(cutoff),
    });
  }

  /**
   * Returns the distinct, non-null `source` values of stored bids in
   * ascending order.
   */
  async getSources() {
    const rows = await this.bidRepository
      .createQueryBuilder('bid')
      .select('DISTINCT bid.source')
      .where('bid.source IS NOT NULL')
      .orderBy('bid.source', 'ASC')
      .getRawMany();
    // Raw rows are expected to expose the column as `source` — the type of
    // the value is assumed to be a string; confirm against the entity.
    return rows.map((row: { source: string }) => row.source);
  }

  /**
   * Returns every bid published since local midnight 30 days ago, newest
   * first.
   */
  async getRecentBids(): Promise<BidItem[]> {
    const thirtyDaysAgo = BidsService.daysAgo(30);
    thirtyDaysAgo.setHours(0, 0, 0, 0);
    return this.bidRepository
      .createQueryBuilder('bid')
      .where('bid.publishDate >= :thirtyDaysAgo', { thirtyDaysAgo })
      .orderBy('bid.publishDate', 'DESC')
      .getMany();
  }

  /**
   * Returns bids published within an optional inclusive day range whose title
   * contains at least one of the given keywords, newest first.
   *
   * @param startDate Lower bound; widened to the start of that local day.
   * @param endDate Upper bound; widened to the end of that local day.
   * @param keywords Title substrings combined with OR. Blank entries are
   *   ignored, because an empty pattern would match every title.
   */
  async getBidsByDateRange(
    startDate?: string,
    endDate?: string,
    keywords?: string[],
  ): Promise<BidItem[]> {
    const qb = this.bidRepository.createQueryBuilder('bid');

    if (startDate) {
      const start = BidsService.parseLocalDay(startDate);
      start.setHours(0, 0, 0, 0);
      qb.andWhere('bid.publishDate >= :startDate', { startDate: start });
    }
    if (endDate) {
      const end = BidsService.parseLocalDay(endDate);
      end.setHours(23, 59, 59, 999);
      qb.andWhere('bid.publishDate <= :endDate', { endDate: end });
    }

    const terms = (keywords ?? []).filter(
      (term) => typeof term === 'string' && term.trim() !== '',
    );
    if (terms.length > 0) {
      // One uniquely named bind parameter per keyword keeps the OR-group
      // fully parameterized.
      const params: Record<string, string> = {};
      const conditions = terms.map((term, index) => {
        params[`keyword${index}`] = `%${term}%`;
        return `bid.title LIKE :keyword${index}`;
      });
      qb.andWhere(`(${conditions.join(' OR ')})`, params);
    }

    return qb.orderBy('bid.publishDate', 'DESC').getMany();
  }

  /**
   * Returns, for each source, the `crawl_info_add` row with the highest id,
   * ordered by source.
   *
   * The raw SQL runs through the bid repository's connection even though it
   * reads a different table. Picking `MAX(id)` per source presumably selects
   * the most recent crawl — this holds only if ids grow over time.
   *
   * @returns Objects with `source`, `count`, `latestUpdate` (the row's
   *   `createdAt`), `latestPublishDate` and `error`.
   */
  async getCrawlInfoAddStats() {
    // Fetch the latest crawl record of every source.
    const latestPerSourceSql = `
SELECT
source,
count,
latestPublishDate,
error,
createdAt as latestUpdate
FROM crawl_info_add
WHERE id IN (
SELECT MAX(id)
FROM crawl_info_add
GROUP BY source
)
ORDER BY source ASC
`;
    const rows = await this.bidRepository.query(latestPerSourceSql);
    return rows.map((row: Record<string, unknown>) => ({
      source: row.source,
      count: row.count,
      latestUpdate: row.latestUpdate,
      latestPublishDate: row.latestPublishDate,
      error: row.error,
    }));
  }
}