第一次提交

This commit is contained in:
dmy
2026-01-09 23:18:52 +08:00
commit d9105797f4
46 changed files with 15003 additions and 0 deletions

View File

@@ -0,0 +1,22 @@
import { Test, TestingModule } from '@nestjs/testing';
import { AppController } from './app.controller';
import { AppService } from './app.service';
/**
 * Unit tests for AppController, resolved through a Nest testing module
 * that registers the real AppService as a provider.
 */
describe('AppController', () => {
  let appController: AppController;

  // Compile a new testing module before each test and pull the controller out of it.
  beforeEach(async () => {
    const moduleRef: TestingModule = await Test.createTestingModule({
      providers: [AppService],
      controllers: [AppController],
    }).compile();

    appController = moduleRef.get<AppController>(AppController);
  });

  describe('root', () => {
    it('should return "Hello World!"', () => {
      const greeting = appController.getHello();

      expect(greeting).toBe('Hello World!');
    });
  });
});

12
src/app.controller.ts Normal file
View File

@@ -0,0 +1,12 @@
import { Controller, Get } from '@nestjs/common';
import { AppService } from './app.service';
/**
 * Controller registered without a path prefix; exposes the greeting
 * produced by AppService.
 */
@Controller()
export class AppController {
  constructor(private readonly appService: AppService) {}

  /**
   * Handles GET requests on the controller's root route.
   *
   * @returns the string returned by AppService.getHello()
   */
  @Get()
  getHello(): string {
    const greeting = this.appService.getHello();
    return greeting;
  }
}

27
src/app.module.ts Normal file
View File

@@ -0,0 +1,27 @@
import { Module } from '@nestjs/common';
import { ConfigModule } from '@nestjs/config';
import { ScheduleModule } from '@nestjs/schedule';
import { ServeStaticModule } from '@nestjs/serve-static';
import { join } from 'path';
import { DatabaseModule } from './database/database.module';
import { BidsModule } from './bids/bids.module';
import { KeywordsModule } from './keywords/keywords.module';
import { CrawlerModule } from './crawler/crawler.module';
import { TasksModule } from './schedule/schedule.module';
// Directory served as static content: <compiled dir>/../frontend/dist.
const staticAssetsRoot = join(__dirname, '..', 'frontend', 'dist');

/**
 * Root application module.
 *
 * Registers global configuration, the scheduler, static file serving for the
 * frontend build (paths matching `/api*` are excluded from static serving),
 * and the feature modules of the application.
 */
@Module({
  imports: [
    ConfigModule.forRoot({ isGlobal: true }),
    ScheduleModule.forRoot(),
    ServeStaticModule.forRoot({
      rootPath: staticAssetsRoot,
      exclude: ['/api*'],
    }),
    DatabaseModule,
    BidsModule,
    KeywordsModule,
    CrawlerModule,
    TasksModule,
  ],
})
export class AppModule {}

8
src/app.service.ts Normal file
View File

@@ -0,0 +1,8 @@
import { Injectable } from '@nestjs/common';
/** Injectable service that supplies the application's greeting text. */
@Injectable()
export class AppService {
  /**
   * @returns the fixed greeting string
   */
  getHello(): string {
    const greeting = 'Hello World!';
    return greeting;
  }
}

13
src/bids/bids.module.ts Normal file
View File

@@ -0,0 +1,13 @@
import { Module } from '@nestjs/common';
import { TypeOrmModule } from '@nestjs/typeorm';
import { BidItem } from './entities/bid-item.entity';
import { BidsService } from './services/bid.service';
import { BidsController } from './controllers/bid.controller';
/**
 * Feature module for bid items: registers the BidItem repository,
 * the BidsController routes, and exports BidsService for other modules.
 */
@Module({
  imports: [TypeOrmModule.forFeature([BidItem])],
  controllers: [BidsController],
  providers: [BidsService],
  exports: [BidsService],
})
export class BidsModule {}

View File

@@ -0,0 +1,17 @@
import { Controller, Get, Query } from '@nestjs/common';
import { BidsService } from '../services/bid.service';
/** HTTP endpoints under `api/bids`; every handler delegates to BidsService. */
@Controller('api/bids')
export class BidsController {
  constructor(private readonly bidsService: BidsService) {}

  /**
   * Lists bid items.
   *
   * @param query raw query-string object, forwarded unchanged to BidsService.findAll
   * @returns whatever BidsService.findAll resolves to
   */
  @Get()
  findAll(@Query() query: any) {
    const listing = this.bidsService.findAll(query);
    return listing;
  }

  /**
   * Handles GET `high-priority` (relative to the controller prefix).
   *
   * @returns the result of BidsService.getHighPriorityCorrected()
   */
  @Get('high-priority')
  getHighPriority() {
    const prioritized = this.bidsService.getHighPriorityCorrected();
    return prioritized;
  }
}

View File

@@ -0,0 +1,28 @@
import { Entity, PrimaryGeneratedColumn, Column, CreateDateColumn, UpdateDateColumn } from 'typeorm';
/**
 * A single bid/tender announcement stored in the `bid_items` table.
 */
@Entity('bid_items')
export class BidItem {
  /** Generated UUID primary key. */
  @PrimaryGeneratedColumn('uuid')
  id: string;

  /** Announcement title. */
  @Column()
  title: string;

  /** Link to the announcement; BidsService.createOrUpdate uses it to detect duplicates. */
  @Column()
  url: string;

  /** Publication date of the announcement. */
  @Column({ type: 'datetime' })
  publishDate: Date;

  /** Label of the origin the item came from. */
  @Column()
  source: string;

  /**
   * Ranking value used by the high-priority queries in BidsService, which
   * select rows with `priority > 0` and sort by it descending.
   * Defaults to 0 so existing and newly crawled rows are simply "not prioritized"
   * instead of making those queries reference a non-existent column.
   */
  @Column({ type: 'int', default: 0 })
  priority: number;

  /** Read flag; new rows start as unread. */
  @Column({ default: false })
  isRead: boolean;

  /** Row creation timestamp; BidsService.cleanOldData deletes rows by this value. */
  @CreateDateColumn()
  createdAt: Date;

  /** Timestamp of the last row update. */
  @UpdateDateColumn()
  updatedAt: Date;
}

View File

@@ -0,0 +1,68 @@
import { Injectable } from '@nestjs/common';
import { InjectRepository } from '@nestjs/typeorm';
import { Repository, LessThan } from 'typeorm';
import { BidItem } from '../entities/bid-item.entity';
/**
 * Data-access service for bid items: paginated listing, priority queries,
 * URL-keyed upsert and age-based cleanup.
 */
@Injectable()
export class BidsService {
  constructor(
    @InjectRepository(BidItem)
    private bidRepository: Repository<BidItem>,
  ) {}

  /**
   * Converts a pagination value (typically a query-string string) to a positive
   * integer, returning `fallback` when it is missing, non-numeric, zero or negative.
   */
  private static toPositiveInt(value: unknown, fallback: number): number {
    const parsed = Number.parseInt(String(value), 10);
    return Number.isFinite(parsed) && parsed > 0 ? parsed : fallback;
  }

  /**
   * Returns one page of bid items ordered by publish date, newest first.
   *
   * @param query optional filter/pagination object:
   *   `page` (default 1), `limit` (default 10), `source` (exact match),
   *   `keyword` (substring match on the title)
   * @returns `{ items, total }` where `total` is the count of all matching rows
   */
  async findAll(query?: any) {
    const { source, keyword } = query || {};
    // Query-string values arrive as strings and may be garbage; normalise them so
    // skip/take never receive NaN, zero or negative numbers.
    const page = BidsService.toPositiveInt(query?.page, 1);
    const limit = BidsService.toPositiveInt(query?.limit, 10);

    const qb = this.bidRepository.createQueryBuilder('bid');
    if (source) {
      qb.andWhere('bid.source = :source', { source });
    }
    if (keyword) {
      qb.andWhere('bid.title LIKE :keyword', { keyword: `%${keyword}%` });
    }
    qb.orderBy('bid.publishDate', 'DESC')
      .skip((page - 1) * limit)
      .take(limit);

    const [items, total] = await qb.getManyAndCount();
    return { items, total };
  }

  /**
   * Returns up to 10 items with a positive priority, highest priority first,
   * then newest first.
   *
   * The previous implementation filtered on `priority < 0`, the opposite of the
   * intended condition, so it now delegates to the corrected query.
   */
  getHighPriority() {
    return this.getHighPriorityCorrected();
  }

  /**
   * Returns up to 10 items whose priority is greater than 0, ordered by
   * priority descending and then publish date descending.
   */
  async getHighPriorityCorrected() {
    return this.bidRepository.createQueryBuilder('bid')
      .where('bid.priority > 0')
      .orderBy('bid.priority', 'DESC')
      .addOrderBy('bid.publishDate', 'DESC')
      .limit(10)
      .getMany();
  }

  /**
   * Inserts a bid item, or updates the existing row that has the same URL.
   *
   * @param data fields to store; `url` is required because it is the duplicate key
   * @returns the saved entity
   * @throws Error when `data.url` is missing or empty
   */
  async createOrUpdate(data: Partial<BidItem>) {
    // Without a URL the lookup below has no usable filter; TypeORM find options
    // may drop an undefined where-value and match an unrelated row, which would
    // then be overwritten. Fail loudly instead.
    if (!data.url) {
      throw new Error('createOrUpdate requires a url to identify the bid item');
    }

    const item = await this.bidRepository.findOne({ where: { url: data.url } });
    if (item) {
      Object.assign(item, data);
      return this.bidRepository.save(item);
    }
    return this.bidRepository.save(data);
  }

  /**
   * Deletes every item whose `createdAt` is more than 30 days in the past.
   *
   * @returns the delete result reported by the repository
   */
  async cleanOldData() {
    const thirtyDaysAgo = new Date();
    thirtyDaysAgo.setDate(thirtyDaysAgo.getDate() - 30);
    return this.bidRepository.delete({
      createdAt: LessThan(thirtyDaysAgo),
    });
  }
}

View File

@@ -0,0 +1,21 @@
import { Controller, Post } from '@nestjs/common';
import { BidCrawlerService } from './services/bid-crawler.service';
/** HTTP endpoints under `api/crawler` for triggering a crawl on demand. */
@Controller('api/crawler')
export class CrawlerController {
  constructor(private readonly crawlerService: BidCrawlerService) {}

  /**
   * Handles POST `run`: starts a full crawl and waits for it to finish before
   * responding, so the caller learns when the run is over.
   *
   * Awaiting keeps the request open for the whole crawl; running it in the
   * background would suit long real-world crawls better, but this prototype
   * deliberately waits.
   *
   * NOTE(review): BidCrawlerService.crawlAll logs failures instead of rethrowing,
   * so this success message is returned even when the crawl failed.
   *
   * @returns a fixed confirmation message
   */
  @Post('run')
  async runCrawl() {
    await this.crawlerService.crawlAll();

    const response = { message: 'Crawl completed successfully' };
    return response;
  }
}

View File

@@ -0,0 +1,12 @@
import { Module } from '@nestjs/common';
import { BidCrawlerService } from './services/bid-crawler.service';
import { CrawlerController } from './crawler.controller';
import { BidsModule } from '../bids/bids.module';
/**
 * Feature module for crawling: imports BidsModule (for BidsService), exposes the
 * crawler HTTP controller, and exports BidCrawlerService to other modules.
 */
@Module({
  imports: [BidsModule],
  providers: [BidCrawlerService],
  controllers: [CrawlerController],
  exports: [BidCrawlerService],
})
export class CrawlerModule {}

View File

@@ -0,0 +1,46 @@
import { Injectable, Logger } from '@nestjs/common';
import * as puppeteer from 'puppeteer';
import { BidsService } from '../../bids/services/bid.service';
import { ChdtpCrawler } from './chdtp_target';
/**
 * Runs the site crawlers in a headless Puppeteer browser and persists the
 * extracted announcements through BidsService.
 */
@Injectable()
export class BidCrawlerService {
  private readonly logger = new Logger(BidCrawlerService.name);

  constructor(
    private bidsService: BidsService,
  ) {}

  /**
   * Launches a browser, runs the crawler, and upserts every extracted item.
   *
   * Failures during crawling or saving are logged, not rethrown; the browser is
   * always closed afterwards. A failure to launch the browser itself propagates
   * to the caller because it happens before the try block.
   */
  async crawlAll() {
    this.logger.log('Starting crawl task with Puppeteer...');
    const browser = await puppeteer.launch({
      headless: true,
      args: ['--no-sandbox', '--disable-setuid-sandbox'],
    });

    try {
      // Currently only supports ChdtpCrawler, but can be extended to a list of crawlers
      const crawler = ChdtpCrawler;
      this.logger.log(`Crawling: ${crawler.name}`);

      const results = await crawler.crawl(browser);
      this.logger.log(`Extracted ${results.length} items from ${crawler.name}`);

      for (const item of results) {
        // Read the fields from the crawled item; the crawler result carries no
        // separate type/source field, so the crawler's name labels the origin.
        await this.bidsService.createOrUpdate({
          title: item.title,
          url: item.url,
          publishDate: item.publishDate,
          source: crawler.name || 'Unknown',
        });
      }
    } catch (error) {
      // The caught value is not guaranteed to be an Error instance.
      const reason = error instanceof Error ? error.message : String(error);
      this.logger.error(`Crawl task failed: ${reason}`);
    } finally {
      await browser.close();
      this.logger.log('Crawl task finished.');
    }
  }
}

View File

@@ -0,0 +1,51 @@
import { ChdtpCrawler } from './chdtp_target';
import * as puppeteer from 'puppeteer';
// Network-bound test: allow up to 60 seconds per test
jest.setTimeout(60000);

/**
 * Live test that runs ChdtpCrawler against the real website with a shared
 * headless browser and prints everything it finds.
 */
describe('ChdtpCrawler Real Site Test', () => {
  let browser: puppeteer.Browser;

  // One browser instance is launched for the whole suite.
  beforeAll(async () => {
    browser = await puppeteer.launch({
      headless: true, // Change to false to see the browser UI
      args: ['--no-sandbox', '--disable-setuid-sandbox'],
    });
  });

  // Close the browser only if the launch succeeded.
  afterAll(async () => {
    if (browser) {
      await browser.close();
    }
  });

  it('should visit the website and list all found bid information', async () => {
    console.log(`\nStarting crawl for: ${ChdtpCrawler.name}`);
    console.log(`Target URL: ${ChdtpCrawler.url}`);

    const results = await ChdtpCrawler.crawl(browser);

    console.log(`\nSuccessfully found ${results.length} items:\n`);
    console.log('----------------------------------------');
    for (const [index, item] of results.entries()) {
      console.log(`${index + 1}. [${item.publishDate.toLocaleDateString()}] ${item.title}`);
      console.log(` Link: ${item.url}`);
      console.log('----------------------------------------');
    }

    // Sanity checks on the overall result shape
    expect(results).toBeDefined();
    expect(Array.isArray(results)).toBeTruthy();

    if (results.length > 0) {
      // Verify the first item carries a title, an absolute URL and a Date
      const [firstItem] = results;
      expect(firstItem.title).toBeTruthy();
      expect(firstItem.url).toMatch(/^https?:\/\//);
      expect(firstItem.publishDate).toBeInstanceOf(Date);
    } else {
      // An empty list only warns: the site may be empty or its markup may have changed
      console.warn('Warning: No items found. Check if the website structure has changed or if the list is currently empty.');
    }
  });
});

View File

@@ -0,0 +1,110 @@
import * as puppeteer from 'puppeteer';
import { Logger } from '@nestjs/common';
/** One announcement extracted by the Chdtp crawler. */
export interface ChdtpResult {
  /** Announcement title. */
  title: string;
  /** Publication date of the announcement. */
  publishDate: Date;
  /** Link to the announcement; required so the system can identify items uniquely. */
  url: string;
}
/**
 * Crawler for the tender announcement list at chdtp.com.
 *
 * `crawl` pages through the list in a Puppeteer browser; `extract` pulls the
 * announcements out of one page of HTML.
 *
 * NOTE(review): confirm that `name` matches the organisation that operates
 * chdtp.com; it is stored as the item source by callers.
 */
export const ChdtpCrawler = {
  name: '中国华能集团',
  url: 'https://www.chdtp.com/webs/queryWebZbgg.action?zbggType=1',
  baseUrl: 'https://www.chdtp.com/webs/',

  /**
   * Opens the list page and walks through at most `maxPages` pages, collecting
   * the announcements of each one.
   *
   * @param browser an already launched Puppeteer browser; only the page opened
   *   here is closed, the browser itself is left to the caller
   * @returns every item extracted so far; on failure the partial list is
   *   returned instead of throwing
   */
  async crawl(browser: puppeteer.Browser): Promise<ChdtpResult[]> {
    const logger = new Logger('ChdtpCrawler');
    const page = await browser.newPage();
    await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36');

    const allResults: ChdtpResult[] = [];
    let currentPage = 1;
    const maxPages = 5; // Safety limit to prevent infinite loops during testing

    try {
      logger.log(`Navigating to ${this.url}...`);
      await page.goto(this.url, { waitUntil: 'networkidle2', timeout: 60000 });

      while (currentPage <= maxPages) {
        const content = await page.content();
        const pageResults = this.extract(content);
        if (pageResults.length === 0) {
          logger.warn(`No results found on page ${currentPage}, stopping.`);
          break;
        }
        allResults.push(...pageResults);
        logger.log(`Extracted ${pageResults.length} items from page ${currentPage}`);

        // The last permitted page has been read: do not click "next" (and wait)
        // for a page whose content would never be extracted.
        if (currentPage >= maxPages) {
          logger.log(`Reached page limit of ${maxPages}, stopping.`);
          break;
        }

        // Find the "Next Page" button
        // Using partial match for src to be robust against path variations
        const nextButtonSelector = 'input[type="image"][src*="page-next.png"]';
        const nextButton = await page.$(nextButtonSelector);
        if (!nextButton) {
          logger.log('Next page button not found. Reached end of list.');
          break;
        }

        // No disabled-state check: for this site we simply try to click.
        logger.log(`Navigating to page ${currentPage + 1}...`);
        try {
          // Start waiting for the navigation before clicking so it cannot be missed.
          await Promise.all([
            page.waitForNavigation({ waitUntil: 'networkidle2', timeout: 60000 }),
            nextButton.click(),
          ]);
        } catch (navError) {
          const reason = navError instanceof Error ? navError.message : String(navError);
          logger.error(`Navigation to page ${currentPage + 1} failed: ${reason}`);
          break;
        }
        currentPage++;

        // Random delay of 1000-3000 ms between pages
        const delay = Math.floor(Math.random() * (3000 - 1000 + 1)) + 1000;
        await new Promise(resolve => setTimeout(resolve, delay));
      }
      return allResults;
    } catch (error) {
      const reason = error instanceof Error ? error.message : String(error);
      logger.error(`Failed to crawl ${this.name}: ${reason}`);
      return allResults; // Return what we have so far
    } finally {
      await page.close();
    }
  },

  /**
   * Extracts announcements from the HTML of one list page.
   *
   * Rows without a title or URL suffix are skipped. A missing or unparseable
   * date falls back to the current time so callers never receive an Invalid Date.
   *
   * @param html full HTML of a list page
   * @returns the announcements found, in document order
   */
  extract(html: string): ChdtpResult[] {
    const results: ChdtpResult[] = [];
    /**
     * Regex groups for chdtp.com:
     * 1: Status
     * 2: URL suffix
     * 3: Title
     * 4: Business Type
     * 5: Date
     */
    const regex = /<tr[^>]*>\s*<td class="td_1">.*?<span[^>]*>\s*(.*?)\s*<\/span>.*?<\/td>\s*<td class="td_2">\s*<a[^>]*href="javascript:toGetContent\('(.*?)'\)" title="(.*?)">.*?<\/a><\/td>\s*<td class="td_3">\s*<a[^>]*>\s*(.*?)\s*<\/a>\s*<\/td>\s*<td class="td_4"><span>\[(.*?)\]<\/span><\/td>/gs;

    let match: RegExpExecArray | null;
    while ((match = regex.exec(html)) !== null) {
      const urlSuffix = match[2]?.trim();
      const title = match[3]?.trim();
      const dateStr = match[5]?.trim();
      if (title && urlSuffix) {
        const parsedDate = dateStr ? new Date(dateStr) : new Date();
        results.push({
          title,
          // new Date(<garbage>) yields an Invalid Date rather than throwing
          publishDate: Number.isNaN(parsedDate.getTime()) ? new Date() : parsedDate,
          url: this.baseUrl + urlSuffix
        });
      }
    }
    return results;
  }
};

View File

@@ -0,0 +1,25 @@
import { Module } from '@nestjs/common';
import { TypeOrmModule } from '@nestjs/typeorm';
import { ConfigModule, ConfigService } from '@nestjs/config';
import { BidItem } from '../bids/entities/bid-item.entity';
import { Keyword } from '../keywords/keyword.entity';
/**
 * Configures the TypeORM connection from configuration values.
 *
 * Recognised keys and their defaults: DATABASE_TYPE ('mariadb'),
 * DATABASE_HOST ('localhost'), DATABASE_PORT (3306), DATABASE_USERNAME ('root'),
 * DATABASE_PASSWORD ('root'), DATABASE_NAME ('bidding'),
 * DATABASE_SYNCHRONIZE (true).
 *
 * NOTE(review): the default credentials and synchronize=true are development
 * conveniences; confirm they are overridden in production.
 */
@Module({
  imports: [
    TypeOrmModule.forRootAsync({
      imports: [ConfigModule],
      inject: [ConfigService],
      useFactory: (configService: ConfigService) => {
        // Values that come from environment variables are strings, so the port
        // must be converted and falls back to 3306 when it is not numeric.
        const rawPort = configService.get<string | number>('DATABASE_PORT', 3306);
        const parsedPort = Number(rawPort);
        const port = Number.isFinite(parsedPort) && parsedPort > 0 ? parsedPort : 3306;

        // The string "false" is truthy; interpret the textual forms explicitly so
        // DATABASE_SYNCHRONIZE=false really disables schema synchronisation.
        const rawSynchronize = configService.get<string | boolean>('DATABASE_SYNCHRONIZE', true);
        const synchronize =
          typeof rawSynchronize === 'string'
            ? !['false', '0', 'no', 'off', ''].includes(rawSynchronize.trim().toLowerCase())
            : Boolean(rawSynchronize);

        return {
          type: configService.get<any>('DATABASE_TYPE', 'mariadb'),
          host: configService.get<string>('DATABASE_HOST', 'localhost'),
          port,
          username: configService.get<string>('DATABASE_USERNAME', 'root'),
          password: configService.get<string>('DATABASE_PASSWORD', 'root'),
          database: configService.get<string>('DATABASE_NAME', 'bidding'),
          entities: [BidItem, Keyword],
          synchronize,
        };
      },
    }),
  ],
})
export class DatabaseModule {}

View File

@@ -0,0 +1,19 @@
import { Entity, PrimaryGeneratedColumn, Column, CreateDateColumn, UpdateDateColumn } from 'typeorm';
/** A keyword stored in the `keywords` table together with its weight. */
@Entity('keywords')
export class Keyword {
  /** Generated UUID primary key. */
  @PrimaryGeneratedColumn('uuid')
  id: string;

  /** The keyword text; unique across the table. */
  @Column({ unique: true })
  word: string;

  /** Weight of the keyword, defaulting to 1. */
  @Column({ default: 1 })
  weight: number; // level 1-5

  /** Row creation timestamp. */
  @CreateDateColumn()
  createdAt: Date;

  /** Timestamp of the last row update. */
  @UpdateDateColumn()
  updatedAt: Date;
}

View File

@@ -0,0 +1,22 @@
import { Controller, Get, Post, Body, Delete, Param } from '@nestjs/common';
import { KeywordsService } from './keywords.service';
/** HTTP endpoints under `api/keywords`; every handler delegates to KeywordsService. */
@Controller('api/keywords')
export class KeywordsController {
  constructor(private readonly keywordsService: KeywordsService) {}

  /** Handles GET on the controller root and returns all keywords. */
  @Get()
  findAll() {
    const keywords = this.keywordsService.findAll();
    return keywords;
  }

  /**
   * Handles POST on the controller root and creates a keyword.
   *
   * @param word `word` field of the request body
   * @param weight `weight` field of the request body (not validated here)
   */
  @Post()
  create(@Body('word') word: string, @Body('weight') weight: number) {
    const created = this.keywordsService.create(word, weight);
    return created;
  }

  /**
   * Handles DELETE `:id` and removes the keyword with that id.
   *
   * @param id value of the `id` route parameter
   */
  @Delete(':id')
  remove(@Param('id') id: string) {
    const outcome = this.keywordsService.remove(id);
    return outcome;
  }
}

View File

@@ -0,0 +1,13 @@
import { Module } from '@nestjs/common';
import { TypeOrmModule } from '@nestjs/typeorm';
import { Keyword } from './keyword.entity';
import { KeywordsService } from './keywords.service';
import { KeywordsController } from './keywords.controller';
/**
 * Feature module for keywords: registers the Keyword repository, the
 * KeywordsController routes, and exports KeywordsService for other modules.
 */
@Module({
  imports: [TypeOrmModule.forFeature([Keyword])],
  controllers: [KeywordsController],
  providers: [KeywordsService],
  exports: [KeywordsService],
})
export class KeywordsModule {}

View File

@@ -0,0 +1,35 @@
import { Injectable, OnModuleInit } from '@nestjs/common';
import { InjectRepository } from '@nestjs/typeorm';
import { Repository } from 'typeorm';
import { Keyword } from './keyword.entity';
/**
 * Data-access service for keywords; also seeds a default keyword set when the
 * module initialises.
 */
@Injectable()
export class KeywordsService implements OnModuleInit {
  constructor(
    @InjectRepository(Keyword)
    private keywordRepository: Repository<Keyword>,
  ) {}

  /**
   * Module-init hook: inserts each default keyword with weight 1 unless a row
   * with the same word already exists. Keywords are processed one at a time.
   */
  async onModuleInit() {
    // Initial preset keywords
    const defaultKeywords = ["山东", "海", "建设", "工程", "采购"];

    for (const word of defaultKeywords) {
      const existing = await this.keywordRepository.findOne({ where: { word } });
      if (existing) {
        continue;
      }
      await this.keywordRepository.save({ word, weight: 1 });
    }
  }

  /** Returns every stored keyword. */
  findAll() {
    const allKeywords = this.keywordRepository.find();
    return allKeywords;
  }

  /**
   * Saves a keyword.
   *
   * @param word keyword text
   * @param weight keyword weight; defaults to 1 when omitted
   */
  create(word: string, weight: number = 1) {
    const payload = { word, weight };
    return this.keywordRepository.save(payload);
  }

  /**
   * Deletes the keyword with the given id.
   *
   * @param id primary key of the keyword to delete
   */
  remove(id: string) {
    const outcome = this.keywordRepository.delete(id);
    return outcome;
  }
}

8
src/main.ts Normal file
View File

@@ -0,0 +1,8 @@
import { NestFactory } from '@nestjs/core';
import { AppModule } from './app.module';
/**
 * Creates the Nest application from AppModule and starts listening on the
 * port given by the PORT environment variable, or 3000 when it is unset.
 */
async function bootstrap() {
  const app = await NestFactory.create(AppModule);
  const port = process.env.PORT ?? 3000;
  await app.listen(port);
}

// Start the application as soon as this file is loaded.
bootstrap();

View File

@@ -0,0 +1,10 @@
import { Module } from '@nestjs/common';
import { BidCrawlTask } from './tasks/bid-crawl.task';
import { CrawlerModule } from '../crawler/crawler.module';
import { BidsModule } from '../bids/bids.module';
/**
 * Module hosting the scheduled jobs; imports CrawlerModule and BidsModule so
 * BidCrawlTask can use the services they export.
 */
@Module({
  providers: [BidCrawlTask],
  imports: [CrawlerModule, BidsModule],
})
export class TasksModule {}

View File

@@ -0,0 +1,26 @@
import { Injectable, Logger } from '@nestjs/common';
import { Cron, CronExpression } from '@nestjs/schedule';
import { BidCrawlerService } from '../../crawler/services/bid-crawler.service';
import { BidsService } from '../../bids/services/bid.service';
/** Scheduled jobs: a recurring crawl and a daily cleanup of old bid items. */
@Injectable()
export class BidCrawlTask {
  private readonly logger = new Logger(BidCrawlTask.name);

  constructor(
    private crawlerService: BidCrawlerService,
    private bidsService: BidsService,
  ) {}

  /** Runs on the EVERY_30_MINUTES schedule and performs a full crawl. */
  @Cron(CronExpression.EVERY_30_MINUTES)
  async handleCron() {
    this.logger.debug('Scheduled crawl task started');

    await this.crawlerService.crawlAll();
  }

  /** Runs on the EVERY_DAY_AT_MIDNIGHT schedule and asks BidsService to clean old data. */
  @Cron(CronExpression.EVERY_DAY_AT_MIDNIGHT)
  async handleCleanup() {
    this.logger.debug('Scheduled cleanup task started');

    await this.bidsService.cleanOldData();
  }
}