From 4f37b0fb616c38dcad58c7d3e59e8ad724d7ead4 Mon Sep 17 00:00:00 2001 From: dmy Date: Mon, 12 Jan 2026 14:37:18 +0800 Subject: [PATCH] =?UTF-8?q?refactor:=20=E9=87=8D=E6=9E=84=E5=89=8D?= =?UTF-8?q?=E7=AB=AF=E4=BB=A3=E7=A0=81=EF=BC=8C=E6=8B=86=E5=88=86=E7=BB=84?= =?UTF-8?q?=E4=BB=B6=E5=B9=B6=E4=BC=98=E5=8C=96README=E6=96=87=E6=A1=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 100 ++++-- frontend/src/App.vue | 433 +++---------------------- frontend/src/components/Bids.vue | 78 +++++ frontend/src/components/Dashboard.vue | 279 ++++++++++++++++ frontend/src/components/HelloWorld.vue | 41 --- frontend/src/components/Keywords.vue | 107 ++++++ src/crawler/services/cdt_target.ts | 93 ++++-- src/crawler/services/espic_target.ts | 2 +- src/schedule/tasks/bid-crawl.task.ts | 4 +- 9 files changed, 660 insertions(+), 477 deletions(-) create mode 100644 frontend/src/components/Bids.vue create mode 100644 frontend/src/components/Dashboard.vue delete mode 100644 frontend/src/components/HelloWorld.vue create mode 100644 frontend/src/components/Keywords.vue diff --git a/README.md b/README.md index 3d44be4..2b469f5 100644 --- a/README.md +++ b/README.md @@ -97,26 +97,86 @@ Nest is an MIT-licensed open source project. It can grow thanks to the sponsors Nest is [MIT licensed](https://github.com/nestjs/nest/blob/master/LICENSE). - How to Run: - 1. Database Setup: Update the .env file with your PostgreSQL credentials. +## How to Run - 1 DATABASE_TYPE=postgres - 2 DATABASE_HOST=localhost - 3 DATABASE_PORT=5432 - 4 DATABASE_USERNAME=your_username - 5 DATABASE_PASSWORD=your_password - 6 DATABASE_NAME=bidding - 7 DATABASE_SYNCHRONIZE=true - 2. Install Dependencies: - 1 npm install - 2 cd frontend && npm install - 3. Build and Start: +### 1. Database Setup +Update the `.env` file with your PostgreSQL credentials: - 1 # From the root directory - 2 cd frontend && npm run build - 3 cd .. - 4 npm run build - 5 npm run start +```env +DATABASE_TYPE=postgres +DATABASE_HOST=localhost +DATABASE_PORT=5432 +DATABASE_USERNAME=your_username +DATABASE_PASSWORD=your_password +DATABASE_NAME=bidding +DATABASE_SYNCHRONIZE=true +``` - The system will automatically initialize with the preset keywords: "山东", "海", "建设", "工程", "采购". You can - manage these and view crawled bidding information at http://localhost:3000. \ No newline at end of file +### 2. Install Dependencies + +```bash +npm install +cd frontend && npm install +``` + +### 3. Build and Start + +```bash +# From the root directory +cd frontend && npm run build +cd .. +npm run build +npm run start +``` + +## Features + +### Frontend Features + +- **Dashboard**: View high priority bids and today's bids +- **Date Filtering**: + - Click "3天" or "7天" buttons to filter bids from the last 3 or 7 days + - The filter only limits the start date, showing all data from the selected start date onwards (including data newer than the end date) +- **Keyword Filtering**: Filter bids by keywords (saved in localStorage) +- **All Bids**: View all bids with pagination and source filtering +- **Keyword Management**: Add and delete keywords with weight-based priority + +### Backend Features + +- **Multi-Source Crawling**: Crawls bidding information from multiple sources: + - ChdtpCrawler + - ChngCrawler + - SzecpCrawler + - CdtCrawler + - EpsCrawler + - CnncecpCrawler + - CgnpcCrawler + - CeicCrawler + - EspicCrawler + - PowerbeijingCrawler +- **Automatic Retry**: If a crawler returns 0 items, it will be retried after all crawlers complete +- **Proxy Support**: Configurable proxy settings via environment variables +- **Scheduled Tasks**: Automatic crawling at scheduled intervals + +### Environment Variables + +```env +# Database +DATABASE_TYPE=postgres +DATABASE_HOST=localhost +DATABASE_PORT=5432 +DATABASE_USERNAME=your_username +DATABASE_PASSWORD=your_password +DATABASE_NAME=bidding +DATABASE_SYNCHRONIZE=true + +# Proxy (optional) +PROXY_HOST=your_proxy_host +PROXY_PORT=your_proxy_port +PROXY_USERNAME=your_proxy_username +PROXY_PASSWORD=your_proxy_password +``` + +## Initial Setup + +The system will automatically initialize with the preset keywords: "山东", "海", "建设", "工程", "采购". You can manage these and view crawled bidding information at http://localhost:3000. \ No newline at end of file diff --git a/frontend/src/App.vue b/frontend/src/App.vue index 24be742..96de7ce 100644 --- a/frontend/src/App.vue +++ b/frontend/src/App.vue @@ -31,167 +31,43 @@ -
-
-

Dashboard

- - - 立刻抓取 - -
- - - - - - - - - - - - - - - - - -
-

Today's Bids

-
- - 3天 - 7天 - - - -
-
- - - - - - - - - -
+ -
-
-

All Bids

- - - -
- - - - - - - - - - -
+ -
-
-

Keyword Management

- Add Keyword -
- -
- - {{ keyword.word }} - - -
-
+
- - - - - - - - - - - - @@ -486,9 +156,4 @@ onMounted(() => { font-size: 18px; background-color: #434a50; } -.card-header { - display: flex; - justify-content: space-between; - align-items: center; -} - \ No newline at end of file + diff --git a/frontend/src/components/Bids.vue b/frontend/src/components/Bids.vue new file mode 100644 index 0000000..5cf15a1 --- /dev/null +++ b/frontend/src/components/Bids.vue @@ -0,0 +1,78 @@ + + + diff --git a/frontend/src/components/Dashboard.vue b/frontend/src/components/Dashboard.vue new file mode 100644 index 0000000..d87ec1c --- /dev/null +++ b/frontend/src/components/Dashboard.vue @@ -0,0 +1,279 @@ + + + + + diff --git a/frontend/src/components/HelloWorld.vue b/frontend/src/components/HelloWorld.vue deleted file mode 100644 index b58e52b..0000000 --- a/frontend/src/components/HelloWorld.vue +++ /dev/null @@ -1,41 +0,0 @@ - - - - - diff --git a/frontend/src/components/Keywords.vue b/frontend/src/components/Keywords.vue new file mode 100644 index 0000000..cacaa47 --- /dev/null +++ b/frontend/src/components/Keywords.vue @@ -0,0 +1,107 @@ + + + + + diff --git a/src/crawler/services/cdt_target.ts b/src/crawler/services/cdt_target.ts index bb4ae61..6ae6697 100644 --- a/src/crawler/services/cdt_target.ts +++ b/src/crawler/services/cdt_target.ts @@ -11,11 +11,11 @@ async function simulateHumanMouseMovement(page: puppeteer.Page) { for (let i = 0; i < movements; i++) { const x = Math.floor(Math.random() * viewport.width); const y = Math.floor(Math.random() * viewport.height); - + await page.mouse.move(x, y, { steps: 10 + Math.floor(Math.random() * 20) // 10-30步,使移动更平滑 }); - + // 随机停顿 100-500ms await new Promise(r => setTimeout(r, 100 + Math.random() * 400)); } @@ -27,7 +27,7 @@ async function simulateHumanScrolling(page: puppeteer.Page) { for (let i = 0; i < scrollCount; i++) { const scrollDistance = 100 + Math.floor(Math.random() * 400); // 100-500px - + await page.evaluate((distance) => { window.scrollBy({ top: distance, @@ -80,7 +80,7 @@ export const CdtCrawler = { // 模拟人类行为 logger.log('Simulating human mouse movements...'); await simulateHumanMouseMovement(page); - + logger.log('Simulating human scrolling...'); await simulateHumanScrolling(page); @@ -103,7 +103,7 @@ export const CdtCrawler = { // 模拟人类行为 logger.log('Simulating human mouse movements...'); await simulateHumanMouseMovement(page); - + logger.log('Simulating human scrolling...'); await simulateHumanScrolling(page); @@ -113,7 +113,7 @@ export const CdtCrawler = { const titles = Array.from(document.querySelectorAll('span.h-notice-title')); return titles.some(title => title.textContent && title.textContent.includes('招标公告')); }, { timeout: 30000 }); - + await page.evaluate(() => { const titles = Array.from(document.querySelectorAll('span.h-notice-title')); const targetTitle = titles.find(title => title.textContent && title.textContent.includes('招标公告')); @@ -133,13 +133,22 @@ export const CdtCrawler = { // 模拟人类行为 logger.log('Simulating human mouse movements...'); await simulateHumanMouseMovement(page); - + logger.log('Simulating human scrolling...'); await simulateHumanScrolling(page); + // 等待表格加载完成 + logger.log('Waiting for table to load...'); + await page.waitForSelector('table.layui-table', { timeout: 30000 }); + while (currentPage <= maxPages) { + // 等待表格数据加载 + await page.waitForSelector('tbody tr', { timeout: 10000 }); + + // 获取当前页面的 HTML 内容 const content = await page.content(); const pageResults = this.extract(content); + if (pageResults.length === 0) { logger.warn(`No results found on page ${currentPage}, stopping.`); break; @@ -151,43 +160,69 @@ export const CdtCrawler = { // 模拟人类行为 - 翻页前 logger.log('Simulating human mouse movements before pagination...'); await simulateHumanMouseMovement(page); - + logger.log('Simulating human scrolling before pagination...'); await simulateHumanScrolling(page); - // Find the "Next Page" button - layui pagination + // 查找下一页按钮 const nextButtonSelector = 'a.layui-laypage-next:not(.layui-disabled)'; - const nextButton = await page.$(nextButtonSelector); + const nextButtonExists = await page.evaluate((selector) => { + const btn = document.querySelector(selector); + return btn !== null && !btn.classList.contains('layui-disabled'); + }, nextButtonSelector); - if (!nextButton) { - logger.log('Next page button not found. Reached end of list.'); + if (!nextButtonExists) { + logger.log('Next page button not found or disabled. Reached end of list.'); break; } logger.log(`Navigating to page ${currentPage + 1}...`); try { - await Promise.all([ - page.waitForNavigation({ waitUntil: 'networkidle2', timeout: 60000 }), - nextButton.click(), - ]); + // 点击下一页按钮 + await page.evaluate((selector) => { + const btn = document.querySelector(selector) as HTMLElement; + if (btn) btn.click(); + }, nextButtonSelector); + + // 等待 AJAX 请求完成(通过监听网络请求) + await page.waitForFunction(() => { + // 检查表格是否正在加载 + const loading = document.querySelector('.layui-table-loading'); + return !loading; + }, { timeout: 30000 }).catch(() => {}); + + // 额外等待确保数据加载完成 + await new Promise(r => setTimeout(r, 2000)); + + // 检查是否真的翻页了(通过检查当前页码) + const currentActivePage = await page.evaluate(() => { + const activeSpan = document.querySelector('.layui-laypage-curr em:last-child'); + return activeSpan ? parseInt(activeSpan.textContent || '1') : 1; + }); + + if (currentActivePage <= currentPage) { + logger.log('Page did not change, stopping.'); + break; + } + + currentPage++; + + // 模拟人类行为 - 翻页后 + logger.log('Simulating human mouse movements after pagination...'); + await simulateHumanMouseMovement(page); + + logger.log('Simulating human scrolling after pagination...'); + await simulateHumanScrolling(page); + + // Random delay between pages + const delay = Math.floor(Math.random() * (3000 - 1000 + 1)) + 1000; + await new Promise(resolve => setTimeout(resolve, delay)); + } catch (navError) { logger.error(`Navigation to page ${currentPage + 1} failed: ${navError.message}`); break; } - - currentPage++; - - // 模拟人类行为 - 翻页后 - logger.log('Simulating human mouse movements after pagination...'); - await simulateHumanMouseMovement(page); - - logger.log('Simulating human scrolling after pagination...'); - await simulateHumanScrolling(page); - - // Random delay between pages - const delay = Math.floor(Math.random() * (3000 - 1000 + 1)) + 1000; - await new Promise(resolve => setTimeout(resolve, delay)); } return allResults; diff --git a/src/crawler/services/espic_target.ts b/src/crawler/services/espic_target.ts index 0b7959a..8aff546 100644 --- a/src/crawler/services/espic_target.ts +++ b/src/crawler/services/espic_target.ts @@ -53,7 +53,7 @@ export interface EspicResult { } export const EspicCrawler = { - name: '电能e招采平台', + name: '电能e招采平台(国电投)', baseUrl: 'https://ebid.espic.com.cn', // 生成动态 URL,使用当前日期 diff --git a/src/schedule/tasks/bid-crawl.task.ts b/src/schedule/tasks/bid-crawl.task.ts index cc5a63b..61254ed 100644 --- a/src/schedule/tasks/bid-crawl.task.ts +++ b/src/schedule/tasks/bid-crawl.task.ts @@ -14,8 +14,8 @@ export class BidCrawlTask { @Cron(CronExpression.EVERY_DAY_AT_MIDNIGHT) async handleCron() { - this.logger.debug('Scheduled crawl task started'); - await this.crawlerService.crawlAll(); + // this.logger.debug('Scheduled crawl task started'); + // await this.crawlerService.crawlAll(); } @Cron(CronExpression.EVERY_DAY_AT_MIDNIGHT)