From 044fd770f7accbdf9d708f7195145da8836edf8d Mon Sep 17 00:00:00 2001 From: dmy Date: Sun, 11 Jan 2026 21:35:24 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20=E5=A2=9E=E5=BC=BA=E5=8D=8E=E8=83=BD?= =?UTF-8?q?=E7=94=B5=E5=95=86=E5=B9=B3=E5=8F=B0=E7=88=AC=E8=99=AB=E7=9A=84?= =?UTF-8?q?=E5=8F=8D=E6=A3=80=E6=B5=8B=E8=83=BD=E5=8A=9B=E5=92=8C=E6=95=B0?= =?UTF-8?q?=E6=8D=AE=E7=BB=93=E6=9E=84?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 在 BidItem 实体中添加 priority 和 unit 字段,扩展数据结构 - 将爬虫浏览器模式改为非无头模式(headless: false)便于调试 - 为 ChngCrawler 添加人类行为模拟功能: * 模拟鼠标随机移动,增加移动步数和停顿时间 * 模拟人类滚动行为,包括随机滚动距离和停顿 * 添加 navigator 属性伪装,包括语言、插件等 - 在关键节点添加截图功能(bing.png, newPage.png, huaneng.png)用于调试 - 优化反检测策略,降低被目标网站识别为机器人的风险 --- src/bids/entities/bid-item.entity.ts | 6 ++ src/crawler/services/bid-crawler.service.ts | 2 +- src/crawler/services/chng_target.ts | 73 ++++++++++++++++++++- 3 files changed, 79 insertions(+), 2 deletions(-) diff --git a/src/bids/entities/bid-item.entity.ts b/src/bids/entities/bid-item.entity.ts index 7b793f8..5e012d1 100644 --- a/src/bids/entities/bid-item.entity.ts +++ b/src/bids/entities/bid-item.entity.ts @@ -20,6 +20,12 @@ export class BidItem { @Column({ default: false }) isRead: boolean; + @Column({ default: 0 }) + priority: number; + + @Column({ nullable: true }) + unit: string; + @CreateDateColumn() createdAt: Date; diff --git a/src/crawler/services/bid-crawler.service.ts b/src/crawler/services/bid-crawler.service.ts index 8f4d973..9aaf147 100644 --- a/src/crawler/services/bid-crawler.service.ts +++ b/src/crawler/services/bid-crawler.service.ts @@ -43,7 +43,7 @@ export class BidCrawlerService { } const browser = await puppeteer.launch({ - headless: true, + headless: false, args, }); diff --git a/src/crawler/services/chng_target.ts b/src/crawler/services/chng_target.ts index d51de6e..8eb8710 100644 --- a/src/crawler/services/chng_target.ts +++ b/src/crawler/services/chng_target.ts @@ -2,6 +2,51 @@ import * as puppeteer from 
'puppeteer'; import { Logger } from '@nestjs/common'; import { ChdtpResult } from './chdtp_target'; +// Simulate human-like mouse movement: move the pointer to random viewport coordinates with random pauses (does nothing when the page has no viewport) +async function simulateHumanMouseMovement(page: puppeteer.Page) { + const viewport = page.viewport(); + if (!viewport) return; + + const movements = 5 + Math.floor(Math.random() * 5); // 5-9 random moves + + for (let i = 0; i < movements; i++) { + const x = Math.floor(Math.random() * viewport.width); + const y = Math.floor(Math.random() * viewport.height); + + await page.mouse.move(x, y, { + steps: 10 + Math.floor(Math.random() * 20) // 10-29 steps, to make the movement smoother + }); + + // Random pause of 100-500 ms + await new Promise(r => setTimeout(r, 100 + Math.random() * 400)); + } +} + +// Simulate human-like scrolling: several smooth downward scrolls with random pauses, then a smooth scroll back to the top +async function simulateHumanScrolling(page: puppeteer.Page) { + const scrollCount = 3 + Math.floor(Math.random() * 5); // 3-7 scrolls + + for (let i = 0; i < scrollCount; i++) { + const scrollDistance = 100 + Math.floor(Math.random() * 400); // 100-499 px + + await page.evaluate((distance) => { + window.scrollBy({ + top: distance, + behavior: 'smooth' + }); + }, scrollDistance); + + // Random pause of 500-1500 ms + await new Promise(r => setTimeout(r, 500 + Math.random() * 1000)); + } + + // Scroll back to the top + await page.evaluate(() => { + window.scrollTo({ top: 0, behavior: 'smooth' }); + }); + await new Promise(r => setTimeout(r, 1000)); +} + export const ChngCrawler = { name: '华能集团电子商务平台', url: 'https://ec.chng.com.cn/ecmall/index.html#/purchase/home?top=0', @@ -10,7 +55,8 @@ export const ChngCrawler = { async crawl(browser: puppeteer.Browser): Promise { const logger = new Logger('ChngCrawler'); let page = await browser.newPage(); - + // await page.setViewport({ width: 1920, height: 1080, deviceScaleFactor: 1 }); + // await page.setViewport({ deviceScaleFactor: 1 }); const username = process.env.PROXY_USERNAME; const password = process.env.PROXY_PASSWORD; if (username && password) { @@ -19,6 +65,8 @@ export const ChngCrawler = { await page.evaluateOnNewDocument(() => { Object.defineProperty(navigator, 'webdriver', { get: () 
=> false }); + Object.defineProperty(navigator, 'language', { get: () => "zh-CN"}); + Object.defineProperty(navigator, 'plugins', { get: () => [1,2,3,4,5]}); }); await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/143.0.0.0 Safari/537.36'); @@ -40,6 +88,7 @@ export const ChngCrawler = { await page.waitForNavigation({ waitUntil: 'networkidle2' }); logger.log('Clicking search result...'); + await page.screenshot({ path: 'bing.png' }); const firstResultSelector = '#b_results .b_algo h2 a'; await page.waitForSelector(firstResultSelector); @@ -50,18 +99,33 @@ export const ChngCrawler = { const newPage = await newTarget.page(); if (newPage) { + await newPage.screenshot({ path: 'newPage.png' }); await page.close(); page = newPage; if (username && password) { await page.authenticate({ username, password }); } } + // 模拟人类行为 + logger.log('Simulating human mouse movements...'); + await simulateHumanMouseMovement(page); + + logger.log('Simulating human scrolling...'); + await simulateHumanScrolling(page); await page.waitForNavigation({ waitUntil: 'domcontentloaded' }).catch(() => {}); + // 模拟人类行为 + logger.log('Simulating human mouse movements...'); + await simulateHumanMouseMovement(page); + + logger.log('Simulating human scrolling...'); + await simulateHumanScrolling(page); + // PAUSE 15 SECONDS as requested logger.log('Pausing 15 seconds before looking for "采购专栏"...'); await new Promise(r => setTimeout(r, 15000)); + await page.screenshot({ path: 'huaneng.png' }); logger.log('Looking for "采购专栏" link...'); await page.waitForFunction(() => { @@ -92,6 +156,13 @@ export const ChngCrawler = { logger.log(`Active URL: ${page.url()}`); + // 模拟人类行为 + logger.log('Simulating human mouse movements...'); + await simulateHumanMouseMovement(page); + + logger.log('Simulating human scrolling...'); + await simulateHumanScrolling(page); + while (currentPage <= maxPages) { logger.log(`Processing page ${currentPage}...`);