From eba5c7e5c5a1bc9d376d4ed116a5dd8d65179c1a Mon Sep 17 00:00:00 2001 From: dmy Date: Thu, 15 Jan 2026 15:34:00 +0800 Subject: [PATCH] feat(crawler): add human-like behavior simulation to prevent detection refactor(manifest): update appid and format manifest file --- frontend/src/components/CrawlInfo.vue | 27 ++++- src/crawler/services/chdtp_target.ts | 63 +++++++++++- uni-app-version/src/manifest.json | 140 +++++++++++++------------- 3 files changed, 155 insertions(+), 75 deletions(-) diff --git a/frontend/src/components/CrawlInfo.vue b/frontend/src/components/CrawlInfo.vue index d8c6951..58172e7 100644 --- a/frontend/src/components/CrawlInfo.vue +++ b/frontend/src/components/CrawlInfo.vue @@ -74,7 +74,7 @@ diff --git a/src/crawler/services/chdtp_target.ts b/src/crawler/services/chdtp_target.ts index 598b41a..fc44b03 100644 --- a/src/crawler/services/chdtp_target.ts +++ b/src/crawler/services/chdtp_target.ts @@ -1,10 +1,50 @@ import * as puppeteer from 'puppeteer'; import { Logger } from '@nestjs/common'; +async function simulateHumanMouseMovement(page: puppeteer.Page) { + const viewport = page.viewport(); + if (!viewport) return; + + const movements = 5 + Math.floor(Math.random() * 5); + + for (let i = 0; i < movements; i++) { + const x = Math.floor(Math.random() * viewport.width); + const y = Math.floor(Math.random() * viewport.height); + + await page.mouse.move(x, y, { + steps: 10 + Math.floor(Math.random() * 20), + }); + + await new Promise((r) => setTimeout(r, 100 + Math.random() * 400)); + } +} + +async function simulateHumanScrolling(page: puppeteer.Page) { + const scrollCount = 3 + Math.floor(Math.random() * 5); + + for (let i = 0; i < scrollCount; i++) { + const scrollDistance = 100 + Math.floor(Math.random() * 400); + + await page.evaluate((distance) => { + window.scrollBy({ + top: distance, + behavior: 'smooth', + }); + }, scrollDistance); + + await new Promise((r) => setTimeout(r, 500 + Math.random() * 1000)); + } + + await page.evaluate(() => { + window.scrollTo({ top: 0, behavior: 'smooth' }); + }); + await new Promise((r) => setTimeout(r, 1000)); +} + export interface ChdtpResult { title: string; publishDate: Date; - url: string; // Necessary for system uniqueness + url: string; } interface ChdtpCrawlerType { @@ -101,6 +141,12 @@ export const ChdtpCrawler = { logger, ); + logger.log('Simulating human mouse movements...'); + await simulateHumanMouseMovement(page); + + logger.log('Simulating human scrolling...'); + await simulateHumanScrolling(page); + while (currentPage <= maxPages) { const content = await page.content(); const pageResults = this.extract(content); @@ -115,6 +161,12 @@ export const ChdtpCrawler = { `Extracted ${pageResults.length} items from page ${currentPage}`, ); + logger.log('Simulating human mouse movements before pagination...'); + await simulateHumanMouseMovement(page); + + logger.log('Simulating human scrolling before pagination...'); + await simulateHumanScrolling(page); + // Find the "Next Page" button // Using partial match for src to be robust against path variations const nextButtonSelector = 'input[type="image"][src*="page-next.png"]'; @@ -125,9 +177,6 @@ export const ChdtpCrawler = { break; } - // Optional: Check if the button is disabled (though image inputs usually aren't "disabled" in the same way) - // For this specific site, we'll try to click. - logger.log(`Navigating to page ${currentPage + 1}...`); try { @@ -149,6 +198,12 @@ export const ChdtpCrawler = { currentPage++; + logger.log('Simulating human mouse movements after pagination...'); + await simulateHumanMouseMovement(page); + + logger.log('Simulating human scrolling after pagination...'); + await simulateHumanScrolling(page); + // Random delay between pages const delay = Math.floor(Math.random() * (3000 - 1000 + 1)) + 1000; await new Promise((resolve) => setTimeout(resolve, delay)); diff --git a/uni-app-version/src/manifest.json b/uni-app-version/src/manifest.json index 04578ab..3c3969e 100644 --- a/uni-app-version/src/manifest.json +++ b/uni-app-version/src/manifest.json @@ -1,73 +1,75 @@ { - "name": "bidding-looker", - "appid": "__UNI__BIDDING_LOOKER", - "description": "投标项目查看器", - "versionName": "1.0.0", - "versionCode": "100", - "transformPx": false, - "app-plus": { - "usingComponents": true, - "nvueStyleCompiler": "uni-app", - "compilerVersion": 3, - "splashscreen": { - "alwaysShowBeforeRender": true, - "waiting": true, - "autoclose": true, - "delay": 0 + "name" : "bidding-looker", + "appid" : "__UNI__1D1820F", + "description" : "投标项目查看器", + "versionName" : "1.0.0", + "versionCode" : "100", + "transformPx" : false, + "app-plus" : { + "usingComponents" : true, + "nvueStyleCompiler" : "uni-app", + "compilerVersion" : 3, + "splashscreen" : { + "alwaysShowBeforeRender" : true, + "waiting" : true, + "autoclose" : true, + "delay" : 0 + }, + "modules" : {}, + "distribute" : { + "android" : { + "permissions" : [ + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "" + ] + }, + "ios" : { + "dSYMs" : false + }, + "sdkConfigs" : {} + } }, - "modules": {}, - "distribute": { - "android": { - "permissions": [ - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "" - ] - }, - "ios": {}, - "sdkConfigs": {} + "quickapp" : {}, + "mp-weixin" : { + "appid" : "", + "setting" : { + "urlCheck" : false + }, + "usingComponents" : true + }, + "mp-alipay" : { + "usingComponents" : true + }, + "mp-baidu" : { + "usingComponents" : true + }, + "mp-toutiao" : { + "usingComponents" : true + }, + "uniStatistics" : { + "enable" : false + }, + "vueVersion" : "3", + "h5" : { + "router" : { + "mode" : "hash", + "base" : "/" + }, + "devServer" : { + "port" : 8080, + "disableHostCheck" : true + } } - }, - "quickapp": {}, - "mp-weixin": { - "appid": "", - "setting": { - "urlCheck": false - }, - "usingComponents": true - }, - "mp-alipay": { - "usingComponents": true - }, - "mp-baidu": { - "usingComponents": true - }, - "mp-toutiao": { - "usingComponents": true - }, - "uniStatistics": { - "enable": false - }, - "vueVersion": "3", - "h5": { - "router": { - "mode": "hash", - "base": "/" - }, - "devServer": { - "port": 8080, - "disableHostCheck": true - } - } }