feat(crawler): add human-like behavior simulation to prevent detection

refactor(manifest): update appid and format manifest file
This commit is contained in:
dmy
2026-01-15 15:34:00 +08:00
parent 36cbb6fda1
commit eba5c7e5c5
3 changed files with 155 additions and 75 deletions

View File

@@ -74,7 +74,7 @@
</template> </template>
<script setup lang="ts"> <script setup lang="ts">
import { ref, computed, onMounted } from 'vue' import { ref, computed, onMounted, onBeforeUnmount } from 'vue'
import api from '../utils/api' import api from '../utils/api'
import { ElMessage } from 'element-plus' import { ElMessage } from 'element-plus'
import { Refresh } from '@element-plus/icons-vue' import { Refresh } from '@element-plus/icons-vue'
@@ -90,6 +90,8 @@ interface CrawlStat {
const crawlStats = ref<CrawlStat[]>([]) const crawlStats = ref<CrawlStat[]>([])
const loading = ref(false) const loading = ref(false)
const crawlingSources = ref<Set<string>>(new Set()) const crawlingSources = ref<Set<string>>(new Set())
const REFRESH_INTERVAL = 10000
let refreshTimer: number | null = null
const totalCount = computed(() => { const totalCount = computed(() => {
return crawlStats.value.reduce((sum, item) => sum + item.count, 0) return crawlStats.value.reduce((sum, item) => sum + item.count, 0)
@@ -140,7 +142,6 @@ const crawlSingleSource = async (sourceName: string) => {
ElMessage.error(`${sourceName} 更新失败: ${res.data.error || '未知错误'}`) ElMessage.error(`${sourceName} 更新失败: ${res.data.error || '未知错误'}`)
} }
// 刷新统计数据
await fetchCrawlStats() await fetchCrawlStats()
} catch (error) { } catch (error) {
console.error('Failed to crawl single source:', error) console.error('Failed to crawl single source:', error)
@@ -150,8 +151,30 @@ const crawlSingleSource = async (sourceName: string) => {
} }
} }
/**
 * Entry point for the periodic refresh of the crawl statistics.
 *
 * Any timer left over from a previous call is cancelled first so that
 * repeated calls can never stack several intervals. The interval itself is
 * currently disabled (see the commented-out code below), so at the moment
 * this function only performs that cleanup.
 */
const startAutoRefresh = () => {
  if (refreshTimer !== null) {
    clearInterval(refreshTimer)
    // Reset the handle too: with the interval disabled nothing reassigns it,
    // and it must not keep referring to an already-cleared timer.
    refreshTimer = null
  }
  // Auto-refresh is disabled for now; restore the lines below to re-enable it.
  // refreshTimer = window.setInterval(() => {
  //   fetchCrawlStats()
  // }, REFRESH_INTERVAL)
}
/**
 * Cancels the auto-refresh interval, if one is registered, and forgets its
 * handle. Does nothing when no timer is stored in `refreshTimer`.
 */
const stopAutoRefresh = () => {
  // Nothing scheduled: nothing to cancel.
  if (refreshTimer === null) {
    return
  }

  clearInterval(refreshTimer)
  refreshTimer = null
}
onMounted(() => { onMounted(() => {
fetchCrawlStats() fetchCrawlStats()
startAutoRefresh()
})
onBeforeUnmount(() => {
stopAutoRefresh()
}) })
</script> </script>

View File

@@ -1,10 +1,50 @@
import * as puppeteer from 'puppeteer'; import * as puppeteer from 'puppeteer';
import { Logger } from '@nestjs/common'; import { Logger } from '@nestjs/common';
/**
 * Moves the mouse pointer through a series of random positions inside the
 * page's viewport, pausing briefly after each move.
 *
 * - Performs between 5 and 9 moves.
 * - Each move is interpolated over 10 to 29 intermediate steps.
 * - Each move is followed by a pause of roughly 100-500 ms.
 *
 * Returns immediately, without touching the mouse, when the page reports no
 * viewport.
 *
 * @param page Puppeteer page whose mouse is driven.
 */
async function simulateHumanMouseMovement(page: puppeteer.Page) {
  const size = page.viewport();
  if (!size) {
    return;
  }

  let remaining = 5 + Math.floor(Math.random() * 5);
  while (remaining-- > 0) {
    // Pick a random target anywhere inside the viewport.
    const targetX = Math.floor(Math.random() * size.width);
    const targetY = Math.floor(Math.random() * size.height);
    const steps = 10 + Math.floor(Math.random() * 20);
    await page.mouse.move(targetX, targetY, { steps });

    // Short random pause before the next move.
    const pauseMs = 100 + Math.random() * 400;
    await new Promise((resolve) => setTimeout(resolve, pauseMs));
  }
}
/**
 * Scrolls the page down in several random increments and then returns to the
 * top, pausing between actions.
 *
 * - Performs between 3 and 7 downward scrolls of 100 to 499 pixels each,
 *   requested with smooth scrolling.
 * - Waits roughly 500-1500 ms after each scroll.
 * - Finally requests a smooth scroll back to the top and waits 1000 ms.
 *
 * @param page Puppeteer page in which the scrolling is executed.
 */
async function simulateHumanScrolling(page: puppeteer.Page) {
  const pause = (ms: number) =>
    new Promise((resolve) => setTimeout(resolve, ms));

  const totalScrolls = 3 + Math.floor(Math.random() * 5);
  for (let done = 0; done < totalScrolls; done += 1) {
    const deltaY = 100 + Math.floor(Math.random() * 400);
    // Runs inside the browser context; the distance is passed in as an argument.
    await page.evaluate((distance) => {
      window.scrollBy({ top: distance, behavior: 'smooth' });
    }, deltaY);
    await pause(500 + Math.random() * 1000);
  }

  // Return to the top of the page once the downward scrolling is finished.
  await page.evaluate(() => {
    window.scrollTo({ top: 0, behavior: 'smooth' });
  });
  await pause(1000);
}
export interface ChdtpResult { export interface ChdtpResult {
title: string; title: string;
publishDate: Date; publishDate: Date;
url: string; // Necessary for system uniqueness url: string;
} }
interface ChdtpCrawlerType { interface ChdtpCrawlerType {
@@ -101,6 +141,12 @@ export const ChdtpCrawler = {
logger, logger,
); );
logger.log('Simulating human mouse movements...');
await simulateHumanMouseMovement(page);
logger.log('Simulating human scrolling...');
await simulateHumanScrolling(page);
while (currentPage <= maxPages) { while (currentPage <= maxPages) {
const content = await page.content(); const content = await page.content();
const pageResults = this.extract(content); const pageResults = this.extract(content);
@@ -115,6 +161,12 @@ export const ChdtpCrawler = {
`Extracted ${pageResults.length} items from page ${currentPage}`, `Extracted ${pageResults.length} items from page ${currentPage}`,
); );
logger.log('Simulating human mouse movements before pagination...');
await simulateHumanMouseMovement(page);
logger.log('Simulating human scrolling before pagination...');
await simulateHumanScrolling(page);
// Find the "Next Page" button // Find the "Next Page" button
// Using partial match for src to be robust against path variations // Using partial match for src to be robust against path variations
const nextButtonSelector = 'input[type="image"][src*="page-next.png"]'; const nextButtonSelector = 'input[type="image"][src*="page-next.png"]';
@@ -125,9 +177,6 @@ export const ChdtpCrawler = {
break; break;
} }
// Optional: Check if the button is disabled (though image inputs usually aren't "disabled" in the same way)
// For this specific site, we'll try to click.
logger.log(`Navigating to page ${currentPage + 1}...`); logger.log(`Navigating to page ${currentPage + 1}...`);
try { try {
@@ -149,6 +198,12 @@ export const ChdtpCrawler = {
currentPage++; currentPage++;
logger.log('Simulating human mouse movements after pagination...');
await simulateHumanMouseMovement(page);
logger.log('Simulating human scrolling after pagination...');
await simulateHumanScrolling(page);
// Random delay between pages // Random delay between pages
const delay = Math.floor(Math.random() * (3000 - 1000 + 1)) + 1000; const delay = Math.floor(Math.random() * (3000 - 1000 + 1)) + 1000;
await new Promise((resolve) => setTimeout(resolve, delay)); await new Promise((resolve) => setTimeout(resolve, delay));

View File

@@ -1,24 +1,24 @@
{ {
"name": "bidding-looker", "name" : "bidding-looker",
"appid": "__UNI__BIDDING_LOOKER", "appid" : "__UNI__1D1820F",
"description": "投标项目查看器", "description" : "投标项目查看器",
"versionName": "1.0.0", "versionName" : "1.0.0",
"versionCode": "100", "versionCode" : "100",
"transformPx": false, "transformPx" : false,
"app-plus": { "app-plus" : {
"usingComponents": true, "usingComponents" : true,
"nvueStyleCompiler": "uni-app", "nvueStyleCompiler" : "uni-app",
"compilerVersion": 3, "compilerVersion" : 3,
"splashscreen": { "splashscreen" : {
"alwaysShowBeforeRender": true, "alwaysShowBeforeRender" : true,
"waiting": true, "waiting" : true,
"autoclose": true, "autoclose" : true,
"delay": 0 "delay" : 0
}, },
"modules": {}, "modules" : {},
"distribute": { "distribute" : {
"android": { "android" : {
"permissions": [ "permissions" : [
"<uses-permission android:name=\"android.permission.CHANGE_NETWORK_STATE\"/>", "<uses-permission android:name=\"android.permission.CHANGE_NETWORK_STATE\"/>",
"<uses-permission android:name=\"android.permission.MOUNT_UNMOUNT_FILESYSTEMS\"/>", "<uses-permission android:name=\"android.permission.MOUNT_UNMOUNT_FILESYSTEMS\"/>",
"<uses-permission android:name=\"android.permission.VIBRATE\"/>", "<uses-permission android:name=\"android.permission.VIBRATE\"/>",
@@ -35,39 +35,41 @@
"<uses-permission android:name=\"android.permission.WRITE_SETTINGS\"/>" "<uses-permission android:name=\"android.permission.WRITE_SETTINGS\"/>"
] ]
}, },
"ios": {}, "ios" : {
"sdkConfigs": {} "dSYMs" : false
},
"sdkConfigs" : {}
} }
}, },
"quickapp": {}, "quickapp" : {},
"mp-weixin": { "mp-weixin" : {
"appid": "", "appid" : "",
"setting": { "setting" : {
"urlCheck": false "urlCheck" : false
}, },
"usingComponents": true "usingComponents" : true
}, },
"mp-alipay": { "mp-alipay" : {
"usingComponents": true "usingComponents" : true
}, },
"mp-baidu": { "mp-baidu" : {
"usingComponents": true "usingComponents" : true
}, },
"mp-toutiao": { "mp-toutiao" : {
"usingComponents": true "usingComponents" : true
}, },
"uniStatistics": { "uniStatistics" : {
"enable": false "enable" : false
}, },
"vueVersion": "3", "vueVersion" : "3",
"h5": { "h5" : {
"router": { "router" : {
"mode": "hash", "mode" : "hash",
"base": "/" "base" : "/"
}, },
"devServer": { "devServer" : {
"port": 8080, "port" : 8080,
"disableHostCheck": true "disableHostCheck" : true
} }
} }
} }