feat(crawler): add human-like behavior simulation to prevent detection

refactor(manifest): update appid and format manifest file
This commit is contained in:
dmy
2026-01-15 15:34:00 +08:00
parent 36cbb6fda1
commit eba5c7e5c5
3 changed files with 155 additions and 75 deletions

View File

@@ -74,7 +74,7 @@
</template>
<script setup lang="ts">
import { ref, computed, onMounted } from 'vue'
import { ref, computed, onMounted, onBeforeUnmount } from 'vue'
import api from '../utils/api'
import { ElMessage } from 'element-plus'
import { Refresh } from '@element-plus/icons-vue'
@@ -90,6 +90,8 @@ interface CrawlStat {
// Per-source crawl statistics; the `count` of each entry is summed by `totalCount`.
const crawlStats = ref<CrawlStat[]>([])
// Reactive loading flag — presumably toggled while statistics are being fetched (confirm against fetchCrawlStats).
const loading = ref(false)
// Set of source-name strings — presumably the sources with a crawl currently in flight (confirm against crawlSingleSource).
const crawlingSources = ref<Set<string>>(new Set())
// Auto-refresh period in milliseconds. NOTE(review): only referenced by the commented-out setInterval call in startAutoRefresh.
const REFRESH_INTERVAL = 10000
// Handle of the auto-refresh interval, or null when no interval is registered.
let refreshTimer: number | null = null
const totalCount = computed(() => {
return crawlStats.value.reduce((sum, item) => sum + item.count, 0)
@@ -140,7 +142,6 @@ const crawlSingleSource = async (sourceName: string) => {
ElMessage.error(`${sourceName} 更新失败: ${res.data.error || '未知错误'}`)
}
// 刷新统计数据
await fetchCrawlStats()
} catch (error) {
console.error('Failed to crawl single source:', error)
@@ -150,8 +151,30 @@ const crawlSingleSource = async (sourceName: string) => {
}
}
/**
 * Prepares the periodic statistics refresh.
 *
 * Any previously registered interval is cancelled first. Automatic refreshing
 * is currently disabled, so no new interval is scheduled; the scheduling code
 * is kept below (commented out) for when it is re-enabled.
 */
const startAutoRefresh = () => {
  if (refreshTimer !== null) {
    clearInterval(refreshTimer)
    // Drop the cancelled handle so the stored state never points at a dead
    // interval while scheduling below remains disabled.
    refreshTimer = null
  }
  // Auto-refresh is disabled for now.
  // refreshTimer = window.setInterval(() => {
  //   fetchCrawlStats()
  // }, REFRESH_INTERVAL)
}
/**
 * Cancels the auto-refresh interval, if one is registered, and clears the
 * stored handle. Does nothing when no interval is active.
 */
const stopAutoRefresh = () => {
  // Nothing registered: nothing to cancel.
  if (refreshTimer === null) return

  clearInterval(refreshTimer)
  refreshTimer = null
}
// On mount: kick off the initial statistics fetch (not awaited) and run the
// auto-refresh setup.
onMounted(() => {
  void fetchCrawlStats()
  startAutoRefresh()
})

// Before unmount: cancel any refresh interval that is still registered.
onBeforeUnmount(stopAutoRefresh)
</script>

View File

@@ -1,10 +1,50 @@
import * as puppeteer from 'puppeteer';
import { Logger } from '@nestjs/common';
/**
 * Moves the mouse pointer across a series of random viewport positions.
 *
 * Performs between 5 and 9 moves. Each move targets a random integer
 * coordinate inside the viewport, is spread over 10–29 intermediate steps,
 * and is followed by a pause of 100 ms up to (but not including) 500 ms.
 *
 * @param page Puppeteer page whose mouse is driven.
 * @returns Resolves once all moves and pauses have completed; resolves
 *          immediately without moving when the page reports no viewport.
 */
async function simulateHumanMouseMovement(page: puppeteer.Page) {
  const bounds = page.viewport();
  if (bounds) {
    const moveCount = 5 + Math.floor(Math.random() * 5);
    let done = 0;
    while (done < moveCount) {
      // Pick the destination first, then the step count, then the pause.
      const targetX = Math.floor(Math.random() * bounds.width);
      const targetY = Math.floor(Math.random() * bounds.height);
      const stepCount = 10 + Math.floor(Math.random() * 20);
      await page.mouse.move(targetX, targetY, { steps: stepCount });

      const pauseMs = 100 + Math.random() * 400;
      await new Promise((resume) => setTimeout(resume, pauseMs));
      done += 1;
    }
  }
}
/**
 * Scrolls the page down in several random increments, then back to the top.
 *
 * Performs between 3 and 7 smooth scrolls of 100–499 px each, pausing
 * 500 ms up to (but not including) 1500 ms after every scroll. Afterwards it
 * smooth-scrolls to the top of the page and waits one more second.
 *
 * @param page Puppeteer page in which the scrolling is executed.
 * @returns Resolves after the final one-second wait.
 */
async function simulateHumanScrolling(page: puppeteer.Page) {
  const pause = (ms: number) =>
    new Promise((resume) => setTimeout(resume, ms));

  const totalScrolls = 3 + Math.floor(Math.random() * 5);
  let performed = 0;
  while (performed < totalScrolls) {
    const offset = 100 + Math.floor(Math.random() * 400);
    // The callback runs in the page context, so the distance is passed as an
    // argument rather than captured from this scope.
    await page.evaluate((deltaY) => {
      window.scrollBy({ top: deltaY, behavior: 'smooth' });
    }, offset);
    await pause(500 + Math.random() * 1000);
    performed += 1;
  }

  // Return to the top of the page and wait for the scroll to finish.
  await page.evaluate(() => {
    window.scrollTo({ top: 0, behavior: 'smooth' });
  });
  await pause(1000);
}
/**
 * One item produced by the CHDTP crawler.
 */
export interface ChdtpResult {
  /** Title of the item. */
  title: string;
  /** Publication date of the item. */
  publishDate: Date;
  /** Link to the item; required because the system relies on it for uniqueness. */
  url: string;
}
interface ChdtpCrawlerType {
@@ -101,6 +141,12 @@ export const ChdtpCrawler = {
logger,
);
logger.log('Simulating human mouse movements...');
await simulateHumanMouseMovement(page);
logger.log('Simulating human scrolling...');
await simulateHumanScrolling(page);
while (currentPage <= maxPages) {
const content = await page.content();
const pageResults = this.extract(content);
@@ -115,6 +161,12 @@ export const ChdtpCrawler = {
`Extracted ${pageResults.length} items from page ${currentPage}`,
);
logger.log('Simulating human mouse movements before pagination...');
await simulateHumanMouseMovement(page);
logger.log('Simulating human scrolling before pagination...');
await simulateHumanScrolling(page);
// Find the "Next Page" button
// Using partial match for src to be robust against path variations
const nextButtonSelector = 'input[type="image"][src*="page-next.png"]';
@@ -125,9 +177,6 @@ export const ChdtpCrawler = {
break;
}
// Optional: Check if the button is disabled (though image inputs usually aren't "disabled" in the same way)
// For this specific site, we'll try to click.
logger.log(`Navigating to page ${currentPage + 1}...`);
try {
@@ -149,6 +198,12 @@ export const ChdtpCrawler = {
currentPage++;
logger.log('Simulating human mouse movements after pagination...');
await simulateHumanMouseMovement(page);
logger.log('Simulating human scrolling after pagination...');
await simulateHumanScrolling(page);
// Random delay between pages
const delay = Math.floor(Math.random() * (3000 - 1000 + 1)) + 1000;
await new Promise((resolve) => setTimeout(resolve, delay));

View File

@@ -1,73 +1,75 @@
{
"name": "bidding-looker",
"appid": "__UNI__BIDDING_LOOKER",
"description": "投标项目查看器",
"versionName": "1.0.0",
"versionCode": "100",
"transformPx": false,
"app-plus": {
"usingComponents": true,
"nvueStyleCompiler": "uni-app",
"compilerVersion": 3,
"splashscreen": {
"alwaysShowBeforeRender": true,
"waiting": true,
"autoclose": true,
"delay": 0
"name" : "bidding-looker",
"appid" : "__UNI__1D1820F",
"description" : "投标项目查看器",
"versionName" : "1.0.0",
"versionCode" : "100",
"transformPx" : false,
"app-plus" : {
"usingComponents" : true,
"nvueStyleCompiler" : "uni-app",
"compilerVersion" : 3,
"splashscreen" : {
"alwaysShowBeforeRender" : true,
"waiting" : true,
"autoclose" : true,
"delay" : 0
},
"modules" : {},
"distribute" : {
"android" : {
"permissions" : [
"<uses-permission android:name=\"android.permission.CHANGE_NETWORK_STATE\"/>",
"<uses-permission android:name=\"android.permission.MOUNT_UNMOUNT_FILESYSTEMS\"/>",
"<uses-permission android:name=\"android.permission.VIBRATE\"/>",
"<uses-permission android:name=\"android.permission.READ_LOGS\"/>",
"<uses-permission android:name=\"android.permission.ACCESS_WIFI_STATE\"/>",
"<uses-feature android:name=\"android.hardware.camera.autofocus\"/>",
"<uses-permission android:name=\"android.permission.ACCESS_NETWORK_STATE\"/>",
"<uses-permission android:name=\"android.permission.CAMERA\"/>",
"<uses-permission android:name=\"android.permission.GET_ACCOUNTS\"/>",
"<uses-permission android:name=\"android.permission.READ_PHONE_STATE\"/>",
"<uses-permission android:name=\"android.permission.CHANGE_WIFI_STATE\"/>",
"<uses-permission android:name=\"android.permission.WAKE_LOCK\"/>",
"<uses-permission android:name=\"android.permission.FLASHLIGHT\"/>",
"<uses-permission android:name=\"android.permission.WRITE_SETTINGS\"/>"
]
},
"ios" : {
"dSYMs" : false
},
"sdkConfigs" : {}
}
},
"modules": {},
"distribute": {
"android": {
"permissions": [
"<uses-permission android:name=\"android.permission.CHANGE_NETWORK_STATE\"/>",
"<uses-permission android:name=\"android.permission.MOUNT_UNMOUNT_FILESYSTEMS\"/>",
"<uses-permission android:name=\"android.permission.VIBRATE\"/>",
"<uses-permission android:name=\"android.permission.READ_LOGS\"/>",
"<uses-permission android:name=\"android.permission.ACCESS_WIFI_STATE\"/>",
"<uses-feature android:name=\"android.hardware.camera.autofocus\"/>",
"<uses-permission android:name=\"android.permission.ACCESS_NETWORK_STATE\"/>",
"<uses-permission android:name=\"android.permission.CAMERA\"/>",
"<uses-permission android:name=\"android.permission.GET_ACCOUNTS\"/>",
"<uses-permission android:name=\"android.permission.READ_PHONE_STATE\"/>",
"<uses-permission android:name=\"android.permission.CHANGE_WIFI_STATE\"/>",
"<uses-permission android:name=\"android.permission.WAKE_LOCK\"/>",
"<uses-permission android:name=\"android.permission.FLASHLIGHT\"/>",
"<uses-permission android:name=\"android.permission.WRITE_SETTINGS\"/>"
]
},
"ios": {},
"sdkConfigs": {}
"quickapp" : {},
"mp-weixin" : {
"appid" : "",
"setting" : {
"urlCheck" : false
},
"usingComponents" : true
},
"mp-alipay" : {
"usingComponents" : true
},
"mp-baidu" : {
"usingComponents" : true
},
"mp-toutiao" : {
"usingComponents" : true
},
"uniStatistics" : {
"enable" : false
},
"vueVersion" : "3",
"h5" : {
"router" : {
"mode" : "hash",
"base" : "/"
},
"devServer" : {
"port" : 8080,
"disableHostCheck" : true
}
}
},
"quickapp": {},
"mp-weixin": {
"appid": "",
"setting": {
"urlCheck": false
},
"usingComponents": true
},
"mp-alipay": {
"usingComponents": true
},
"mp-baidu": {
"usingComponents": true
},
"mp-toutiao": {
"usingComponents": true
},
"uniStatistics": {
"enable": false
},
"vueVersion": "3",
"h5": {
"router": {
"mode": "hash",
"base": "/"
},
"devServer": {
"port": 8080,
"disableHostCheck": true
}
}
}