feat(crawler): add human-like behavior simulation to prevent detection

refactor(manifest): update appid and format manifest file
This commit is contained in:
dmy
2026-01-15 15:34:00 +08:00
parent 36cbb6fda1
commit eba5c7e5c5
3 changed files with 155 additions and 75 deletions

View File

@@ -74,7 +74,7 @@
</template>
<script setup lang="ts">
import { ref, computed, onMounted } from 'vue'
import { ref, computed, onMounted, onBeforeUnmount } from 'vue'
import api from '../utils/api'
import { ElMessage } from 'element-plus'
import { Refresh } from '@element-plus/icons-vue'
@@ -90,6 +90,8 @@ interface CrawlStat {
const crawlStats = ref<CrawlStat[]>([])
const loading = ref(false)
const crawlingSources = ref<Set<string>>(new Set())
const REFRESH_INTERVAL = 10000
let refreshTimer: number | null = null
const totalCount = computed(() => {
return crawlStats.value.reduce((sum, item) => sum + item.count, 0)
@@ -140,7 +142,6 @@ const crawlSingleSource = async (sourceName: string) => {
ElMessage.error(`${sourceName} 更新失败: ${res.data.error || '未知错误'}`)
}
// 刷新统计数据
await fetchCrawlStats()
} catch (error) {
console.error('Failed to crawl single source:', error)
@@ -150,8 +151,30 @@ const crawlSingleSource = async (sourceName: string) => {
}
}
/**
 * Prepares the periodic refresh of the crawl statistics.
 *
 * Any interval that is still registered is cancelled and its handle is reset
 * to `null`, so a stale id is never left behind for a later
 * `refreshTimer !== null` check. Polling itself is currently switched off:
 * the scheduling code below is intentionally left commented out, so calling
 * this function does not start a new interval.
 */
const startAutoRefresh = () => {
  if (refreshTimer !== null) {
    clearInterval(refreshTimer)
    // Forget the cancelled handle, mirroring what stopAutoRefresh does.
    refreshTimer = null
  }
  // Auto refresh is disabled for now.
  // refreshTimer = window.setInterval(() => {
  //   fetchCrawlStats()
  // }, REFRESH_INTERVAL)
}
/**
 * Cancels the statistics refresh interval, if one is registered, and clears
 * the stored handle. Does nothing when no timer is active.
 */
const stopAutoRefresh = () => {
  if (refreshTimer === null) {
    return
  }
  clearInterval(refreshTimer)
  refreshTimer = null
}
// On mount: request the crawl statistics once, then run the auto-refresh setup.
onMounted(() => {
  void fetchCrawlStats()
  startAutoRefresh()
})

// Before unmount: make sure no refresh timer outlives the component.
onBeforeUnmount(() => stopAutoRefresh())
</script>

View File

@@ -1,10 +1,50 @@
import * as puppeteer from 'puppeteer';
import { Logger } from '@nestjs/common';
/**
 * Moves the mouse pointer to a series of random positions inside the page's
 * viewport.
 *
 * Performs 5 to 9 moves. Each move is interpolated over 10 to 29 steps and is
 * followed by a pause of roughly 100-500 ms. Returns immediately when the page
 * reports no viewport.
 *
 * @param page Puppeteer page whose mouse is driven.
 */
async function simulateHumanMouseMovement(page: puppeteer.Page) {
  const bounds = page.viewport();
  if (bounds) {
    let remainingMoves = 5 + Math.floor(Math.random() * 5);
    while (remainingMoves > 0) {
      remainingMoves -= 1;

      // Pick a random target point within the viewport.
      const targetX = Math.floor(Math.random() * bounds.width);
      const targetY = Math.floor(Math.random() * bounds.height);
      const steps = 10 + Math.floor(Math.random() * 20);
      await page.mouse.move(targetX, targetY, { steps });

      // Short random pause before the next move.
      const pauseMs = 100 + Math.random() * 400;
      await new Promise((resolve) => setTimeout(resolve, pauseMs));
    }
  }
}
/**
 * Scrolls the page down in several random increments and then back to the top.
 *
 * Performs 3 to 7 smooth scrolls of 100 to 499 px each, pausing roughly
 * 500-1500 ms after every scroll. Afterwards it smooth-scrolls to the top and
 * waits a further 1000 ms.
 *
 * @param page Puppeteer page in which the scrolling is evaluated.
 */
async function simulateHumanScrolling(page: puppeteer.Page) {
  const pause = (ms: number) =>
    new Promise((resolve) => setTimeout(resolve, ms));

  const passes = 3 + Math.floor(Math.random() * 5);
  for (let pass = 0; pass < passes; pass += 1) {
    const offset = 100 + Math.floor(Math.random() * 400);
    // The callback runs inside the page, so the offset is passed as an argument.
    await page.evaluate((deltaY) => {
      window.scrollBy({ top: deltaY, behavior: 'smooth' });
    }, offset);
    await pause(500 + Math.random() * 1000);
  }

  // Return to the top of the document.
  await page.evaluate(() => {
    window.scrollTo({ top: 0, behavior: 'smooth' });
  });
  await pause(1000);
}
/** A single announcement extracted by the CHDTP crawler. */
export interface ChdtpResult {
  /** Title text of the announcement. */
  title: string;
  /** Publication date of the announcement. */
  publishDate: Date;
  /** Link to the announcement; necessary for system uniqueness. */
  url: string;
}
interface ChdtpCrawlerType {
@@ -101,6 +141,12 @@ export const ChdtpCrawler = {
logger,
);
logger.log('Simulating human mouse movements...');
await simulateHumanMouseMovement(page);
logger.log('Simulating human scrolling...');
await simulateHumanScrolling(page);
while (currentPage <= maxPages) {
const content = await page.content();
const pageResults = this.extract(content);
@@ -115,6 +161,12 @@ export const ChdtpCrawler = {
`Extracted ${pageResults.length} items from page ${currentPage}`,
);
logger.log('Simulating human mouse movements before pagination...');
await simulateHumanMouseMovement(page);
logger.log('Simulating human scrolling before pagination...');
await simulateHumanScrolling(page);
// Find the "Next Page" button
// Using partial match for src to be robust against path variations
const nextButtonSelector = 'input[type="image"][src*="page-next.png"]';
@@ -125,9 +177,6 @@ export const ChdtpCrawler = {
break;
}
// Optional: Check if the button is disabled (though image inputs usually aren't "disabled" in the same way)
// For this specific site, we'll try to click.
logger.log(`Navigating to page ${currentPage + 1}...`);
try {
@@ -149,6 +198,12 @@ export const ChdtpCrawler = {
currentPage++;
logger.log('Simulating human mouse movements after pagination...');
await simulateHumanMouseMovement(page);
logger.log('Simulating human scrolling after pagination...');
await simulateHumanScrolling(page);
// Random delay between pages
const delay = Math.floor(Math.random() * (3000 - 1000 + 1)) + 1000;
await new Promise((resolve) => setTimeout(resolve, delay));

View File

@@ -1,6 +1,6 @@
{
"name" : "bidding-looker",
"appid": "__UNI__BIDDING_LOOKER",
"appid" : "__UNI__1D1820F",
"description" : "投标项目查看器",
"versionName" : "1.0.0",
"versionCode" : "100",
@@ -35,7 +35,9 @@
"<uses-permission android:name=\"android.permission.WRITE_SETTINGS\"/>"
]
},
"ios": {},
"ios" : {
"dSYMs" : false
},
"sdkConfigs" : {}
}
},