feat(crawler): add human-like behavior simulation to prevent detection
refactor(manifest): update appid and format manifest file
This commit is contained in:
@@ -74,7 +74,7 @@
|
|||||||
</template>
|
</template>
|
||||||
|
|
||||||
<script setup lang="ts">
|
<script setup lang="ts">
|
||||||
import { ref, computed, onMounted } from 'vue'
|
import { ref, computed, onMounted, onBeforeUnmount } from 'vue'
|
||||||
import api from '../utils/api'
|
import api from '../utils/api'
|
||||||
import { ElMessage } from 'element-plus'
|
import { ElMessage } from 'element-plus'
|
||||||
import { Refresh } from '@element-plus/icons-vue'
|
import { Refresh } from '@element-plus/icons-vue'
|
||||||
@@ -90,6 +90,8 @@ interface CrawlStat {
|
|||||||
const crawlStats = ref<CrawlStat[]>([])
|
const crawlStats = ref<CrawlStat[]>([])
|
||||||
const loading = ref(false)
|
const loading = ref(false)
|
||||||
const crawlingSources = ref<Set<string>>(new Set())
|
const crawlingSources = ref<Set<string>>(new Set())
|
||||||
|
const REFRESH_INTERVAL = 10000
|
||||||
|
let refreshTimer: number | null = null
|
||||||
|
|
||||||
const totalCount = computed(() => {
|
const totalCount = computed(() => {
|
||||||
return crawlStats.value.reduce((sum, item) => sum + item.count, 0)
|
return crawlStats.value.reduce((sum, item) => sum + item.count, 0)
|
||||||
@@ -140,7 +142,6 @@ const crawlSingleSource = async (sourceName: string) => {
|
|||||||
ElMessage.error(`${sourceName} 更新失败: ${res.data.error || '未知错误'}`)
|
ElMessage.error(`${sourceName} 更新失败: ${res.data.error || '未知错误'}`)
|
||||||
}
|
}
|
||||||
|
|
||||||
// 刷新统计数据
|
|
||||||
await fetchCrawlStats()
|
await fetchCrawlStats()
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error('Failed to crawl single source:', error)
|
console.error('Failed to crawl single source:', error)
|
||||||
@@ -150,8 +151,30 @@ const crawlSingleSource = async (sourceName: string) => {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Entry point for the periodic refresh of the crawl statistics.
 *
 * Automatic refreshing is currently switched off on purpose, so this function
 * only discards a previously scheduled timer (if any) and schedules nothing new.
 */
const startAutoRefresh = () => {
  // Drop any earlier timer and forget its id so no stale handle is kept around.
  if (refreshTimer !== null) {
    clearInterval(refreshTimer)
    refreshTimer = null
  }
  // Auto refresh is disabled; restore the lines below to turn it back on.
  // refreshTimer = window.setInterval(() => {
  //   fetchCrawlStats()
  // }, REFRESH_INTERVAL)
}
|
||||||
|
|
||||||
|
/**
 * Cancels the pending auto-refresh timer, if one is registered, and clears the
 * stored timer id. Calling it when no timer is active is a no-op.
 */
const stopAutoRefresh = () => {
  if (refreshTimer === null) return

  clearInterval(refreshTimer)
  refreshTimer = null
}
|
||||||
|
|
||||||
onMounted(() => {
|
onMounted(() => {
|
||||||
fetchCrawlStats()
|
fetchCrawlStats()
|
||||||
|
startAutoRefresh()
|
||||||
|
})
|
||||||
|
|
||||||
|
onBeforeUnmount(() => {
|
||||||
|
stopAutoRefresh()
|
||||||
})
|
})
|
||||||
</script>
|
</script>
|
||||||
|
|
||||||
|
|||||||
@@ -1,10 +1,50 @@
|
|||||||
import * as puppeteer from 'puppeteer';
|
import * as puppeteer from 'puppeteer';
|
||||||
import { Logger } from '@nestjs/common';
|
import { Logger } from '@nestjs/common';
|
||||||
|
|
||||||
|
/**
 * Moves the mouse pointer to several random positions inside the page's
 * viewport, as part of the crawler's human-like behavior simulation.
 *
 * Performs between 5 and 9 moves. Each move is interpolated over 10 to 29
 * steps and is followed by a pause of roughly 100 to 500 ms. If the page
 * reports no viewport, the function returns without touching the mouse.
 *
 * @param page - Puppeteer page whose mouse is driven.
 * @returns A promise that resolves once all moves and pauses have finished.
 */
async function simulateHumanMouseMovement(page: puppeteer.Page): Promise<void> {
  const viewport = page.viewport();
  if (!viewport) return;

  const movements = 5 + Math.floor(Math.random() * 5);

  for (let i = 0; i < movements; i++) {
    // Pick an integer target anywhere inside the visible area.
    const x = Math.floor(Math.random() * viewport.width);
    const y = Math.floor(Math.random() * viewport.height);

    await page.mouse.move(x, y, {
      steps: 10 + Math.floor(Math.random() * 20),
    });

    // Short random pause before the next move.
    await new Promise((r) => setTimeout(r, 100 + Math.random() * 400));
  }
}
|
||||||
|
|
||||||
|
/**
 * Scrolls the page down in several random increments and then back to the top,
 * as part of the crawler's human-like behavior simulation.
 *
 * Performs between 3 and 7 smooth downward scrolls of 100 to 499 pixels each,
 * pausing roughly 500 to 1500 ms after every scroll. Afterwards it smoothly
 * scrolls back to the top and waits one more second.
 *
 * @param page - Puppeteer page in which the scrolling is executed.
 * @returns A promise that resolves once the final pause has elapsed.
 */
async function simulateHumanScrolling(page: puppeteer.Page): Promise<void> {
  const scrollCount = 3 + Math.floor(Math.random() * 5);

  for (let i = 0; i < scrollCount; i++) {
    const scrollDistance = 100 + Math.floor(Math.random() * 400);

    // The callback runs in the browser context; the distance is passed as an argument.
    await page.evaluate((distance) => {
      window.scrollBy({
        top: distance,
        behavior: 'smooth',
      });
    }, scrollDistance);

    await new Promise((r) => setTimeout(r, 500 + Math.random() * 1000));
  }

  // Return to the top of the page.
  await page.evaluate(() => {
    window.scrollTo({ top: 0, behavior: 'smooth' });
  });
  await new Promise((r) => setTimeout(r, 1000));
}
|
||||||
|
|
||||||
export interface ChdtpResult {
|
export interface ChdtpResult {
|
||||||
title: string;
|
title: string;
|
||||||
publishDate: Date;
|
publishDate: Date;
|
||||||
url: string; // Necessary for system uniqueness
|
url: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
interface ChdtpCrawlerType {
|
interface ChdtpCrawlerType {
|
||||||
@@ -101,6 +141,12 @@ export const ChdtpCrawler = {
|
|||||||
logger,
|
logger,
|
||||||
);
|
);
|
||||||
|
|
||||||
|
logger.log('Simulating human mouse movements...');
|
||||||
|
await simulateHumanMouseMovement(page);
|
||||||
|
|
||||||
|
logger.log('Simulating human scrolling...');
|
||||||
|
await simulateHumanScrolling(page);
|
||||||
|
|
||||||
while (currentPage <= maxPages) {
|
while (currentPage <= maxPages) {
|
||||||
const content = await page.content();
|
const content = await page.content();
|
||||||
const pageResults = this.extract(content);
|
const pageResults = this.extract(content);
|
||||||
@@ -115,6 +161,12 @@ export const ChdtpCrawler = {
|
|||||||
`Extracted ${pageResults.length} items from page ${currentPage}`,
|
`Extracted ${pageResults.length} items from page ${currentPage}`,
|
||||||
);
|
);
|
||||||
|
|
||||||
|
logger.log('Simulating human mouse movements before pagination...');
|
||||||
|
await simulateHumanMouseMovement(page);
|
||||||
|
|
||||||
|
logger.log('Simulating human scrolling before pagination...');
|
||||||
|
await simulateHumanScrolling(page);
|
||||||
|
|
||||||
// Find the "Next Page" button
|
// Find the "Next Page" button
|
||||||
// Using partial match for src to be robust against path variations
|
// Using partial match for src to be robust against path variations
|
||||||
const nextButtonSelector = 'input[type="image"][src*="page-next.png"]';
|
const nextButtonSelector = 'input[type="image"][src*="page-next.png"]';
|
||||||
@@ -125,9 +177,6 @@ export const ChdtpCrawler = {
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Optional: Check if the button is disabled (though image inputs usually aren't "disabled" in the same way)
|
|
||||||
// For this specific site, we'll try to click.
|
|
||||||
|
|
||||||
logger.log(`Navigating to page ${currentPage + 1}...`);
|
logger.log(`Navigating to page ${currentPage + 1}...`);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
@@ -149,6 +198,12 @@ export const ChdtpCrawler = {
|
|||||||
|
|
||||||
currentPage++;
|
currentPage++;
|
||||||
|
|
||||||
|
logger.log('Simulating human mouse movements after pagination...');
|
||||||
|
await simulateHumanMouseMovement(page);
|
||||||
|
|
||||||
|
logger.log('Simulating human scrolling after pagination...');
|
||||||
|
await simulateHumanScrolling(page);
|
||||||
|
|
||||||
// Random delay between pages
|
// Random delay between pages
|
||||||
const delay = Math.floor(Math.random() * (3000 - 1000 + 1)) + 1000;
|
const delay = Math.floor(Math.random() * (3000 - 1000 + 1)) + 1000;
|
||||||
await new Promise((resolve) => setTimeout(resolve, delay));
|
await new Promise((resolve) => setTimeout(resolve, delay));
|
||||||
|
|||||||
@@ -1,24 +1,24 @@
|
|||||||
{
|
{
|
||||||
"name": "bidding-looker",
|
"name" : "bidding-looker",
|
||||||
"appid": "__UNI__BIDDING_LOOKER",
|
"appid" : "__UNI__1D1820F",
|
||||||
"description": "投标项目查看器",
|
"description" : "投标项目查看器",
|
||||||
"versionName": "1.0.0",
|
"versionName" : "1.0.0",
|
||||||
"versionCode": "100",
|
"versionCode" : "100",
|
||||||
"transformPx": false,
|
"transformPx" : false,
|
||||||
"app-plus": {
|
"app-plus" : {
|
||||||
"usingComponents": true,
|
"usingComponents" : true,
|
||||||
"nvueStyleCompiler": "uni-app",
|
"nvueStyleCompiler" : "uni-app",
|
||||||
"compilerVersion": 3,
|
"compilerVersion" : 3,
|
||||||
"splashscreen": {
|
"splashscreen" : {
|
||||||
"alwaysShowBeforeRender": true,
|
"alwaysShowBeforeRender" : true,
|
||||||
"waiting": true,
|
"waiting" : true,
|
||||||
"autoclose": true,
|
"autoclose" : true,
|
||||||
"delay": 0
|
"delay" : 0
|
||||||
},
|
},
|
||||||
"modules": {},
|
"modules" : {},
|
||||||
"distribute": {
|
"distribute" : {
|
||||||
"android": {
|
"android" : {
|
||||||
"permissions": [
|
"permissions" : [
|
||||||
"<uses-permission android:name=\"android.permission.CHANGE_NETWORK_STATE\"/>",
|
"<uses-permission android:name=\"android.permission.CHANGE_NETWORK_STATE\"/>",
|
||||||
"<uses-permission android:name=\"android.permission.MOUNT_UNMOUNT_FILESYSTEMS\"/>",
|
"<uses-permission android:name=\"android.permission.MOUNT_UNMOUNT_FILESYSTEMS\"/>",
|
||||||
"<uses-permission android:name=\"android.permission.VIBRATE\"/>",
|
"<uses-permission android:name=\"android.permission.VIBRATE\"/>",
|
||||||
@@ -35,39 +35,41 @@
|
|||||||
"<uses-permission android:name=\"android.permission.WRITE_SETTINGS\"/>"
|
"<uses-permission android:name=\"android.permission.WRITE_SETTINGS\"/>"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"ios": {},
|
"ios" : {
|
||||||
"sdkConfigs": {}
|
"dSYMs" : false
|
||||||
|
},
|
||||||
|
"sdkConfigs" : {}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"quickapp": {},
|
"quickapp" : {},
|
||||||
"mp-weixin": {
|
"mp-weixin" : {
|
||||||
"appid": "",
|
"appid" : "",
|
||||||
"setting": {
|
"setting" : {
|
||||||
"urlCheck": false
|
"urlCheck" : false
|
||||||
},
|
},
|
||||||
"usingComponents": true
|
"usingComponents" : true
|
||||||
},
|
},
|
||||||
"mp-alipay": {
|
"mp-alipay" : {
|
||||||
"usingComponents": true
|
"usingComponents" : true
|
||||||
},
|
},
|
||||||
"mp-baidu": {
|
"mp-baidu" : {
|
||||||
"usingComponents": true
|
"usingComponents" : true
|
||||||
},
|
},
|
||||||
"mp-toutiao": {
|
"mp-toutiao" : {
|
||||||
"usingComponents": true
|
"usingComponents" : true
|
||||||
},
|
},
|
||||||
"uniStatistics": {
|
"uniStatistics" : {
|
||||||
"enable": false
|
"enable" : false
|
||||||
},
|
},
|
||||||
"vueVersion": "3",
|
"vueVersion" : "3",
|
||||||
"h5": {
|
"h5" : {
|
||||||
"router": {
|
"router" : {
|
||||||
"mode": "hash",
|
"mode" : "hash",
|
||||||
"base": "/"
|
"base" : "/"
|
||||||
},
|
},
|
||||||
"devServer": {
|
"devServer" : {
|
||||||
"port": 8080,
|
"port" : 8080,
|
||||||
"disableHostCheck": true
|
"disableHostCheck" : true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user