Compare commits
5 Commits
8b2f328981
...
f2630ed01c
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
f2630ed01c | ||
|
|
b1435523e8 | ||
|
|
f1ec37143c | ||
|
|
090e4121ce | ||
|
|
4f37b0fb61 |
100
README.md
100
README.md
@@ -97,26 +97,86 @@ Nest is an MIT-licensed open source project. It can grow thanks to the sponsors
|
|||||||
|
|
||||||
Nest is [MIT licensed](https://github.com/nestjs/nest/blob/master/LICENSE).
|
Nest is [MIT licensed](https://github.com/nestjs/nest/blob/master/LICENSE).
|
||||||
|
|
||||||
How to Run:
|
## How to Run
|
||||||
1. Database Setup: Update the .env file with your PostgreSQL credentials.
|
|
||||||
|
|
||||||
1 DATABASE_TYPE=postgres
|
### 1. Database Setup
|
||||||
2 DATABASE_HOST=localhost
|
Update the `.env` file with your PostgreSQL credentials:
|
||||||
3 DATABASE_PORT=5432
|
|
||||||
4 DATABASE_USERNAME=your_username
|
|
||||||
5 DATABASE_PASSWORD=your_password
|
|
||||||
6 DATABASE_NAME=bidding
|
|
||||||
7 DATABASE_SYNCHRONIZE=true
|
|
||||||
2. Install Dependencies:
|
|
||||||
1 npm install
|
|
||||||
2 cd frontend && npm install
|
|
||||||
3. Build and Start:
|
|
||||||
|
|
||||||
1 # From the root directory
|
```env
|
||||||
2 cd frontend && npm run build
|
DATABASE_TYPE=postgres
|
||||||
3 cd ..
|
DATABASE_HOST=localhost
|
||||||
4 npm run build
|
DATABASE_PORT=5432
|
||||||
5 npm run start
|
DATABASE_USERNAME=your_username
|
||||||
|
DATABASE_PASSWORD=your_password
|
||||||
|
DATABASE_NAME=bidding
|
||||||
|
DATABASE_SYNCHRONIZE=true
|
||||||
|
```
|
||||||
|
|
||||||
The system will automatically initialize with the preset keywords: "山东", "海", "建设", "工程", "采购". You can
|
### 2. Install Dependencies
|
||||||
manage these and view crawled bidding information at http://localhost:3000.
|
|
||||||
|
```bash
|
||||||
|
npm install
|
||||||
|
cd frontend && npm install
|
||||||
|
```
|
||||||
|
|
||||||
|
### 3. Build and Start
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# From the root directory
|
||||||
|
cd frontend && npm run build
|
||||||
|
cd ..
|
||||||
|
npm run build
|
||||||
|
npm run start
|
||||||
|
```
|
||||||
|
|
||||||
|
## Features
|
||||||
|
|
||||||
|
### Frontend Features
|
||||||
|
|
||||||
|
- **Dashboard**: View high priority bids and today's bids
|
||||||
|
- **Date Filtering**:
|
||||||
|
- Click "3天" or "7天" buttons to filter bids from the last 3 or 7 days
|
||||||
|
- The filter only limits the start date, showing all data from the selected start date onwards (including data newer than the end date)
|
||||||
|
- **Keyword Filtering**: Filter bids by keywords (saved in localStorage)
|
||||||
|
- **All Bids**: View all bids with pagination and source filtering
|
||||||
|
- **Keyword Management**: Add and delete keywords with weight-based priority
|
||||||
|
|
||||||
|
### Backend Features
|
||||||
|
|
||||||
|
- **Multi-Source Crawling**: Crawls bidding information from multiple sources:
|
||||||
|
- ChdtpCrawler
|
||||||
|
- ChngCrawler
|
||||||
|
- SzecpCrawler
|
||||||
|
- CdtCrawler
|
||||||
|
- EpsCrawler
|
||||||
|
- CnncecpCrawler
|
||||||
|
- CgnpcCrawler
|
||||||
|
- CeicCrawler
|
||||||
|
- EspicCrawler
|
||||||
|
- PowerbeijingCrawler
|
||||||
|
- **Automatic Retry**: If a crawler returns 0 items, it will be retried after all crawlers complete
|
||||||
|
- **Proxy Support**: Configurable proxy settings via environment variables
|
||||||
|
- **Scheduled Tasks**: Automatic crawling at scheduled intervals
|
||||||
|
|
||||||
|
### Environment Variables
|
||||||
|
|
||||||
|
```env
|
||||||
|
# Database
|
||||||
|
DATABASE_TYPE=postgres
|
||||||
|
DATABASE_HOST=localhost
|
||||||
|
DATABASE_PORT=5432
|
||||||
|
DATABASE_USERNAME=your_username
|
||||||
|
DATABASE_PASSWORD=your_password
|
||||||
|
DATABASE_NAME=bidding
|
||||||
|
DATABASE_SYNCHRONIZE=true
|
||||||
|
|
||||||
|
# Proxy (optional)
|
||||||
|
PROXY_HOST=your_proxy_host
|
||||||
|
PROXY_PORT=your_proxy_port
|
||||||
|
PROXY_USERNAME=your_proxy_username
|
||||||
|
PROXY_PASSWORD=your_proxy_password
|
||||||
|
```
|
||||||
|
|
||||||
|
## Initial Setup
|
||||||
|
|
||||||
|
The system will automatically initialize with the preset keywords: "山东", "海", "建设", "工程", "采购". You can manage these and view crawled bidding information at http://localhost:3000.
|
||||||
@@ -31,167 +31,43 @@
|
|||||||
</el-header>
|
</el-header>
|
||||||
|
|
||||||
<el-main>
|
<el-main>
|
||||||
<div v-if="activeIndex === '1'">
|
<Dashboard
|
||||||
<div style="display: flex; justify-content: space-between; align-items: center; margin-bottom: 20px;">
|
v-if="activeIndex === '1'"
|
||||||
<h2 style="margin: 0;">Dashboard</h2>
|
:today-bids="todayBids"
|
||||||
<el-button type="primary" :loading="crawling" :disabled="isCrawling" @click="handleCrawl">
|
:high-priority-bids="highPriorityBids"
|
||||||
<el-icon style="margin-right: 5px"><Refresh /></el-icon>
|
:keywords="keywords"
|
||||||
立刻抓取
|
:loading="loading"
|
||||||
</el-button>
|
:is-crawling="isCrawling"
|
||||||
</div>
|
@refresh="fetchData"
|
||||||
<el-row :gutter="20">
|
/>
|
||||||
<el-col :span="24">
|
|
||||||
<el-card class="box-card" shadow="hover">
|
|
||||||
<template #header>
|
|
||||||
<div class="card-header">
|
|
||||||
<span>High Priority Bids</span>
|
|
||||||
<el-tag type="danger">Top 10</el-tag>
|
|
||||||
</div>
|
|
||||||
</template>
|
|
||||||
<el-table :data="highPriorityBids" style="width: 100%" size="small">
|
|
||||||
<el-table-column prop="title" label="Title">
|
|
||||||
<template #default="scope">
|
|
||||||
<a :href="scope.row.url" target="_blank">{{ scope.row.title }}</a>
|
|
||||||
</template>
|
|
||||||
</el-table-column>
|
|
||||||
<el-table-column prop="source" label="Source" width="240" />
|
|
||||||
<el-table-column prop="publishDate" label="Date" width="120">
|
|
||||||
<template #default="scope">{{ formatDate(scope.row.publishDate) }}</template>
|
|
||||||
</el-table-column>
|
|
||||||
</el-table>
|
|
||||||
</el-card>
|
|
||||||
</el-col>
|
|
||||||
</el-row>
|
|
||||||
<el-divider />
|
|
||||||
<div style="display: flex; justify-content: space-between; align-items: center; margin-bottom: 10px;">
|
|
||||||
<h3 style="margin: 0;">Today's Bids</h3>
|
|
||||||
<div style="display: flex; gap: 10px;">
|
|
||||||
<el-date-picker
|
|
||||||
v-model="dateRange"
|
|
||||||
type="daterange"
|
|
||||||
range-separator="To"
|
|
||||||
start-placeholder="Start Date"
|
|
||||||
end-placeholder="End Date"
|
|
||||||
format="YYYY-MM-DD"
|
|
||||||
value-format="YYYY-MM-DD"
|
|
||||||
clearable
|
|
||||||
style="width: 240px;"
|
|
||||||
/>
|
|
||||||
<el-button type="primary" @click="setLast3Days">3天</el-button>
|
|
||||||
<el-button type="primary" @click="setLast7Days">7天</el-button>
|
|
||||||
<el-select
|
|
||||||
v-model="selectedKeywords"
|
|
||||||
multiple
|
|
||||||
collapse-tags
|
|
||||||
collapse-tags-tooltip
|
|
||||||
placeholder="Filter by Keywords"
|
|
||||||
clearable
|
|
||||||
style="width: 300px;"
|
|
||||||
>
|
|
||||||
<el-option
|
|
||||||
v-for="keyword in keywords"
|
|
||||||
:key="keyword.id"
|
|
||||||
:label="keyword.word"
|
|
||||||
:value="keyword.word"
|
|
||||||
/>
|
|
||||||
</el-select>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
<el-table :data="filteredTodayBids" v-loading="loading" style="width: 100%">
|
|
||||||
<el-table-column prop="title" label="Title">
|
|
||||||
<template #default="scope">
|
|
||||||
<a :href="scope.row.url" target="_blank">{{ scope.row.title }}</a>
|
|
||||||
</template>
|
|
||||||
</el-table-column>
|
|
||||||
<el-table-column prop="source" label="Source" width="220" />
|
|
||||||
<el-table-column prop="publishDate" label="Date" width="150">
|
|
||||||
<template #default="scope">{{ formatDate(scope.row.publishDate) }}</template>
|
|
||||||
</el-table-column>
|
|
||||||
</el-table>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<div v-if="activeIndex === '2'">
|
<Bids
|
||||||
<div style="display: flex; justify-content: space-between; align-items: center; margin-bottom: 20px;">
|
v-if="activeIndex === '2'"
|
||||||
<h2 style="margin: 0;">All Bids</h2>
|
:bids="bids"
|
||||||
<el-select v-model="selectedSource" placeholder="Filter by Source" clearable style="width: 200px" @change="currentPage = 1; fetchData()">
|
:source-options="sourceOptions"
|
||||||
<el-option
|
:loading="loading"
|
||||||
v-for="source in sourceOptions"
|
:total="total"
|
||||||
:key="source"
|
@fetch="handleFetchBids"
|
||||||
:label="source"
|
/>
|
||||||
:value="source"
|
|
||||||
/>
|
|
||||||
</el-select>
|
|
||||||
</div>
|
|
||||||
<el-table :data="bids" v-loading="loading" style="width: 100%">
|
|
||||||
<el-table-column prop="title" label="Title">
|
|
||||||
<template #default="scope">
|
|
||||||
<a :href="scope.row.url" target="_blank">{{ scope.row.title }}</a>
|
|
||||||
</template>
|
|
||||||
</el-table-column>
|
|
||||||
<el-table-column prop="source" label="Source" width="200" />
|
|
||||||
<el-table-column prop="publishDate" label="Date" width="150">
|
|
||||||
<template #default="scope">{{ formatDate(scope.row.publishDate) }}</template>
|
|
||||||
</el-table-column>
|
|
||||||
</el-table>
|
|
||||||
<el-pagination
|
|
||||||
v-model:current-page="currentPage"
|
|
||||||
v-model:page-size="pageSize"
|
|
||||||
:page-sizes="[10, 20, 50, 100]"
|
|
||||||
:total="total"
|
|
||||||
layout="total, sizes, prev, pager, next, jumper"
|
|
||||||
@current-change="handlePageChange"
|
|
||||||
@size-change="handleSizeChange"
|
|
||||||
style="margin-top: 20px; justify-content: flex-end;"
|
|
||||||
/>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<div v-if="activeIndex === '3'">
|
<Keywords
|
||||||
<div class="card-header" style="display: flex; justify-content: space-between; align-items: center; margin-bottom: 20px;">
|
v-if="activeIndex === '3'"
|
||||||
<h2>Keyword Management</h2>
|
:keywords="keywords"
|
||||||
<el-button type="primary" @click="dialogVisible = true">Add Keyword</el-button>
|
:loading="loading"
|
||||||
</div>
|
@refresh="fetchData"
|
||||||
|
/>
|
||||||
<div v-loading="loading" style="min-height: 200px;">
|
|
||||||
<el-tag
|
|
||||||
v-for="keyword in keywords"
|
|
||||||
:key="keyword.id"
|
|
||||||
closable
|
|
||||||
:type="getTagType(keyword.weight)"
|
|
||||||
@close="handleDeleteKeyword(keyword.id)"
|
|
||||||
style="margin: 5px;"
|
|
||||||
>
|
|
||||||
{{ keyword.word }}
|
|
||||||
</el-tag>
|
|
||||||
<el-empty v-if="keywords.length === 0" description="No keywords" />
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</el-main>
|
</el-main>
|
||||||
</el-container>
|
</el-container>
|
||||||
|
|
||||||
<el-dialog v-model="dialogVisible" title="Add Keyword" width="30%">
|
|
||||||
<el-form :model="form" label-width="120px">
|
|
||||||
<el-form-item label="Keyword">
|
|
||||||
<el-input v-model="form.word" />
|
|
||||||
</el-form-item>
|
|
||||||
<el-form-item label="Weight">
|
|
||||||
<el-input-number v-model="form.weight" :min="1" :max="5" />
|
|
||||||
</el-form-item>
|
|
||||||
</el-form>
|
|
||||||
<template #footer>
|
|
||||||
<span class="dialog-footer">
|
|
||||||
<el-button @click="dialogVisible = false">Cancel</el-button>
|
|
||||||
<el-button type="primary" @click="handleAddKeyword">Confirm</el-button>
|
|
||||||
</span>
|
|
||||||
</template>
|
|
||||||
</el-dialog>
|
|
||||||
</el-container>
|
</el-container>
|
||||||
</template>
|
</template>
|
||||||
|
|
||||||
<script setup lang="ts">
|
<script setup lang="ts">
|
||||||
import { ref, onMounted, reactive, computed, watch } from 'vue'
|
import { ref, onMounted } from 'vue'
|
||||||
import axios from 'axios'
|
import axios from 'axios'
|
||||||
import { ElMessage } from 'element-plus'
|
import { DataBoard, Document, Setting } from '@element-plus/icons-vue'
|
||||||
import { DataBoard, Document, Setting, Refresh } from '@element-plus/icons-vue'
|
import Dashboard from './components/Dashboard.vue'
|
||||||
|
import Bids from './components/Bids.vue'
|
||||||
|
import Keywords from './components/Keywords.vue'
|
||||||
|
|
||||||
const activeIndex = ref('1')
|
const activeIndex = ref('1')
|
||||||
const bids = ref<any[]>([])
|
const bids = ref<any[]>([])
|
||||||
@@ -199,192 +75,32 @@ const todayBids = ref<any[]>([])
|
|||||||
const highPriorityBids = ref<any[]>([])
|
const highPriorityBids = ref<any[]>([])
|
||||||
const keywords = ref<any[]>([])
|
const keywords = ref<any[]>([])
|
||||||
const loading = ref(false)
|
const loading = ref(false)
|
||||||
const crawling = ref(false)
|
const isCrawling = ref(false)
|
||||||
const dialogVisible = ref(false)
|
|
||||||
const selectedSource = ref('')
|
|
||||||
const currentPage = ref(1)
|
|
||||||
const pageSize = ref(10)
|
|
||||||
const total = ref(0)
|
const total = ref(0)
|
||||||
const sourceOptions = ref<string[]>([])
|
const sourceOptions = ref<string[]>([])
|
||||||
const isCrawling = ref(false)
|
|
||||||
const selectedKeywords = ref<string[]>([])
|
|
||||||
const dateRange = ref<[string, string] | null>(null)
|
|
||||||
|
|
||||||
// 从 localStorage 加载保存的关键字
|
|
||||||
const loadSavedKeywords = () => {
|
|
||||||
const saved = localStorage.getItem('selectedKeywords')
|
|
||||||
if (saved) {
|
|
||||||
try {
|
|
||||||
selectedKeywords.value = JSON.parse(saved)
|
|
||||||
} catch (e) {
|
|
||||||
console.error('Failed to parse saved keywords:', e)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// 监听关键字变化并保存到 localStorage
|
|
||||||
watch(selectedKeywords, (newKeywords) => {
|
|
||||||
localStorage.setItem('selectedKeywords', JSON.stringify(newKeywords))
|
|
||||||
}, { deep: true })
|
|
||||||
|
|
||||||
// 监听日期范围变化并显示提示
|
|
||||||
watch(dateRange, () => {
|
|
||||||
const totalBids = bids.value.length
|
|
||||||
const filteredCount = filteredTodayBids.value.length
|
|
||||||
|
|
||||||
if (totalBids > 0 && filteredCount < totalBids) {
|
|
||||||
ElMessage.info(`筛选结果:共 ${filteredCount} 条数据(总共 ${totalBids} 条)`)
|
|
||||||
}
|
|
||||||
})
|
|
||||||
|
|
||||||
const form = reactive({
|
|
||||||
word: '',
|
|
||||||
weight: 1
|
|
||||||
})
|
|
||||||
|
|
||||||
// 根据 weight 获取 tag 类型
|
|
||||||
const getTagType = (weight: number) => {
|
|
||||||
if (weight >= 5) return 'danger'
|
|
||||||
if (weight >= 4) return 'warning'
|
|
||||||
if (weight >= 3) return 'primary'
|
|
||||||
if (weight >= 2) return 'success'
|
|
||||||
return 'info'
|
|
||||||
}
|
|
||||||
|
|
||||||
const handleSelect = (key: string) => {
|
const handleSelect = (key: string) => {
|
||||||
activeIndex.value = key
|
activeIndex.value = key
|
||||||
}
|
}
|
||||||
|
|
||||||
// 处理分页变化
|
const handleFetchBids = async (page: number, limit: number, source?: string) => {
|
||||||
const handlePageChange = (page: number) => {
|
loading.value = true
|
||||||
currentPage.value = page
|
try {
|
||||||
fetchData()
|
const res = await axios.get('/api/bids', {
|
||||||
}
|
params: {
|
||||||
|
page,
|
||||||
// 处理每页数量变化
|
limit,
|
||||||
const handleSizeChange = (size: number) => {
|
source: source || undefined
|
||||||
pageSize.value = size
|
}
|
||||||
currentPage.value = 1
|
|
||||||
fetchData()
|
|
||||||
}
|
|
||||||
|
|
||||||
// 设置日期范围为最近3天
|
|
||||||
const setLast3Days = () => {
|
|
||||||
const endDate = new Date()
|
|
||||||
const startDate = new Date()
|
|
||||||
startDate.setDate(startDate.getDate() - 2) // 最近3天(包括今天)
|
|
||||||
|
|
||||||
const formatDateForPicker = (date: Date) => {
|
|
||||||
const year = date.getFullYear()
|
|
||||||
const month = String(date.getMonth() + 1).padStart(2, '0')
|
|
||||||
const day = String(date.getDate()).padStart(2, '0')
|
|
||||||
return `${year}-${month}-${day}`
|
|
||||||
}
|
|
||||||
|
|
||||||
dateRange.value = [formatDateForPicker(startDate), formatDateForPicker(endDate)]
|
|
||||||
|
|
||||||
console.log('setLast3Days called, todayBids:', todayBids.value.length, 'dateRange:', dateRange.value)
|
|
||||||
|
|
||||||
// 直接计算筛选结果并显示提示(只限制开始时间,不限制结束时间)
|
|
||||||
const start = new Date(startDate)
|
|
||||||
start.setHours(0, 0, 0, 0)
|
|
||||||
|
|
||||||
let result = todayBids.value
|
|
||||||
result = result.filter(bid => {
|
|
||||||
if (!bid.publishDate) return false
|
|
||||||
const bidDate = new Date(bid.publishDate)
|
|
||||||
return bidDate >= start
|
|
||||||
})
|
|
||||||
|
|
||||||
const totalBids = todayBids.value.length
|
|
||||||
const filteredCount = result.length
|
|
||||||
|
|
||||||
console.log('setLast3Days result, totalBids:', totalBids, 'filteredCount:', filteredCount)
|
|
||||||
if (totalBids === 0) {
|
|
||||||
ElMessage.warning('暂无数据,请先抓取数据')
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// 设置日期范围为最近7天
|
|
||||||
const setLast7Days = () => {
|
|
||||||
const endDate = new Date()
|
|
||||||
const startDate = new Date()
|
|
||||||
startDate.setDate(startDate.getDate() - 6) // 最近7天(包括今天)
|
|
||||||
|
|
||||||
const formatDateForPicker = (date: Date) => {
|
|
||||||
const year = date.getFullYear()
|
|
||||||
const month = String(date.getMonth() + 1).padStart(2, '0')
|
|
||||||
const day = String(date.getDate()).padStart(2, '0')
|
|
||||||
return `${year}-${month}-${day}`
|
|
||||||
}
|
|
||||||
|
|
||||||
dateRange.value = [formatDateForPicker(startDate), formatDateForPicker(endDate)]
|
|
||||||
|
|
||||||
console.log('setLast7Days called, todayBids:', todayBids.value.length, 'dateRange:', dateRange.value)
|
|
||||||
|
|
||||||
// 直接计算筛选结果并显示提示(只限制开始时间,不限制结束时间)
|
|
||||||
const start = new Date(startDate)
|
|
||||||
start.setHours(0, 0, 0, 0)
|
|
||||||
|
|
||||||
let result = todayBids.value
|
|
||||||
result = result.filter(bid => {
|
|
||||||
if (!bid.publishDate) return false
|
|
||||||
const bidDate = new Date(bid.publishDate)
|
|
||||||
return bidDate >= start
|
|
||||||
})
|
|
||||||
|
|
||||||
const totalBids = todayBids.value.length
|
|
||||||
const filteredCount = result.length
|
|
||||||
|
|
||||||
console.log('setLast7Days result, totalBids:', totalBids, 'filteredCount:', filteredCount)
|
|
||||||
if (totalBids === 0) {
|
|
||||||
ElMessage.warning('暂无数据,请先抓取数据')
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
const formatDate = (dateString: string) => {
|
|
||||||
if (!dateString) return '-'
|
|
||||||
return new Date(dateString).toLocaleDateString()
|
|
||||||
}
|
|
||||||
|
|
||||||
// 过滤 Today's Bids,只显示包含所选关键字的项目,并且在日期范围内
|
|
||||||
const filteredTodayBids = computed(() => {
|
|
||||||
let result = todayBids.value
|
|
||||||
|
|
||||||
// 按关键字筛选
|
|
||||||
if (selectedKeywords.value.length > 0) {
|
|
||||||
result = result.filter(bid => {
|
|
||||||
return selectedKeywords.value.some(keyword =>
|
|
||||||
bid.title.toLowerCase().includes(keyword.toLowerCase())
|
|
||||||
)
|
|
||||||
})
|
})
|
||||||
|
bids.value = res.data.items
|
||||||
|
total.value = res.data.total
|
||||||
|
} catch (error) {
|
||||||
|
console.error('Failed to fetch bids:', error)
|
||||||
|
} finally {
|
||||||
|
loading.value = false
|
||||||
}
|
}
|
||||||
|
}
|
||||||
// 按日期范围筛选(只限制开始时间,不限制结束时间)
|
|
||||||
if (dateRange.value && dateRange.value.length === 2) {
|
|
||||||
const [startDate] = dateRange.value
|
|
||||||
result = result.filter(bid => {
|
|
||||||
if (!bid.publishDate) return false
|
|
||||||
const bidDate = new Date(bid.publishDate)
|
|
||||||
const start = new Date(startDate)
|
|
||||||
// 设置时间为当天的开始
|
|
||||||
start.setHours(0, 0, 0, 0)
|
|
||||||
return bidDate >= start
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
return result
|
|
||||||
})
|
|
||||||
|
|
||||||
// 监听筛选结果变化并显示提示
|
|
||||||
watch(filteredTodayBids, (newFilteredBids) => {
|
|
||||||
const totalBids = todayBids.value.length
|
|
||||||
const filteredCount = newFilteredBids.length
|
|
||||||
|
|
||||||
if (totalBids > 0 && filteredCount < totalBids) {
|
|
||||||
ElMessage.info(`筛选结果:共 ${filteredCount} 条数据(总共 ${totalBids} 条)`)
|
|
||||||
}
|
|
||||||
}, { deep: true })
|
|
||||||
|
|
||||||
const fetchData = async () => {
|
const fetchData = async () => {
|
||||||
loading.value = true
|
loading.value = true
|
||||||
@@ -392,9 +108,8 @@ const fetchData = async () => {
|
|||||||
const [bidsRes, recentRes, highRes, kwRes, sourcesRes, statusRes] = await Promise.all([
|
const [bidsRes, recentRes, highRes, kwRes, sourcesRes, statusRes] = await Promise.all([
|
||||||
axios.get('/api/bids', {
|
axios.get('/api/bids', {
|
||||||
params: {
|
params: {
|
||||||
page: currentPage.value,
|
page: 1,
|
||||||
limit: pageSize.value,
|
limit: 10
|
||||||
source: selectedSource.value || undefined
|
|
||||||
}
|
}
|
||||||
}),
|
}),
|
||||||
axios.get('/api/bids/recent'),
|
axios.get('/api/bids/recent'),
|
||||||
@@ -411,58 +126,13 @@ const fetchData = async () => {
|
|||||||
sourceOptions.value = sourcesRes.data
|
sourceOptions.value = sourcesRes.data
|
||||||
isCrawling.value = statusRes.data.isCrawling
|
isCrawling.value = statusRes.data.isCrawling
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
ElMessage.error('Failed to fetch data')
|
console.error('Failed to fetch data:', error)
|
||||||
} finally {
|
} finally {
|
||||||
loading.value = false
|
loading.value = false
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const handleCrawl = async () => {
|
|
||||||
if (isCrawling.value) {
|
|
||||||
ElMessage.warning('Crawl is already running')
|
|
||||||
return
|
|
||||||
}
|
|
||||||
crawling.value = true
|
|
||||||
try {
|
|
||||||
await axios.post('/api/crawler/run')
|
|
||||||
ElMessage.success('Crawl completed successfully')
|
|
||||||
fetchData() // Refresh data after crawl
|
|
||||||
} catch (error) {
|
|
||||||
ElMessage.error('Failed to run crawl task')
|
|
||||||
} finally {
|
|
||||||
crawling.value = false
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
const handleAddKeyword = async () => {
|
|
||||||
if (!form.word) {
|
|
||||||
ElMessage.warning('Please enter a keyword')
|
|
||||||
return
|
|
||||||
}
|
|
||||||
try {
|
|
||||||
await axios.post('/api/keywords', form)
|
|
||||||
ElMessage.success('Keyword added')
|
|
||||||
dialogVisible.value = false
|
|
||||||
form.word = ''
|
|
||||||
form.weight = 1
|
|
||||||
fetchData()
|
|
||||||
} catch (error) {
|
|
||||||
ElMessage.error('Failed to add keyword')
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
const handleDeleteKeyword = async (id: string) => {
|
|
||||||
try {
|
|
||||||
await axios.delete(`/api/keywords/${id}`)
|
|
||||||
ElMessage.success('Keyword deleted')
|
|
||||||
fetchData()
|
|
||||||
} catch (error) {
|
|
||||||
ElMessage.error('Failed to delete keyword')
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
onMounted(() => {
|
onMounted(() => {
|
||||||
loadSavedKeywords()
|
|
||||||
fetchData()
|
fetchData()
|
||||||
})
|
})
|
||||||
</script>
|
</script>
|
||||||
@@ -486,9 +156,4 @@ onMounted(() => {
|
|||||||
font-size: 18px;
|
font-size: 18px;
|
||||||
background-color: #434a50;
|
background-color: #434a50;
|
||||||
}
|
}
|
||||||
.card-header {
|
</style>
|
||||||
display: flex;
|
|
||||||
justify-content: space-between;
|
|
||||||
align-items: center;
|
|
||||||
}
|
|
||||||
</style>
|
|
||||||
|
|||||||
78
frontend/src/components/Bids.vue
Normal file
78
frontend/src/components/Bids.vue
Normal file
@@ -0,0 +1,78 @@
|
|||||||
|
<template>
|
||||||
|
<div>
|
||||||
|
<div style="display: flex; justify-content: space-between; align-items: center; margin-bottom: 20px;">
|
||||||
|
<h2 style="margin: 0;">All Bids</h2>
|
||||||
|
<el-select v-model="selectedSource" placeholder="Filter by Source" clearable style="width: 200px" @change="handleSourceChange">
|
||||||
|
<el-option
|
||||||
|
v-for="source in sourceOptions"
|
||||||
|
:key="source"
|
||||||
|
:label="source"
|
||||||
|
:value="source"
|
||||||
|
/>
|
||||||
|
</el-select>
|
||||||
|
</div>
|
||||||
|
<el-table :data="bids" v-loading="loading" style="width: 100%">
|
||||||
|
<el-table-column prop="title" label="Title">
|
||||||
|
<template #default="scope">
|
||||||
|
<a :href="scope.row.url" target="_blank">{{ scope.row.title }}</a>
|
||||||
|
</template>
|
||||||
|
</el-table-column>
|
||||||
|
<el-table-column prop="source" label="Source" width="200" />
|
||||||
|
<el-table-column prop="publishDate" label="Date" width="150">
|
||||||
|
<template #default="scope">{{ formatDate(scope.row.publishDate) }}</template>
|
||||||
|
</el-table-column>
|
||||||
|
</el-table>
|
||||||
|
<el-pagination
|
||||||
|
v-model:current-page="currentPage"
|
||||||
|
v-model:page-size="pageSize"
|
||||||
|
:page-sizes="[10, 20, 50, 100]"
|
||||||
|
:total="total"
|
||||||
|
layout="total, sizes, prev, pager, next, jumper"
|
||||||
|
@current-change="handlePageChange"
|
||||||
|
@size-change="handleSizeChange"
|
||||||
|
style="margin-top: 20px; justify-content: flex-end;"
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
</template>
|
||||||
|
|
||||||
|
<script setup lang="ts">
|
||||||
|
import { ref } from 'vue'
|
||||||
|
|
||||||
|
interface Props {
|
||||||
|
bids: any[]
|
||||||
|
sourceOptions: string[]
|
||||||
|
loading: boolean
|
||||||
|
total: number
|
||||||
|
}
|
||||||
|
|
||||||
|
const props = defineProps<Props>()
|
||||||
|
|
||||||
|
const emit = defineEmits<{
|
||||||
|
fetch: [page: number, limit: number, source?: string]
|
||||||
|
}>()
|
||||||
|
|
||||||
|
const selectedSource = ref('')
|
||||||
|
const currentPage = ref(1)
|
||||||
|
const pageSize = ref(10)
|
||||||
|
|
||||||
|
const formatDate = (dateString: string) => {
|
||||||
|
if (!dateString) return '-'
|
||||||
|
return new Date(dateString).toLocaleDateString()
|
||||||
|
}
|
||||||
|
|
||||||
|
const handleSourceChange = () => {
|
||||||
|
currentPage.value = 1
|
||||||
|
emit('fetch', currentPage.value, pageSize.value, selectedSource.value || undefined)
|
||||||
|
}
|
||||||
|
|
||||||
|
const handlePageChange = (page: number) => {
|
||||||
|
currentPage.value = page
|
||||||
|
emit('fetch', currentPage.value, pageSize.value, selectedSource.value || undefined)
|
||||||
|
}
|
||||||
|
|
||||||
|
const handleSizeChange = (size: number) => {
|
||||||
|
pageSize.value = size
|
||||||
|
currentPage.value = 1
|
||||||
|
emit('fetch', currentPage.value, pageSize.value, selectedSource.value || undefined)
|
||||||
|
}
|
||||||
|
</script>
|
||||||
279
frontend/src/components/Dashboard.vue
Normal file
279
frontend/src/components/Dashboard.vue
Normal file
@@ -0,0 +1,279 @@
|
|||||||
|
<template>
|
||||||
|
<div>
|
||||||
|
<div style="display: flex; justify-content: space-between; align-items: center; margin-bottom: 20px;">
|
||||||
|
<h2 style="margin: 0;">Dashboard</h2>
|
||||||
|
<el-button type="primary" :loading="crawling" :disabled="isCrawling" @click="handleCrawl">
|
||||||
|
<el-icon style="margin-right: 5px"><Refresh /></el-icon>
|
||||||
|
立刻抓取
|
||||||
|
</el-button>
|
||||||
|
</div>
|
||||||
|
<el-row :gutter="20">
|
||||||
|
<el-col :span="24">
|
||||||
|
<el-card class="box-card" shadow="hover">
|
||||||
|
<template #header>
|
||||||
|
<div class="card-header">
|
||||||
|
<span>High Priority Bids</span>
|
||||||
|
<el-tag type="danger">Top 10</el-tag>
|
||||||
|
</div>
|
||||||
|
</template>
|
||||||
|
<el-table :data="highPriorityBids" style="width: 100%" size="small">
|
||||||
|
<el-table-column prop="title" label="Title">
|
||||||
|
<template #default="scope">
|
||||||
|
<a :href="scope.row.url" target="_blank">{{ scope.row.title }}</a>
|
||||||
|
</template>
|
||||||
|
</el-table-column>
|
||||||
|
<el-table-column prop="source" label="Source" width="240" />
|
||||||
|
<el-table-column prop="publishDate" label="Date" width="120">
|
||||||
|
<template #default="scope">{{ formatDate(scope.row.publishDate) }}</template>
|
||||||
|
</el-table-column>
|
||||||
|
</el-table>
|
||||||
|
</el-card>
|
||||||
|
</el-col>
|
||||||
|
</el-row>
|
||||||
|
<el-divider />
|
||||||
|
<div style="display: flex; justify-content: space-between; align-items: center; margin-bottom: 10px;">
|
||||||
|
<h3 style="margin: 0;">Today's Bids</h3>
|
||||||
|
<div style="display: flex; gap: 10px;">
|
||||||
|
<el-date-picker
|
||||||
|
v-model="dateRange"
|
||||||
|
type="daterange"
|
||||||
|
range-separator="To"
|
||||||
|
start-placeholder="Start Date"
|
||||||
|
end-placeholder="End Date"
|
||||||
|
format="YYYY-MM-DD"
|
||||||
|
value-format="YYYY-MM-DD"
|
||||||
|
clearable
|
||||||
|
style="width: 240px;"
|
||||||
|
/>
|
||||||
|
<el-button type="primary" @click="setLast3Days">3天</el-button>
|
||||||
|
<el-button type="primary" @click="setLast7Days">7天</el-button>
|
||||||
|
<el-select
|
||||||
|
v-model="selectedKeywords"
|
||||||
|
multiple
|
||||||
|
collapse-tags
|
||||||
|
collapse-tags-tooltip
|
||||||
|
placeholder="Filter by Keywords"
|
||||||
|
clearable
|
||||||
|
style="width: 300px;"
|
||||||
|
>
|
||||||
|
<el-option
|
||||||
|
v-for="keyword in keywords"
|
||||||
|
:key="keyword.id"
|
||||||
|
:label="keyword.word"
|
||||||
|
:value="keyword.word"
|
||||||
|
/>
|
||||||
|
</el-select>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<el-table :data="filteredTodayBids" v-loading="loading" style="width: 100%">
|
||||||
|
<el-table-column prop="title" label="Title">
|
||||||
|
<template #default="scope">
|
||||||
|
<a :href="scope.row.url" target="_blank">{{ scope.row.title }}</a>
|
||||||
|
</template>
|
||||||
|
</el-table-column>
|
||||||
|
<el-table-column prop="source" label="Source" width="220" />
|
||||||
|
<el-table-column prop="publishDate" label="Date" width="150">
|
||||||
|
<template #default="scope">{{ formatDate(scope.row.publishDate) }}</template>
|
||||||
|
</el-table-column>
|
||||||
|
</el-table>
|
||||||
|
</div>
|
||||||
|
</template>
|
||||||
|
|
||||||
|
<script setup lang="ts">
|
||||||
|
import { ref, computed, watch } from 'vue'
|
||||||
|
import axios from 'axios'
|
||||||
|
import { ElMessage } from 'element-plus'
|
||||||
|
import { Refresh } from '@element-plus/icons-vue'
|
||||||
|
|
||||||
|
interface Props {
|
||||||
|
todayBids: any[]
|
||||||
|
highPriorityBids: any[]
|
||||||
|
keywords: any[]
|
||||||
|
loading: boolean
|
||||||
|
isCrawling: boolean
|
||||||
|
}
|
||||||
|
|
||||||
|
const props = defineProps<Props>()
|
||||||
|
|
||||||
|
const emit = defineEmits<{
|
||||||
|
crawl: []
|
||||||
|
refresh: []
|
||||||
|
}>()
|
||||||
|
|
||||||
|
const selectedKeywords = ref<string[]>([])
|
||||||
|
const dateRange = ref<[string, string] | null>(null)
|
||||||
|
const crawling = ref(false)
|
||||||
|
|
||||||
|
// 从 localStorage 加载保存的关键字
|
||||||
|
const loadSavedKeywords = () => {
|
||||||
|
const saved = localStorage.getItem('selectedKeywords')
|
||||||
|
if (saved) {
|
||||||
|
try {
|
||||||
|
selectedKeywords.value = JSON.parse(saved)
|
||||||
|
} catch (e) {
|
||||||
|
console.error('Failed to parse saved keywords:', e)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 监听关键字变化并保存到 localStorage
|
||||||
|
watch(selectedKeywords, (newKeywords) => {
|
||||||
|
localStorage.setItem('selectedKeywords', JSON.stringify(newKeywords))
|
||||||
|
}, { deep: true })
|
||||||
|
|
||||||
|
// 监听日期范围变化并显示提示
|
||||||
|
watch(dateRange, () => {
|
||||||
|
const totalBids = props.todayBids.length
|
||||||
|
const filteredCount = filteredTodayBids.value.length
|
||||||
|
|
||||||
|
if (totalBids > 0 && filteredCount < totalBids) {
|
||||||
|
ElMessage.info(`筛选结果:共 ${filteredCount} 条数据(总共 ${totalBids} 条)`)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
const formatDate = (dateString: string) => {
|
||||||
|
if (!dateString) return '-'
|
||||||
|
return new Date(dateString).toLocaleDateString()
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Today's bids narrowed by the active filters.
 *
 * - Keyword filter: when at least one keyword is selected, a bid is kept if
 *   its title contains any selected keyword (case-insensitive).
 * - Date filter: when a date range is set, a bid is kept if it has a
 *   publishDate on or after the start of the range's first day. Only the
 *   start of the range is enforced; the end date is intentionally ignored.
 *
 * With no active filter the original props.todayBids array is returned as is.
 */
const filteredTodayBids = computed(() => {
  let result = props.todayBids

  if (selectedKeywords.value.length > 0) {
    // Lower-case the keywords once instead of once per bid/keyword pair.
    const needles = selectedKeywords.value.map(keyword => keyword.toLowerCase())
    result = result.filter(bid => {
      const title = bid.title.toLowerCase()
      return needles.some(needle => title.includes(needle))
    })
  }

  if (dateRange.value && dateRange.value.length === 2) {
    const [startDate] = dateRange.value

    // A date-only "YYYY-MM-DD" string is parsed by Date as UTC midnight, which
    // falls on the previous local day in time zones west of UTC. Build the
    // start from its numeric parts so it is local midnight of the chosen day.
    const ymd = /^(\d{4})-(\d{2})-(\d{2})$/.exec(String(startDate))
    const start = ymd
      ? new Date(Number(ymd[1]), Number(ymd[2]) - 1, Number(ymd[3]))
      : new Date(startDate)
    start.setHours(0, 0, 0, 0)

    result = result.filter(bid => {
      // Bids without a publish date cannot satisfy a date filter.
      if (!bid.publishDate) return false
      return new Date(bid.publishDate) >= start
    })
  }

  return result
})
|
||||||
|
|
||||||
|
// Whenever the filtered list changes, show how many bids remain, but only
// when there is data and the filters actually hide at least one bid.
watch(
  filteredTodayBids,
  (visibleBids) => {
    const total = props.todayBids.length
    const shown = visibleBids.length
    const isNarrowed = total > 0 && shown < total
    if (!isNarrowed) return

    ElMessage.info(`筛选结果:共 ${shown} 条数据(总共 ${total} 条)`)
  },
  { deep: true },
)
|
||||||
|
|
||||||
|
/**
 * Point the date-range filter at the most recent `days` calendar days,
 * counting today as one of them, and warn when there are no bids to filter.
 *
 * The range is written to dateRange as two "YYYY-MM-DD" strings built from
 * local date parts. Both ends derive from the same instant so the range stays
 * consistent even if the call straddles midnight.
 *
 * @param days - Number of days to cover, today included (3 => today and the
 *               two preceding days).
 */
const applyRecentDaysRange = (days: number) => {
  const toPickerDate = (date: Date) => {
    const year = date.getFullYear()
    const month = String(date.getMonth() + 1).padStart(2, '0')
    const day = String(date.getDate()).padStart(2, '0')
    return `${year}-${month}-${day}`
  }

  const endDate = new Date()
  const startDate = new Date(endDate)
  startDate.setDate(startDate.getDate() - (days - 1))

  dateRange.value = [toPickerDate(startDate), toPickerDate(endDate)]

  // The earlier version also re-ran the date filter here, but only to print
  // the counts with console.log; that debug-only work has been removed.
  if (props.todayBids.length === 0) {
    ElMessage.warning('暂无数据,请先抓取数据')
  }
}

// Set the date range to the last 3 days (today included).
const setLast3Days = () => applyRecentDaysRange(3)

// Set the date range to the last 7 days (today included).
const setLast7Days = () => applyRecentDaysRange(7)
|
||||||
|
|
||||||
|
/**
 * Start a crawl through POST /api/crawler/run and, on success, emit 'refresh'
 * so the data can be reloaded.
 *
 * Does nothing except warn when a crawl is already running, either according
 * to the isCrawling prop or because this component still has a request in
 * flight. Failures are logged and reported to the user; the local crawling
 * flag is always cleared afterwards.
 */
const handleCrawl = async () => {
  // crawling.value covers the window in which our own request is pending,
  // which the isCrawling prop alone does not guard against.
  if (props.isCrawling || crawling.value) {
    ElMessage.warning('Crawl is already running')
    return
  }

  crawling.value = true
  try {
    await axios.post('/api/crawler/run')
    ElMessage.success('Crawl completed successfully')
    emit('refresh') // Refresh data after crawl
  } catch (error) {
    // Keep the underlying cause visible for debugging instead of dropping it.
    console.error('Failed to run crawl task:', error)
    ElMessage.error('Failed to run crawl task')
  } finally {
    crawling.value = false
  }
}
|
||||||
|
|
||||||
|
// 初始化时加载保存的关键字
|
||||||
|
loadSavedKeywords()
|
||||||
|
</script>
|
||||||
|
|
||||||
|
<style scoped>
|
||||||
|
.card-header {
|
||||||
|
display: flex;
|
||||||
|
justify-content: space-between;
|
||||||
|
align-items: center;
|
||||||
|
}
|
||||||
|
</style>
|
||||||
@@ -1,41 +0,0 @@
|
|||||||
<script setup lang="ts">
|
|
||||||
import { ref } from 'vue'
|
|
||||||
|
|
||||||
defineProps<{ msg: string }>()
|
|
||||||
|
|
||||||
const count = ref(0)
|
|
||||||
</script>
|
|
||||||
|
|
||||||
<template>
|
|
||||||
<h1>{{ msg }}</h1>
|
|
||||||
|
|
||||||
<div class="card">
|
|
||||||
<button type="button" @click="count++">count is {{ count }}</button>
|
|
||||||
<p>
|
|
||||||
Edit
|
|
||||||
<code>components/HelloWorld.vue</code> to test HMR
|
|
||||||
</p>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
Check out
|
|
||||||
<a href="https://vuejs.org/guide/quick-start.html#local" target="_blank"
|
|
||||||
>create-vue</a
|
|
||||||
>, the official Vue + Vite starter
|
|
||||||
</p>
|
|
||||||
<p>
|
|
||||||
Learn more about IDE Support for Vue in the
|
|
||||||
<a
|
|
||||||
href="https://vuejs.org/guide/scaling-up/tooling.html#ide-support"
|
|
||||||
target="_blank"
|
|
||||||
>Vue Docs Scaling up Guide</a
|
|
||||||
>.
|
|
||||||
</p>
|
|
||||||
<p class="read-the-docs">Click on the Vite and Vue logos to learn more</p>
|
|
||||||
</template>
|
|
||||||
|
|
||||||
<style scoped>
|
|
||||||
.read-the-docs {
|
|
||||||
color: #888;
|
|
||||||
}
|
|
||||||
</style>
|
|
||||||
107
frontend/src/components/Keywords.vue
Normal file
107
frontend/src/components/Keywords.vue
Normal file
@@ -0,0 +1,107 @@
|
|||||||
|
<template>
|
||||||
|
<div>
|
||||||
|
<div class="card-header" style="display: flex; justify-content: space-between; align-items: center; margin-bottom: 20px;">
|
||||||
|
<h2>Keyword Management</h2>
|
||||||
|
<el-button type="primary" @click="dialogVisible = true">Add Keyword</el-button>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div v-loading="loading" style="min-height: 200px;">
|
||||||
|
<el-tag
|
||||||
|
v-for="keyword in keywords"
|
||||||
|
:key="keyword.id"
|
||||||
|
closable
|
||||||
|
:type="getTagType(keyword.weight)"
|
||||||
|
@close="handleDeleteKeyword(keyword.id)"
|
||||||
|
style="margin: 5px;"
|
||||||
|
>
|
||||||
|
{{ keyword.word }}
|
||||||
|
</el-tag>
|
||||||
|
<el-empty v-if="keywords.length === 0" description="No keywords" />
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<el-dialog v-model="dialogVisible" title="Add Keyword" width="30%">
|
||||||
|
<el-form :model="form" label-width="120px">
|
||||||
|
<el-form-item label="Keyword">
|
||||||
|
<el-input v-model="form.word" />
|
||||||
|
</el-form-item>
|
||||||
|
<el-form-item label="Weight">
|
||||||
|
<el-input-number v-model="form.weight" :min="1" :max="5" />
|
||||||
|
</el-form-item>
|
||||||
|
</el-form>
|
||||||
|
<template #footer>
|
||||||
|
<span class="dialog-footer">
|
||||||
|
<el-button @click="dialogVisible = false">Cancel</el-button>
|
||||||
|
<el-button type="primary" @click="handleAddKeyword">Confirm</el-button>
|
||||||
|
</span>
|
||||||
|
</template>
|
||||||
|
</el-dialog>
|
||||||
|
</div>
|
||||||
|
</template>
|
||||||
|
|
||||||
|
<script setup lang="ts">
|
||||||
|
import { ref, reactive } from 'vue'
|
||||||
|
import axios from 'axios'
|
||||||
|
import { ElMessage } from 'element-plus'
|
||||||
|
|
||||||
|
interface Props {
|
||||||
|
keywords: any[]
|
||||||
|
loading: boolean
|
||||||
|
}
|
||||||
|
|
||||||
|
const props = defineProps<Props>()
|
||||||
|
|
||||||
|
const emit = defineEmits<{
|
||||||
|
refresh: []
|
||||||
|
}>()
|
||||||
|
|
||||||
|
const dialogVisible = ref(false)
|
||||||
|
|
||||||
|
const form = reactive({
|
||||||
|
word: '',
|
||||||
|
weight: 1
|
||||||
|
})
|
||||||
|
|
||||||
|
/**
 * Map a keyword weight to the tag type used to render it.
 *
 * Thresholds are checked from highest to lowest and the first one the weight
 * reaches wins: >= 5 'danger', >= 4 'warning', >= 3 'primary', >= 2 'success'.
 * Anything lower (or not comparable, such as NaN) yields 'info'.
 */
const getTagType = (weight: number) => {
  const tiers = [
    [5, 'danger'],
    [4, 'warning'],
    [3, 'primary'],
    [2, 'success'],
  ] as const

  for (const [threshold, tagType] of tiers) {
    if (weight >= threshold) return tagType
  }
  return 'info'
}
|
||||||
|
|
||||||
|
/**
 * Submit the "Add Keyword" dialog.
 *
 * The keyword is trimmed first; an empty or whitespace-only value is rejected
 * with a warning and nothing is sent. On success the dialog is closed, the
 * form is reset to its defaults (empty word, weight 1) and 'refresh' is
 * emitted. Failures are logged and reported to the user, and the dialog stays
 * open with the entered values.
 */
const handleAddKeyword = async () => {
  const word = form.word.trim()
  if (!word) {
    ElMessage.warning('Please enter a keyword')
    return
  }

  try {
    // Send the trimmed word so stray surrounding whitespace is never stored.
    await axios.post('/api/keywords', { word, weight: form.weight })
    ElMessage.success('Keyword added')
    dialogVisible.value = false
    form.word = ''
    form.weight = 1
    emit('refresh')
  } catch (error) {
    console.error('Failed to add keyword:', error)
    ElMessage.error('Failed to add keyword')
  }
}
|
||||||
|
|
||||||
|
/**
 * Delete a keyword through DELETE /api/keywords/:id and emit 'refresh' on
 * success. Failures are logged and reported to the user.
 *
 * @param id - Identifier of the keyword to delete.
 */
const handleDeleteKeyword = async (id: string) => {
  try {
    // Escape the id so characters such as '/' or '?' cannot alter the path.
    await axios.delete(`/api/keywords/${encodeURIComponent(id)}`)
    ElMessage.success('Keyword deleted')
    emit('refresh')
  } catch (error) {
    console.error('Failed to delete keyword:', error)
    ElMessage.error('Failed to delete keyword')
  }
}
|
||||||
|
</script>
|
||||||
|
|
||||||
|
<style scoped>
|
||||||
|
.card-header {
|
||||||
|
display: flex;
|
||||||
|
justify-content: space-between;
|
||||||
|
align-items: center;
|
||||||
|
}
|
||||||
|
</style>
|
||||||
13
jest.config.js
Normal file
13
jest.config.js
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
/** Jest configuration for the TypeScript unit tests located under src/. */
const config = {
  moduleFileExtensions: ['js', 'json', 'ts'],
  rootDir: 'src',
  // Only files ending in .spec.ts are treated as tests.
  testRegex: '.*\\.spec\\.ts$',
  transform: {
    '^.+\\.(t|j)s$': 'ts-jest',
  },
  collectCoverageFrom: ['**/*.(t|j)s'],
  // Relative to rootDir, so the report lands next to src/.
  coverageDirectory: '../coverage',
  testEnvironment: 'node',
  // Load environment variables before the tests run.
  setupFiles: ['<rootDir>/../jest.setup.js'],
};

module.exports = config;
|
||||||
2
jest.setup.js
Normal file
2
jest.setup.js
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
// Load environment variables for the tests from the .env file that sits next
// to this setup file. The path is anchored to __dirname rather than the
// current working directory so it resolves the same way no matter where Jest
// is launched from.
require('dotenv').config({ path: require('path').resolve(__dirname, '.env') });
|
||||||
19
package.json
19
package.json
@@ -19,6 +19,7 @@
|
|||||||
"test:debug": "node --inspect-brk -r tsconfig-paths/register -r ts-node/register node_modules/.bin/jest --runInBand",
|
"test:debug": "node --inspect-brk -r tsconfig-paths/register -r ts-node/register node_modules/.bin/jest --runInBand",
|
||||||
"test:e2e": "jest --config ./test/jest-e2e.json",
|
"test:e2e": "jest --config ./test/jest-e2e.json",
|
||||||
"crawl": "ts-node -r tsconfig-paths/register src/scripts/crawl.ts",
|
"crawl": "ts-node -r tsconfig-paths/register src/scripts/crawl.ts",
|
||||||
|
"update-source": "ts-node -r tsconfig-paths/register src/scripts/update-source.ts",
|
||||||
"web":"npm --prefix frontend run build"
|
"web":"npm --prefix frontend run build"
|
||||||
},
|
},
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
@@ -32,6 +33,7 @@
|
|||||||
"axios": "^1.13.2",
|
"axios": "^1.13.2",
|
||||||
"class-transformer": "^0.5.1",
|
"class-transformer": "^0.5.1",
|
||||||
"class-validator": "^0.14.3",
|
"class-validator": "^0.14.3",
|
||||||
|
"dotenv": "^16.4.7",
|
||||||
"mysql2": "^3.16.0",
|
"mysql2": "^3.16.0",
|
||||||
"puppeteer": "^24.34.0",
|
"puppeteer": "^24.34.0",
|
||||||
"puppeteer-extra": "^3.3.6",
|
"puppeteer-extra": "^3.3.6",
|
||||||
@@ -66,22 +68,5 @@
|
|||||||
"tsconfig-paths": "^4.2.0",
|
"tsconfig-paths": "^4.2.0",
|
||||||
"typescript": "^5.7.3",
|
"typescript": "^5.7.3",
|
||||||
"typescript-eslint": "^8.20.0"
|
"typescript-eslint": "^8.20.0"
|
||||||
},
|
|
||||||
"jest": {
|
|
||||||
"moduleFileExtensions": [
|
|
||||||
"js",
|
|
||||||
"json",
|
|
||||||
"ts"
|
|
||||||
],
|
|
||||||
"rootDir": "src",
|
|
||||||
"testRegex": ".*\\.spec\\.ts$",
|
|
||||||
"transform": {
|
|
||||||
"^.+\\.(t|j)s$": "ts-jest"
|
|
||||||
},
|
|
||||||
"collectCoverageFrom": [
|
|
||||||
"**/*.(t|j)s"
|
|
||||||
],
|
|
||||||
"coverageDirectory": "../coverage",
|
|
||||||
"testEnvironment": "node"
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -12,6 +12,8 @@ import { CgnpcCrawler } from './cgnpc_target';
|
|||||||
import { CeicCrawler } from './ceic_target';
|
import { CeicCrawler } from './ceic_target';
|
||||||
import { EspicCrawler } from './espic_target';
|
import { EspicCrawler } from './espic_target';
|
||||||
import { PowerbeijingCrawler } from './powerbeijing_target';
|
import { PowerbeijingCrawler } from './powerbeijing_target';
|
||||||
|
import { SdiccCrawler } from './sdicc_target';
|
||||||
|
import { CnoocCrawler } from './cnooc_target';
|
||||||
|
|
||||||
@Injectable()
|
@Injectable()
|
||||||
export class BidCrawlerService {
|
export class BidCrawlerService {
|
||||||
@@ -65,7 +67,7 @@ export class BidCrawlerService {
|
|||||||
args,
|
args,
|
||||||
});
|
});
|
||||||
|
|
||||||
const crawlers = [ChdtpCrawler, ChngCrawler, SzecpCrawler, CdtCrawler, EpsCrawler, CnncecpCrawler, CgnpcCrawler, CeicCrawler, EspicCrawler, PowerbeijingCrawler];
|
const crawlers = [ChdtpCrawler, ChngCrawler, SzecpCrawler, CdtCrawler, EpsCrawler, CnncecpCrawler, CgnpcCrawler, CeicCrawler, EspicCrawler, PowerbeijingCrawler, SdiccCrawler, CnoocCrawler];
|
||||||
|
|
||||||
try {
|
try {
|
||||||
for (const crawler of crawlers) {
|
for (const crawler of crawlers) {
|
||||||
|
|||||||
@@ -4,13 +4,35 @@ import * as puppeteer from 'puppeteer';
|
|||||||
// Increase timeout to 60 seconds for network operations
|
// Increase timeout to 60 seconds for network operations
|
||||||
jest.setTimeout(60000*5);
|
jest.setTimeout(60000*5);
|
||||||
|
|
||||||
|
/**
 * Build the browser launch flags for the proxy configured through
 * environment variables.
 *
 * Reads PROXY_HOST, PROXY_PORT, PROXY_USERNAME and PROXY_PASSWORD.
 *
 * @returns An empty array unless both host and port are set; otherwise the
 *          `--proxy-server` flag, followed by a `--proxy-auth` flag when both
 *          username and password are set.
 */
const getProxyArgs = (): string[] => {
  const {
    PROXY_HOST: host,
    PROXY_PORT: port,
    PROXY_USERNAME: username,
    PROXY_PASSWORD: password,
  } = process.env;

  if (!host || !port) {
    return [];
  }

  const flags = [`--proxy-server=${host}:${port}`];
  // NOTE(review): `--proxy-auth` does not appear to be a documented Chromium
  // switch; Puppeteer documents page.authenticate() for proxy credentials.
  // Confirm this flag has any effect. Be aware that anything printing these
  // flags also prints the password.
  if (username && password) {
    flags.push(`--proxy-auth=${username}:${password}`);
  }
  return flags;
};
|
||||||
|
|
||||||
describe('CdtCrawler Real Site Test', () => {
|
describe('CdtCrawler Real Site Test', () => {
|
||||||
let browser: puppeteer.Browser;
|
let browser: puppeteer.Browser;
|
||||||
|
|
||||||
beforeAll(async () => {
|
beforeAll(async () => {
|
||||||
|
const proxyArgs = getProxyArgs();
|
||||||
|
if (proxyArgs.length > 0) {
|
||||||
|
console.log('Using proxy:', proxyArgs.join(' '));
|
||||||
|
}
|
||||||
|
|
||||||
browser = await puppeteer.launch({
|
browser = await puppeteer.launch({
|
||||||
headless: false, // Change to false to see browser UI
|
headless: false, // Change to false to see browser UI
|
||||||
args: ['--no-sandbox', '--disable-setuid-sandbox'],
|
args: ['--no-sandbox', '--disable-setuid-sandbox', ...proxyArgs],
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|||||||
@@ -11,11 +11,11 @@ async function simulateHumanMouseMovement(page: puppeteer.Page) {
|
|||||||
for (let i = 0; i < movements; i++) {
|
for (let i = 0; i < movements; i++) {
|
||||||
const x = Math.floor(Math.random() * viewport.width);
|
const x = Math.floor(Math.random() * viewport.width);
|
||||||
const y = Math.floor(Math.random() * viewport.height);
|
const y = Math.floor(Math.random() * viewport.height);
|
||||||
|
|
||||||
await page.mouse.move(x, y, {
|
await page.mouse.move(x, y, {
|
||||||
steps: 10 + Math.floor(Math.random() * 20) // 10-30步,使移动更平滑
|
steps: 10 + Math.floor(Math.random() * 20) // 10-30步,使移动更平滑
|
||||||
});
|
});
|
||||||
|
|
||||||
// 随机停顿 100-500ms
|
// 随机停顿 100-500ms
|
||||||
await new Promise(r => setTimeout(r, 100 + Math.random() * 400));
|
await new Promise(r => setTimeout(r, 100 + Math.random() * 400));
|
||||||
}
|
}
|
||||||
@@ -27,7 +27,7 @@ async function simulateHumanScrolling(page: puppeteer.Page) {
|
|||||||
|
|
||||||
for (let i = 0; i < scrollCount; i++) {
|
for (let i = 0; i < scrollCount; i++) {
|
||||||
const scrollDistance = 100 + Math.floor(Math.random() * 400); // 100-500px
|
const scrollDistance = 100 + Math.floor(Math.random() * 400); // 100-500px
|
||||||
|
|
||||||
await page.evaluate((distance) => {
|
await page.evaluate((distance) => {
|
||||||
window.scrollBy({
|
window.scrollBy({
|
||||||
top: distance,
|
top: distance,
|
||||||
@@ -80,7 +80,7 @@ export const CdtCrawler = {
|
|||||||
// 模拟人类行为
|
// 模拟人类行为
|
||||||
logger.log('Simulating human mouse movements...');
|
logger.log('Simulating human mouse movements...');
|
||||||
await simulateHumanMouseMovement(page);
|
await simulateHumanMouseMovement(page);
|
||||||
|
|
||||||
logger.log('Simulating human scrolling...');
|
logger.log('Simulating human scrolling...');
|
||||||
await simulateHumanScrolling(page);
|
await simulateHumanScrolling(page);
|
||||||
|
|
||||||
@@ -103,7 +103,7 @@ export const CdtCrawler = {
|
|||||||
// 模拟人类行为
|
// 模拟人类行为
|
||||||
logger.log('Simulating human mouse movements...');
|
logger.log('Simulating human mouse movements...');
|
||||||
await simulateHumanMouseMovement(page);
|
await simulateHumanMouseMovement(page);
|
||||||
|
|
||||||
logger.log('Simulating human scrolling...');
|
logger.log('Simulating human scrolling...');
|
||||||
await simulateHumanScrolling(page);
|
await simulateHumanScrolling(page);
|
||||||
|
|
||||||
@@ -113,7 +113,7 @@ export const CdtCrawler = {
|
|||||||
const titles = Array.from(document.querySelectorAll('span.h-notice-title'));
|
const titles = Array.from(document.querySelectorAll('span.h-notice-title'));
|
||||||
return titles.some(title => title.textContent && title.textContent.includes('招标公告'));
|
return titles.some(title => title.textContent && title.textContent.includes('招标公告'));
|
||||||
}, { timeout: 30000 });
|
}, { timeout: 30000 });
|
||||||
|
|
||||||
await page.evaluate(() => {
|
await page.evaluate(() => {
|
||||||
const titles = Array.from(document.querySelectorAll('span.h-notice-title'));
|
const titles = Array.from(document.querySelectorAll('span.h-notice-title'));
|
||||||
const targetTitle = titles.find(title => title.textContent && title.textContent.includes('招标公告'));
|
const targetTitle = titles.find(title => title.textContent && title.textContent.includes('招标公告'));
|
||||||
@@ -133,13 +133,22 @@ export const CdtCrawler = {
|
|||||||
// 模拟人类行为
|
// 模拟人类行为
|
||||||
logger.log('Simulating human mouse movements...');
|
logger.log('Simulating human mouse movements...');
|
||||||
await simulateHumanMouseMovement(page);
|
await simulateHumanMouseMovement(page);
|
||||||
|
|
||||||
logger.log('Simulating human scrolling...');
|
logger.log('Simulating human scrolling...');
|
||||||
await simulateHumanScrolling(page);
|
await simulateHumanScrolling(page);
|
||||||
|
|
||||||
|
// 等待表格加载完成
|
||||||
|
logger.log('Waiting for table to load...');
|
||||||
|
await page.waitForSelector('table.layui-table', { timeout: 30000 });
|
||||||
|
|
||||||
while (currentPage <= maxPages) {
|
while (currentPage <= maxPages) {
|
||||||
|
// 等待表格数据加载
|
||||||
|
await page.waitForSelector('tbody tr', { timeout: 10000 });
|
||||||
|
|
||||||
|
// 获取当前页面的 HTML 内容
|
||||||
const content = await page.content();
|
const content = await page.content();
|
||||||
const pageResults = this.extract(content);
|
const pageResults = this.extract(content);
|
||||||
|
|
||||||
if (pageResults.length === 0) {
|
if (pageResults.length === 0) {
|
||||||
logger.warn(`No results found on page ${currentPage}, stopping.`);
|
logger.warn(`No results found on page ${currentPage}, stopping.`);
|
||||||
break;
|
break;
|
||||||
@@ -151,43 +160,69 @@ export const CdtCrawler = {
|
|||||||
// 模拟人类行为 - 翻页前
|
// 模拟人类行为 - 翻页前
|
||||||
logger.log('Simulating human mouse movements before pagination...');
|
logger.log('Simulating human mouse movements before pagination...');
|
||||||
await simulateHumanMouseMovement(page);
|
await simulateHumanMouseMovement(page);
|
||||||
|
|
||||||
logger.log('Simulating human scrolling before pagination...');
|
logger.log('Simulating human scrolling before pagination...');
|
||||||
await simulateHumanScrolling(page);
|
await simulateHumanScrolling(page);
|
||||||
|
|
||||||
// Find the "Next Page" button - layui pagination
|
// 查找下一页按钮
|
||||||
const nextButtonSelector = 'a.layui-laypage-next:not(.layui-disabled)';
|
const nextButtonSelector = 'a.layui-laypage-next:not(.layui-disabled)';
|
||||||
const nextButton = await page.$(nextButtonSelector);
|
const nextButtonExists = await page.evaluate((selector) => {
|
||||||
|
const btn = document.querySelector(selector);
|
||||||
|
return btn !== null && !btn.classList.contains('layui-disabled');
|
||||||
|
}, nextButtonSelector);
|
||||||
|
|
||||||
if (!nextButton) {
|
if (!nextButtonExists) {
|
||||||
logger.log('Next page button not found. Reached end of list.');
|
logger.log('Next page button not found or disabled. Reached end of list.');
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
logger.log(`Navigating to page ${currentPage + 1}...`);
|
logger.log(`Navigating to page ${currentPage + 1}...`);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
await Promise.all([
|
// 点击下一页按钮
|
||||||
page.waitForNavigation({ waitUntil: 'networkidle2', timeout: 60000 }),
|
await page.evaluate((selector) => {
|
||||||
nextButton.click(),
|
const btn = document.querySelector(selector) as HTMLElement;
|
||||||
]);
|
if (btn) btn.click();
|
||||||
|
}, nextButtonSelector);
|
||||||
|
|
||||||
|
// 等待 AJAX 请求完成(通过监听网络请求)
|
||||||
|
await page.waitForFunction(() => {
|
||||||
|
// 检查表格是否正在加载
|
||||||
|
const loading = document.querySelector('.layui-table-loading');
|
||||||
|
return !loading;
|
||||||
|
}, { timeout: 30000 }).catch(() => {});
|
||||||
|
|
||||||
|
// 额外等待确保数据加载完成
|
||||||
|
await new Promise(r => setTimeout(r, 2000));
|
||||||
|
|
||||||
|
// 检查是否真的翻页了(通过检查当前页码)
|
||||||
|
const currentActivePage = await page.evaluate(() => {
|
||||||
|
const activeSpan = document.querySelector('.layui-laypage-curr em:last-child');
|
||||||
|
return activeSpan ? parseInt(activeSpan.textContent || '1') : 1;
|
||||||
|
});
|
||||||
|
|
||||||
|
if (currentActivePage <= currentPage) {
|
||||||
|
logger.log('Page did not change, stopping.');
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
currentPage++;
|
||||||
|
|
||||||
|
// 模拟人类行为 - 翻页后
|
||||||
|
logger.log('Simulating human mouse movements after pagination...');
|
||||||
|
await simulateHumanMouseMovement(page);
|
||||||
|
|
||||||
|
logger.log('Simulating human scrolling after pagination...');
|
||||||
|
await simulateHumanScrolling(page);
|
||||||
|
|
||||||
|
// Random delay between pages
|
||||||
|
const delay = Math.floor(Math.random() * (3000 - 1000 + 1)) + 1000;
|
||||||
|
await new Promise(resolve => setTimeout(resolve, delay));
|
||||||
|
|
||||||
} catch (navError) {
|
} catch (navError) {
|
||||||
logger.error(`Navigation to page ${currentPage + 1} failed: ${navError.message}`);
|
logger.error(`Navigation to page ${currentPage + 1} failed: ${navError.message}`);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
currentPage++;
|
|
||||||
|
|
||||||
// 模拟人类行为 - 翻页后
|
|
||||||
logger.log('Simulating human mouse movements after pagination...');
|
|
||||||
await simulateHumanMouseMovement(page);
|
|
||||||
|
|
||||||
logger.log('Simulating human scrolling after pagination...');
|
|
||||||
await simulateHumanScrolling(page);
|
|
||||||
|
|
||||||
// Random delay between pages
|
|
||||||
const delay = Math.floor(Math.random() * (3000 - 1000 + 1)) + 1000;
|
|
||||||
await new Promise(resolve => setTimeout(resolve, delay));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return allResults;
|
return allResults;
|
||||||
|
|||||||
@@ -4,10 +4,32 @@ import * as puppeteer from 'puppeteer';
|
|||||||
// Increase timeout to 120 seconds for manual inspection and slow sites
|
// Increase timeout to 120 seconds for manual inspection and slow sites
|
||||||
jest.setTimeout(120000);
|
jest.setTimeout(120000);
|
||||||
|
|
||||||
|
/**
 * Build the browser launch flags for the proxy configured through
 * environment variables.
 *
 * Reads PROXY_HOST, PROXY_PORT, PROXY_USERNAME and PROXY_PASSWORD.
 *
 * @returns An empty array unless both host and port are set; otherwise the
 *          `--proxy-server` flag, followed by a `--proxy-auth` flag when both
 *          username and password are set.
 */
const getProxyArgs = (): string[] => {
  const {
    PROXY_HOST: host,
    PROXY_PORT: port,
    PROXY_USERNAME: username,
    PROXY_PASSWORD: password,
  } = process.env;

  if (!host || !port) {
    return [];
  }

  const flags = [`--proxy-server=${host}:${port}`];
  // NOTE(review): `--proxy-auth` does not appear to be a documented Chromium
  // switch; Puppeteer documents page.authenticate() for proxy credentials.
  // Confirm this flag has any effect. Be aware that anything printing these
  // flags also prints the password.
  if (username && password) {
    flags.push(`--proxy-auth=${username}:${password}`);
  }
  return flags;
};
|
||||||
|
|
||||||
describe('CeicCrawler Real Site Test', () => {
|
describe('CeicCrawler Real Site Test', () => {
|
||||||
let browser: puppeteer.Browser;
|
let browser: puppeteer.Browser;
|
||||||
|
|
||||||
beforeAll(async () => {
|
beforeAll(async () => {
|
||||||
|
const proxyArgs = getProxyArgs();
|
||||||
|
if (proxyArgs.length > 0) {
|
||||||
|
console.log('Using proxy:', proxyArgs.join(' '));
|
||||||
|
}
|
||||||
|
|
||||||
browser = await puppeteer.launch({
|
browser = await puppeteer.launch({
|
||||||
headless: false, // Run in non-headless mode
|
headless: false, // Run in non-headless mode
|
||||||
args: [
|
args: [
|
||||||
@@ -16,6 +38,7 @@ describe('CeicCrawler Real Site Test', () => {
|
|||||||
'--disable-blink-features=AutomationControlled',
|
'--disable-blink-features=AutomationControlled',
|
||||||
'--window-size=1920,1080',
|
'--window-size=1920,1080',
|
||||||
'--disable-infobars',
|
'--disable-infobars',
|
||||||
|
...proxyArgs,
|
||||||
],
|
],
|
||||||
defaultViewport: null
|
defaultViewport: null
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -48,7 +48,7 @@ async function simulateHumanScrolling(page: puppeteer.Page) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
export const CeicCrawler = {
|
export const CeicCrawler = {
|
||||||
name: '大连能源采购平台',
|
name: '国家能源集团生态协作平台',
|
||||||
url: 'https://ceic.dlnyzb.com/3001',
|
url: 'https://ceic.dlnyzb.com/3001',
|
||||||
baseUrl: 'https://ceic.dlnyzb.com',
|
baseUrl: 'https://ceic.dlnyzb.com',
|
||||||
|
|
||||||
|
|||||||
@@ -4,13 +4,35 @@ import * as puppeteer from 'puppeteer';
|
|||||||
// Increase timeout to 60 seconds for network operations
|
// Increase timeout to 60 seconds for network operations
|
||||||
jest.setTimeout(60000*5);
|
jest.setTimeout(60000*5);
|
||||||
|
|
||||||
|
/**
 * Build the browser launch flags for the proxy configured through
 * environment variables.
 *
 * Reads PROXY_HOST, PROXY_PORT, PROXY_USERNAME and PROXY_PASSWORD.
 *
 * @returns An empty array unless both host and port are set; otherwise the
 *          `--proxy-server` flag, followed by a `--proxy-auth` flag when both
 *          username and password are set.
 */
const getProxyArgs = (): string[] => {
  const {
    PROXY_HOST: host,
    PROXY_PORT: port,
    PROXY_USERNAME: username,
    PROXY_PASSWORD: password,
  } = process.env;

  if (!host || !port) {
    return [];
  }

  const flags = [`--proxy-server=${host}:${port}`];
  // NOTE(review): `--proxy-auth` does not appear to be a documented Chromium
  // switch; Puppeteer documents page.authenticate() for proxy credentials.
  // Confirm this flag has any effect. Be aware that anything printing these
  // flags also prints the password.
  if (username && password) {
    flags.push(`--proxy-auth=${username}:${password}`);
  }
  return flags;
};
|
||||||
|
|
||||||
describe('CgnpcCrawler Real Site Test', () => {
|
describe('CgnpcCrawler Real Site Test', () => {
|
||||||
let browser: puppeteer.Browser;
|
let browser: puppeteer.Browser;
|
||||||
|
|
||||||
beforeAll(async () => {
|
beforeAll(async () => {
|
||||||
|
const proxyArgs = getProxyArgs();
|
||||||
|
if (proxyArgs.length > 0) {
|
||||||
|
console.log('Using proxy:', proxyArgs.join(' '));
|
||||||
|
}
|
||||||
|
|
||||||
browser = await puppeteer.launch({
|
browser = await puppeteer.launch({
|
||||||
headless: false, // Change to false to see browser UI
|
headless: false, // Change to false to see browser UI
|
||||||
args: ['--no-sandbox', '--disable-setuid-sandbox'],
|
args: ['--no-sandbox', '--disable-setuid-sandbox', ...proxyArgs],
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|||||||
@@ -4,13 +4,35 @@ import * as puppeteer from 'puppeteer';
|
|||||||
// Increase timeout to 60 seconds for network operations
|
// Increase timeout to 60 seconds for network operations
|
||||||
jest.setTimeout(60000);
|
jest.setTimeout(60000);
|
||||||
|
|
||||||
|
/**
 * Build the browser launch flags for the proxy configured through
 * environment variables.
 *
 * Reads PROXY_HOST, PROXY_PORT, PROXY_USERNAME and PROXY_PASSWORD.
 *
 * @returns An empty array unless both host and port are set; otherwise the
 *          `--proxy-server` flag, followed by a `--proxy-auth` flag when both
 *          username and password are set.
 */
const getProxyArgs = (): string[] => {
  const {
    PROXY_HOST: host,
    PROXY_PORT: port,
    PROXY_USERNAME: username,
    PROXY_PASSWORD: password,
  } = process.env;

  if (!host || !port) {
    return [];
  }

  const flags = [`--proxy-server=${host}:${port}`];
  // NOTE(review): `--proxy-auth` does not appear to be a documented Chromium
  // switch; Puppeteer documents page.authenticate() for proxy credentials.
  // Confirm this flag has any effect. Be aware that anything printing these
  // flags also prints the password.
  if (username && password) {
    flags.push(`--proxy-auth=${username}:${password}`);
  }
  return flags;
};
|
||||||
|
|
||||||
describe('ChdtpCrawler Real Site Test', () => {
|
describe('ChdtpCrawler Real Site Test', () => {
|
||||||
let browser: puppeteer.Browser;
|
let browser: puppeteer.Browser;
|
||||||
|
|
||||||
beforeAll(async () => {
|
beforeAll(async () => {
|
||||||
|
const proxyArgs = getProxyArgs();
|
||||||
|
if (proxyArgs.length > 0) {
|
||||||
|
console.log('Using proxy:', proxyArgs.join(' '));
|
||||||
|
}
|
||||||
|
|
||||||
browser = await puppeteer.launch({
|
browser = await puppeteer.launch({
|
||||||
headless: true, // Change to false to see the browser UI
|
headless: true, // Change to false to see the browser UI
|
||||||
args: ['--no-sandbox', '--disable-setuid-sandbox'],
|
args: ['--no-sandbox', '--disable-setuid-sandbox', ...proxyArgs],
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ export interface ChdtpResult {
|
|||||||
}
|
}
|
||||||
|
|
||||||
export const ChdtpCrawler = {
|
export const ChdtpCrawler = {
|
||||||
name: '中国华能集团',
|
name: '华电集团电子商务平台 ',
|
||||||
url: 'https://www.chdtp.com/webs/queryWebZbgg.action?zbggType=1',
|
url: 'https://www.chdtp.com/webs/queryWebZbgg.action?zbggType=1',
|
||||||
baseUrl: 'https://www.chdtp.com/webs/',
|
baseUrl: 'https://www.chdtp.com/webs/',
|
||||||
|
|
||||||
|
|||||||
@@ -4,6 +4,23 @@ import * as puppeteer from 'puppeteer';
|
|||||||
// Increase timeout to 120 seconds for manual inspection and slow sites
|
// Increase timeout to 120 seconds for manual inspection and slow sites
|
||||||
jest.setTimeout(120000);
|
jest.setTimeout(120000);
|
||||||
|
|
||||||
|
/**
 * Build the browser launch flags for the proxy configured through
 * environment variables.
 *
 * Reads PROXY_HOST, PROXY_PORT, PROXY_USERNAME and PROXY_PASSWORD.
 *
 * @returns An empty array unless both host and port are set; otherwise the
 *          `--proxy-server` flag, followed by a `--proxy-auth` flag when both
 *          username and password are set.
 */
const getProxyArgs = (): string[] => {
  const {
    PROXY_HOST: host,
    PROXY_PORT: port,
    PROXY_USERNAME: username,
    PROXY_PASSWORD: password,
  } = process.env;

  if (!host || !port) {
    return [];
  }

  const flags = [`--proxy-server=${host}:${port}`];
  // NOTE(review): `--proxy-auth` does not appear to be a documented Chromium
  // switch; Puppeteer documents page.authenticate() for proxy credentials.
  // Confirm this flag has any effect. Be aware that anything printing these
  // flags also prints the password.
  if (username && password) {
    flags.push(`--proxy-auth=${username}:${password}`);
  }
  return flags;
};
|
||||||
|
|
||||||
// 模拟人类鼠标移动
|
// 模拟人类鼠标移动
|
||||||
async function simulateHumanMouseMovement(page: puppeteer.Page) {
|
async function simulateHumanMouseMovement(page: puppeteer.Page) {
|
||||||
const viewport = page.viewport();
|
const viewport = page.viewport();
|
||||||
@@ -53,6 +70,11 @@ describe('ChngCrawler Real Site Test', () => {
|
|||||||
let browser: puppeteer.Browser;
|
let browser: puppeteer.Browser;
|
||||||
|
|
||||||
beforeAll(async () => {
|
beforeAll(async () => {
|
||||||
|
const proxyArgs = getProxyArgs();
|
||||||
|
if (proxyArgs.length > 0) {
|
||||||
|
console.log('Using proxy:', proxyArgs.join(' '));
|
||||||
|
}
|
||||||
|
|
||||||
browser = await puppeteer.launch({
|
browser = await puppeteer.launch({
|
||||||
headless: false, // Run in non-headless mode
|
headless: false, // Run in non-headless mode
|
||||||
args: [
|
args: [
|
||||||
@@ -61,6 +83,7 @@ describe('ChngCrawler Real Site Test', () => {
|
|||||||
'--disable-blink-features=AutomationControlled',
|
'--disable-blink-features=AutomationControlled',
|
||||||
'--window-size=1920,1080',
|
'--window-size=1920,1080',
|
||||||
"--disable-infobars",
|
"--disable-infobars",
|
||||||
|
...proxyArgs,
|
||||||
// "--headless=new",
|
// "--headless=new",
|
||||||
// '--disable-dev-shm-usage',
|
// '--disable-dev-shm-usage',
|
||||||
// '--disable-accelerated-2d-canvas',
|
// '--disable-accelerated-2d-canvas',
|
||||||
@@ -69,7 +92,7 @@ describe('ChngCrawler Real Site Test', () => {
|
|||||||
// '--disable-gpu',
|
// '--disable-gpu',
|
||||||
// '--disable-features=VizDisplayCompositor',
|
// '--disable-features=VizDisplayCompositor',
|
||||||
// '--disable-webgl',
|
// '--disable-webgl',
|
||||||
// '--disable-javascript',
|
// '--disable-javascript',
|
||||||
],
|
],
|
||||||
defaultViewport: null
|
defaultViewport: null
|
||||||
|
|
||||||
|
|||||||
@@ -9,6 +9,23 @@ puppeteer.use(StealthPlugin());
|
|||||||
// Increase timeout to 180 seconds for slow sites and stealth mode
|
// Increase timeout to 180 seconds for slow sites and stealth mode
|
||||||
jest.setTimeout(180000);
|
jest.setTimeout(180000);
|
||||||
|
|
||||||
|
// Build the Chromium launch arguments for the optional proxy configured via
// the PROXY_HOST / PROXY_PORT environment variables.
//
// Returns an empty array when either variable is missing. PROXY_USERNAME /
// PROXY_PASSWORD are intentionally NOT turned into a launch argument: Chromium
// has no `--proxy-auth` switch, so passing one would only expose the
// credentials in the process list and in the "Using proxy:" log line. Proxy
// credentials have to be supplied per page through page.authenticate().
const getProxyArgs = (): string[] => {
  const proxyHost = process.env.PROXY_HOST;
  const proxyPort = process.env.PROXY_PORT;

  if (!proxyHost || !proxyPort) {
    return [];
  }

  return [`--proxy-server=${proxyHost}:${proxyPort}`];
};
|
||||||
|
|
||||||
// 模拟人类鼠标移动
|
// 模拟人类鼠标移动
|
||||||
async function simulateHumanMouseMovement(page: Page) {
|
async function simulateHumanMouseMovement(page: Page) {
|
||||||
const viewport = page.viewport();
|
const viewport = page.viewport();
|
||||||
@@ -58,6 +75,11 @@ describe('ChngCrawler Stealth Test (Headless Mode with Stealth Plugin)', () => {
|
|||||||
let browser: Browser;
|
let browser: Browser;
|
||||||
|
|
||||||
beforeAll(async () => {
|
beforeAll(async () => {
|
||||||
|
const proxyArgs = getProxyArgs();
|
||||||
|
if (proxyArgs.length > 0) {
|
||||||
|
console.log('Using proxy:', proxyArgs.join(' '));
|
||||||
|
}
|
||||||
|
|
||||||
browser = await puppeteer.launch({
|
browser = await puppeteer.launch({
|
||||||
headless: true, // 使用 headless 模式
|
headless: true, // 使用 headless 模式
|
||||||
args: [
|
args: [
|
||||||
@@ -73,6 +95,7 @@ describe('ChngCrawler Stealth Test (Headless Mode with Stealth Plugin)', () => {
|
|||||||
'--disable-gpu',
|
'--disable-gpu',
|
||||||
'--disable-features=VizDisplayCompositor',
|
'--disable-features=VizDisplayCompositor',
|
||||||
'--disable-webgl',
|
'--disable-webgl',
|
||||||
|
...proxyArgs,
|
||||||
],
|
],
|
||||||
defaultViewport: null
|
defaultViewport: null
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -4,13 +4,35 @@ import * as puppeteer from 'puppeteer';
|
|||||||
// Increase timeout to 60 seconds for network operations
|
// Increase timeout to 60 seconds for network operations
|
||||||
jest.setTimeout(60000*5);
|
jest.setTimeout(60000*5);
|
||||||
|
|
||||||
|
// Build the Chromium launch arguments for the optional proxy configured via
// the PROXY_HOST / PROXY_PORT environment variables.
//
// Returns an empty array when either variable is missing. PROXY_USERNAME /
// PROXY_PASSWORD are intentionally NOT turned into a launch argument: Chromium
// has no `--proxy-auth` switch, so passing one would only expose the
// credentials in the process list and in the "Using proxy:" log line. Proxy
// credentials have to be supplied per page through page.authenticate().
const getProxyArgs = (): string[] => {
  const proxyHost = process.env.PROXY_HOST;
  const proxyPort = process.env.PROXY_PORT;

  if (!proxyHost || !proxyPort) {
    return [];
  }

  return [`--proxy-server=${proxyHost}:${proxyPort}`];
};
|
||||||
|
|
||||||
describe('CnncecpCrawler Real Site Test', () => {
|
describe('CnncecpCrawler Real Site Test', () => {
|
||||||
let browser: puppeteer.Browser;
|
let browser: puppeteer.Browser;
|
||||||
|
|
||||||
beforeAll(async () => {
|
beforeAll(async () => {
|
||||||
|
const proxyArgs = getProxyArgs();
|
||||||
|
if (proxyArgs.length > 0) {
|
||||||
|
console.log('Using proxy:', proxyArgs.join(' '));
|
||||||
|
}
|
||||||
|
|
||||||
browser = await puppeteer.launch({
|
browser = await puppeteer.launch({
|
||||||
headless: false, // Change to false to see browser UI
|
headless: false, // Change to false to see browser UI
|
||||||
args: ['--no-sandbox', '--disable-setuid-sandbox'],
|
args: ['--no-sandbox', '--disable-setuid-sandbox', ...proxyArgs],
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|||||||
73
src/crawler/services/cnooc_target.spec.ts
Normal file
73
src/crawler/services/cnooc_target.spec.ts
Normal file
@@ -0,0 +1,73 @@
|
|||||||
|
import { CnoocCrawler } from './cnooc_target';
|
||||||
|
import * as puppeteer from 'puppeteer';
|
||||||
|
|
||||||
|
// Increase timeout to 5 minutes (60000 ms * 5) for slow network operations
|
||||||
|
jest.setTimeout(60000*5);
|
||||||
|
|
||||||
|
// Build the Chromium launch arguments for the optional proxy configured via
// the PROXY_HOST / PROXY_PORT environment variables.
//
// Returns an empty array when either variable is missing. PROXY_USERNAME /
// PROXY_PASSWORD are intentionally NOT turned into a launch argument: Chromium
// has no `--proxy-auth` switch, so passing one would only expose the
// credentials in the process list and in the "Using proxy:" log line. Proxy
// credentials have to be supplied per page through page.authenticate().
const getProxyArgs = (): string[] => {
  const proxyHost = process.env.PROXY_HOST;
  const proxyPort = process.env.PROXY_PORT;

  if (!proxyHost || !proxyPort) {
    return [];
  }

  return [`--proxy-server=${proxyHost}:${proxyPort}`];
};
|
||||||
|
|
||||||
|
// Live smoke test: launches a real browser, crawls the CNOOC site and prints
// every item found. It needs network access (and optionally the PROXY_* env vars).
describe('CnoocCrawler Real Site Test', () => {
  let browser: puppeteer.Browser;

  beforeAll(async () => {
    const proxyArgs = getProxyArgs();
    if (proxyArgs.length > 0) {
      // Never echo proxy credentials into the test output.
      const loggableArgs = proxyArgs.filter((arg) => !arg.startsWith('--proxy-auth='));
      console.log('Using proxy:', loggableArgs.join(' '));
    }

    browser = await puppeteer.launch({
      headless: false, // Headed mode so the browser window is visible during the run
      args: ['--no-sandbox', '--disable-setuid-sandbox', ...proxyArgs],
    });
  });

  afterAll(async () => {
    // launch() may have failed, in which case there is nothing to close.
    if (browser) {
      await browser.close();
    }
  });

  it('should visit website and list all found bid information', async () => {
    console.log(`\nStarting crawl for: ${CnoocCrawler.name}`);
    console.log(`Target URL: ${CnoocCrawler.url}`);

    const results = await CnoocCrawler.crawl(browser);

    console.log(`\nSuccessfully found ${results.length} items:\n`);
    console.log('----------------------------------------');
    results.forEach((item, index) => {
      console.log(`${index + 1}. [${item.publishDate.toLocaleDateString()}] ${item.title}`);
      console.log(`   Link: ${item.url}`);
      console.log('----------------------------------------');
    });

    // Basic assertions to ensure the crawler is working
    expect(results).toBeDefined();
    expect(Array.isArray(results)).toBeTruthy();

    // Warn but don't fail if the site returns 0 items (could be empty or changed structure)
    if (results.length === 0) {
      console.warn('Warning: No items found. Check if website structure has changed or if list is currently empty.');
    } else {
      // Check data integrity of the first item
      const firstItem = results[0];
      expect(firstItem.title).toBeTruthy();
      expect(firstItem.url).toMatch(/^https?:\/\//);
      expect(firstItem.publishDate).toBeInstanceOf(Date);
    }
  });
});
|
||||||
195
src/crawler/services/cnooc_target.ts
Normal file
195
src/crawler/services/cnooc_target.ts
Normal file
@@ -0,0 +1,195 @@
|
|||||||
|
import * as puppeteer from 'puppeteer';
|
||||||
|
import { Logger } from '@nestjs/common';
|
||||||
|
|
||||||
|
/**
 * Moves the mouse pointer to several random positions inside the page's
 * viewport, pausing briefly after each move, to imitate a human visitor.
 * Does nothing when the page reports no viewport.
 */
async function simulateHumanMouseMovement(page: puppeteer.Page) {
  const size = page.viewport();
  if (!size) {
    return;
  }

  const pause = (ms: number) => new Promise(resolve => setTimeout(resolve, ms));

  // 5 to 9 random moves
  let remaining = 5 + Math.floor(Math.random() * 5);

  while (remaining-- > 0) {
    const targetX = Math.floor(Math.random() * size.width);
    const targetY = Math.floor(Math.random() * size.height);
    // 10 to 29 intermediate steps make the movement smoother
    const stepCount = 10 + Math.floor(Math.random() * 20);

    await page.mouse.move(targetX, targetY, { steps: stepCount });

    // Random pause of 100-500 ms between moves
    await pause(100 + Math.random() * 400);
  }
}
|
||||||
|
|
||||||
|
/**
 * Scrolls the page down in a few random smooth steps with pauses in between,
 * then scrolls back to the top, to imitate a human reading the page.
 */
async function simulateHumanScrolling(page: puppeteer.Page) {
  const pause = (ms: number) => new Promise(resolve => setTimeout(resolve, ms));

  // 3 to 7 scroll steps
  const passes = 3 + Math.floor(Math.random() * 5);

  for (let pass = 1; pass <= passes; pass += 1) {
    // Each step scrolls 100-499 px further down
    const offset = 100 + Math.floor(Math.random() * 400);

    await page.evaluate((distance) => {
      window.scrollBy({ top: distance, behavior: 'smooth' });
    }, offset);

    // Random pause of 500-1500 ms between steps
    await pause(500 + Math.random() * 1000);
  }

  // Return to the top of the page and give the smooth scroll time to finish
  await page.evaluate(() => {
    window.scrollTo({ top: 0, behavior: 'smooth' });
  });
  await pause(1000);
}
|
||||||
|
|
||||||
|
/** One bid announcement scraped from the CNOOC procurement list. */
export interface CnoocResult {
  /** Announcement title (text of the <font> element inside the list link). */
  title: string;
  /** Publication date parsed from the list entry's YYYY-MM-DD text. */
  publishDate: Date;
  /** Absolute URL of the announcement detail page. */
  url: string;
}

/**
 * Crawler for the CNOOC procurement platform list page.
 *
 * `crawl` walks up to five list pages in the given browser and returns every
 * announcement found; `extract` is the pure HTML-to-results parser it uses.
 */
export const CnoocCrawler = {
  name: '中海油招标平台',
  url: 'https://buy.cnooc.com.cn/cbjyweb/001/001001/moreinfo.html',
  baseUrl: 'https://buy.cnooc.com.cn',

  /**
   * Crawls the list pages and returns all announcements collected.
   *
   * Errors raised while navigating or paginating are logged and the items
   * gathered so far are returned. The page opened here is always closed.
   *
   * @param browser an already launched puppeteer browser
   * @returns the announcements found, in page order
   */
  async crawl(browser: puppeteer.Browser): Promise<CnoocResult[]> {
    const logger = new Logger('CnoocCrawler');
    const page = await browser.newPage();

    // Answer proxy authentication challenges when credentials are configured.
    const username = process.env.PROXY_USERNAME;
    const password = process.env.PROXY_PASSWORD;
    if (username && password) {
      await page.authenticate({ username, password });
    }

    // Override navigator properties that commonly give away an automated browser.
    await page.evaluateOnNewDocument(() => {
      Object.defineProperty(navigator, 'webdriver', { get: () => false });
      Object.defineProperty(navigator, 'language', { get: () => "zh-CN"});
      Object.defineProperty(navigator, 'plugins', { get: () => [1,2,3,4,5]});
    });

    await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/143.0.0.0 Safari/537.36');
    await page.setViewport({ width: 1920, height: 1080 });

    const allResults: CnoocResult[] = [];
    let currentPage = 1;
    const maxPages = 5;

    try {
      logger.log(`Navigating to ${this.url}...`);
      await page.goto(this.url, { waitUntil: 'networkidle2', timeout: 60000 });

      // Simulate human behaviour before reading the first page
      logger.log('Simulating human mouse movements...');
      await simulateHumanMouseMovement(page);

      logger.log('Simulating human scrolling...');
      await simulateHumanScrolling(page);

      while (currentPage <= maxPages) {
        logger.log(`Processing page ${currentPage}...`);

        const content = await page.content();
        const pageResults = this.extract(content);

        if (pageResults.length === 0) {
          logger.warn(`No results found on page ${currentPage}, stopping.`);
          break;
        }

        allResults.push(...pageResults);
        logger.log(`Extracted ${pageResults.length} items from page ${currentPage}`);

        // Simulate human behaviour before paginating
        logger.log('Simulating human mouse movements before pagination...');
        await simulateHumanMouseMovement(page);

        logger.log('Simulating human scrolling before pagination...');
        await simulateHumanScrolling(page);

        // Locate the "next page" link. Expected markup:
        //   <a href="https://buy.cnooc.com.cn/cbjyweb/001/001001/2.html" class="pageLink">下页 ></a>
        // The selector alone matches every pagination link under the list path,
        // so the link is additionally required to carry the "下页" label.
        // NOTE(review): label taken from the markup sample above - confirm against the live site.
        const nextButtonSelector = 'a.pageLink[href*="/cbjyweb/001/001001/"]';
        const pageLinks = await page.$$(nextButtonSelector);
        let nextButton: (typeof pageLinks)[number] | null = null;
        for (const link of pageLinks) {
          const label = await link.evaluate(el => el.textContent || '');
          if (label.includes('下页')) {
            nextButton = link;
            break;
          }
        }

        if (!nextButton) {
          logger.log('Next page button not found. Reached end of list.');
          break;
        }

        logger.log(`Navigating to page ${currentPage + 1}...`);

        try {
          // Start waiting before clicking so the navigation cannot be missed,
          // and do not read the page until the next document has loaded.
          await Promise.all([
            page.waitForNavigation({ waitUntil: 'networkidle2', timeout: 60000 }),
            nextButton.click(),
          ]);
        } catch (navError) {
          logger.error(`Navigation to page ${currentPage + 1} failed: ${navError.message}`);
          break;
        }

        currentPage++;

        // Simulate human behaviour after paginating
        logger.log('Simulating human mouse movements after pagination...');
        await simulateHumanMouseMovement(page);

        logger.log('Simulating human scrolling after pagination...');
        await simulateHumanScrolling(page);

        // Random delay of 1000-3000 ms between pages
        const delay = Math.floor(Math.random() * (3000 - 1000 + 1)) + 1000;
        await new Promise(resolve => setTimeout(resolve, delay));
      }

      return allResults;

    } catch (error) {
      logger.error(`Failed to crawl ${this.name}: ${error.message}`);
      return allResults;
    } finally {
      await page.close();
    }
  },

  /**
   * Parses one list page and returns its announcements.
   *
   * Regex groups:
   *   1: URL (href attribute of the link)
   *   2: Title (text inside the <font> element)
   *   3: Date (publication date, format 2026-01-12)
   *
   * Example of the expected HTML structure:
   * <li class="now-hd-items clearfix">
   *   <a href="https://buy.cnooc.com.cn/cbjyweb/001/001001/20260112/1460280812582768641-zhy.html" target="_blank" class="now-link" title="...">
   *     <font style="font-weight:bold">中海油服-物探事业部2026-2028年度海事许可办理及码头服务(二次)</font>
   *   </a>
   *   <span class="now-span" style="width:100px">2026-01-12</span>
   * </li>
   *
   * @param html full HTML source of a list page
   * @returns one result per matching list item; entries with an empty title or href are skipped
   */
  extract(html: string): CnoocResult[] {
    const results: CnoocResult[] = [];
    const regex = /<li class="now-hd-items clearfix">[\s\S]*?<a[^>]*href="([^"]*)"[^>]*>[\s\S]*?<font[^>]*>([^<]*)<\/font>[\s\S]*?<span class="now-span"[^>]*>\s*(\d{4}-\d{2}-\d{2})\s*<\/span>[\s\S]*?<\/li>/gs;

    let match;
    while ((match = regex.exec(html)) !== null) {
      const url = match[1]?.trim();
      const title = match[2]?.trim();
      const dateStr = match[3]?.trim();

      if (title && url) {
        results.push({
          title,
          // A date-only ISO string is parsed as midnight UTC.
          publishDate: dateStr ? new Date(dateStr) : new Date(),
          // Absolute links are kept verbatim; anything else is resolved against
          // the list page URL so path-relative hrefs also produce a valid URL.
          url: url.startsWith('http') ? url : new URL(url, this.url).href
        });
      }
    }

    return results;
  }
};
|
||||||
@@ -4,13 +4,35 @@ import * as puppeteer from 'puppeteer';
|
|||||||
// Increase timeout to 60 seconds for network operations
|
// Increase timeout to 60 seconds for network operations
|
||||||
jest.setTimeout(60000*5);
|
jest.setTimeout(60000*5);
|
||||||
|
|
||||||
|
// Build the Chromium launch arguments for the optional proxy configured via
// the PROXY_HOST / PROXY_PORT environment variables.
//
// Returns an empty array when either variable is missing. PROXY_USERNAME /
// PROXY_PASSWORD are intentionally NOT turned into a launch argument: Chromium
// has no `--proxy-auth` switch, so passing one would only expose the
// credentials in the process list and in the "Using proxy:" log line. Proxy
// credentials have to be supplied per page through page.authenticate().
const getProxyArgs = (): string[] => {
  const proxyHost = process.env.PROXY_HOST;
  const proxyPort = process.env.PROXY_PORT;

  if (!proxyHost || !proxyPort) {
    return [];
  }

  return [`--proxy-server=${proxyHost}:${proxyPort}`];
};
|
||||||
|
|
||||||
describe('EpsCrawler Real Site Test', () => {
|
describe('EpsCrawler Real Site Test', () => {
|
||||||
let browser: puppeteer.Browser;
|
let browser: puppeteer.Browser;
|
||||||
|
|
||||||
beforeAll(async () => {
|
beforeAll(async () => {
|
||||||
|
const proxyArgs = getProxyArgs();
|
||||||
|
if (proxyArgs.length > 0) {
|
||||||
|
console.log('Using proxy:', proxyArgs.join(' '));
|
||||||
|
}
|
||||||
|
|
||||||
browser = await puppeteer.launch({
|
browser = await puppeteer.launch({
|
||||||
headless: false, // Change to false to see browser UI
|
headless: false, // Change to false to see browser UI
|
||||||
args: ['--no-sandbox', '--disable-setuid-sandbox'],
|
args: ['--no-sandbox', '--disable-setuid-sandbox', ...proxyArgs],
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|||||||
@@ -4,13 +4,35 @@ import * as puppeteer from 'puppeteer';
|
|||||||
// Increase timeout to 60 seconds for network operations
|
// Increase timeout to 60 seconds for network operations
|
||||||
jest.setTimeout(60000*5);
|
jest.setTimeout(60000*5);
|
||||||
|
|
||||||
|
// Build the Chromium launch arguments for the optional proxy configured via
// the PROXY_HOST / PROXY_PORT environment variables.
//
// Returns an empty array when either variable is missing. PROXY_USERNAME /
// PROXY_PASSWORD are intentionally NOT turned into a launch argument: Chromium
// has no `--proxy-auth` switch, so passing one would only expose the
// credentials in the process list and in the "Using proxy:" log line. Proxy
// credentials have to be supplied per page through page.authenticate().
const getProxyArgs = (): string[] => {
  const proxyHost = process.env.PROXY_HOST;
  const proxyPort = process.env.PROXY_PORT;

  if (!proxyHost || !proxyPort) {
    return [];
  }

  return [`--proxy-server=${proxyHost}:${proxyPort}`];
};
|
||||||
|
|
||||||
describe('EspicCrawler Real Site Test', () => {
|
describe('EspicCrawler Real Site Test', () => {
|
||||||
let browser: puppeteer.Browser;
|
let browser: puppeteer.Browser;
|
||||||
|
|
||||||
beforeAll(async () => {
|
beforeAll(async () => {
|
||||||
|
const proxyArgs = getProxyArgs();
|
||||||
|
if (proxyArgs.length > 0) {
|
||||||
|
console.log('Using proxy:', proxyArgs.join(' '));
|
||||||
|
}
|
||||||
|
|
||||||
browser = await puppeteer.launch({
|
browser = await puppeteer.launch({
|
||||||
headless: false, // Change to false to see browser UI
|
headless: false, // Change to false to see browser UI
|
||||||
args: ['--no-sandbox', '--disable-setuid-sandbox'],
|
args: ['--no-sandbox', '--disable-setuid-sandbox', ...proxyArgs],
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|||||||
@@ -53,7 +53,7 @@ export interface EspicResult {
|
|||||||
}
|
}
|
||||||
|
|
||||||
export const EspicCrawler = {
|
export const EspicCrawler = {
|
||||||
name: '电能e招采平台',
|
name: '电能e招采平台(国电投)',
|
||||||
baseUrl: 'https://ebid.espic.com.cn',
|
baseUrl: 'https://ebid.espic.com.cn',
|
||||||
|
|
||||||
// 生成动态 URL,使用当前日期
|
// 生成动态 URL,使用当前日期
|
||||||
|
|||||||
@@ -4,13 +4,35 @@ import * as puppeteer from 'puppeteer';
|
|||||||
// Increase timeout to 60 seconds for network operations
|
// Increase timeout to 60 seconds for network operations
|
||||||
jest.setTimeout(60000*5);
|
jest.setTimeout(60000*5);
|
||||||
|
|
||||||
|
// Build the Chromium launch arguments for the optional proxy configured via
// the PROXY_HOST / PROXY_PORT environment variables.
//
// Returns an empty array when either variable is missing. PROXY_USERNAME /
// PROXY_PASSWORD are intentionally NOT turned into a launch argument: Chromium
// has no `--proxy-auth` switch, so passing one would only expose the
// credentials in the process list and in the "Using proxy:" log line. Proxy
// credentials have to be supplied per page through page.authenticate().
const getProxyArgs = (): string[] => {
  const proxyHost = process.env.PROXY_HOST;
  const proxyPort = process.env.PROXY_PORT;

  if (!proxyHost || !proxyPort) {
    return [];
  }

  return [`--proxy-server=${proxyHost}:${proxyPort}`];
};
|
||||||
|
|
||||||
describe('PowerbeijingCrawler Real Site Test', () => {
|
describe('PowerbeijingCrawler Real Site Test', () => {
|
||||||
let browser: puppeteer.Browser;
|
let browser: puppeteer.Browser;
|
||||||
|
|
||||||
beforeAll(async () => {
|
beforeAll(async () => {
|
||||||
|
const proxyArgs = getProxyArgs();
|
||||||
|
if (proxyArgs.length > 0) {
|
||||||
|
console.log('Using proxy:', proxyArgs.join(' '));
|
||||||
|
}
|
||||||
|
|
||||||
browser = await puppeteer.launch({
|
browser = await puppeteer.launch({
|
||||||
headless: false, // Change to false to see browser UI
|
headless: false, // Change to false to see browser UI
|
||||||
args: ['--no-sandbox', '--disable-setuid-sandbox'],
|
args: ['--no-sandbox', '--disable-setuid-sandbox', ...proxyArgs],
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|||||||
@@ -53,7 +53,7 @@ export interface PowerbeijingResult {
|
|||||||
}
|
}
|
||||||
|
|
||||||
export const PowerbeijingCrawler = {
|
export const PowerbeijingCrawler = {
|
||||||
name: '北京电力交易平台',
|
name: '北京京能电子商务平台',
|
||||||
url: 'https://www.powerbeijing-ec.com/jncms/search/bulletin.html?dates=300&categoryId=2&tabName=%E6%8B%9B%E6%A0%87%E5%85%AC%E5%91%8A&page=1',
|
url: 'https://www.powerbeijing-ec.com/jncms/search/bulletin.html?dates=300&categoryId=2&tabName=%E6%8B%9B%E6%A0%87%E5%85%AC%E5%91%8A&page=1',
|
||||||
baseUrl: 'https://www.powerbeijing-ec.com',
|
baseUrl: 'https://www.powerbeijing-ec.com',
|
||||||
|
|
||||||
|
|||||||
73
src/crawler/services/sdicc_target.spec.ts
Normal file
73
src/crawler/services/sdicc_target.spec.ts
Normal file
@@ -0,0 +1,73 @@
|
|||||||
|
import { SdiccCrawler } from './sdicc_target';
|
||||||
|
import * as puppeteer from 'puppeteer';
|
||||||
|
|
||||||
|
// Increase timeout to 5 minutes (60000 ms * 5) for slow network operations
|
||||||
|
jest.setTimeout(60000*5);
|
||||||
|
|
||||||
|
// Build the Chromium launch arguments for the optional proxy configured via
// the PROXY_HOST / PROXY_PORT environment variables.
//
// Returns an empty array when either variable is missing. PROXY_USERNAME /
// PROXY_PASSWORD are intentionally NOT turned into a launch argument: Chromium
// has no `--proxy-auth` switch, so passing one would only expose the
// credentials in the process list and in the "Using proxy:" log line. Proxy
// credentials have to be supplied per page through page.authenticate().
const getProxyArgs = (): string[] => {
  const proxyHost = process.env.PROXY_HOST;
  const proxyPort = process.env.PROXY_PORT;

  if (!proxyHost || !proxyPort) {
    return [];
  }

  return [`--proxy-server=${proxyHost}:${proxyPort}`];
};
|
||||||
|
|
||||||
|
// Live smoke test: launches a real browser, crawls the SDIC site and prints
// every item found. It needs network access (and optionally the PROXY_* env vars).
describe('SdiccCrawler Real Site Test', () => {
  let browser: puppeteer.Browser;

  beforeAll(async () => {
    const proxyArgs = getProxyArgs();
    if (proxyArgs.length > 0) {
      // Never echo proxy credentials into the test output.
      const loggableArgs = proxyArgs.filter((arg) => !arg.startsWith('--proxy-auth='));
      console.log('Using proxy:', loggableArgs.join(' '));
    }

    browser = await puppeteer.launch({
      headless: false, // Headed mode so the browser window is visible during the run
      args: ['--no-sandbox', '--disable-setuid-sandbox', ...proxyArgs],
    });
  });

  afterAll(async () => {
    // launch() may have failed, in which case there is nothing to close.
    if (browser) {
      await browser.close();
    }
  });

  it('should visit website and list all found bid information', async () => {
    console.log(`\nStarting crawl for: ${SdiccCrawler.name}`);
    console.log(`Target URL: ${SdiccCrawler.url}`);

    const results = await SdiccCrawler.crawl(browser);

    console.log(`\nSuccessfully found ${results.length} items:\n`);
    console.log('----------------------------------------');
    results.forEach((item, index) => {
      console.log(`${index + 1}. [${item.publishDate.toLocaleDateString()}] ${item.title}`);
      console.log(`   Link: ${item.url}`);
      console.log('----------------------------------------');
    });

    // Basic assertions to ensure the crawler is working
    expect(results).toBeDefined();
    expect(Array.isArray(results)).toBeTruthy();

    // Warn but don't fail if the site returns 0 items (could be empty or changed structure)
    if (results.length === 0) {
      console.warn('Warning: No items found. Check if website structure has changed or if list is currently empty.');
    } else {
      // Check data integrity of the first item
      const firstItem = results[0];
      expect(firstItem.title).toBeTruthy();
      expect(firstItem.url).toMatch(/^https?:\/\//);
      expect(firstItem.publishDate).toBeInstanceOf(Date);
    }
  });
});
|
||||||
203
src/crawler/services/sdicc_target.ts
Normal file
203
src/crawler/services/sdicc_target.ts
Normal file
@@ -0,0 +1,203 @@
|
|||||||
|
import * as puppeteer from 'puppeteer';
|
||||||
|
import { Logger } from '@nestjs/common';
|
||||||
|
|
||||||
|
/**
 * Moves the mouse pointer to several random positions inside the page's
 * viewport, pausing briefly after each move, to imitate a human visitor.
 * Does nothing when the page reports no viewport.
 */
async function simulateHumanMouseMovement(page: puppeteer.Page) {
  const size = page.viewport();
  if (!size) {
    return;
  }

  const pause = (ms: number) => new Promise(resolve => setTimeout(resolve, ms));

  // 5 to 9 random moves
  let remaining = 5 + Math.floor(Math.random() * 5);

  while (remaining-- > 0) {
    const targetX = Math.floor(Math.random() * size.width);
    const targetY = Math.floor(Math.random() * size.height);
    // 10 to 29 intermediate steps make the movement smoother
    const stepCount = 10 + Math.floor(Math.random() * 20);

    await page.mouse.move(targetX, targetY, { steps: stepCount });

    // Random pause of 100-500 ms between moves
    await pause(100 + Math.random() * 400);
  }
}
|
||||||
|
|
||||||
|
/**
 * Scrolls the page down in a few random smooth steps with pauses in between,
 * then scrolls back to the top, to imitate a human reading the page.
 */
async function simulateHumanScrolling(page: puppeteer.Page) {
  const pause = (ms: number) => new Promise(resolve => setTimeout(resolve, ms));

  // 3 to 7 scroll steps
  const passes = 3 + Math.floor(Math.random() * 5);

  for (let pass = 1; pass <= passes; pass += 1) {
    // Each step scrolls 100-499 px further down
    const offset = 100 + Math.floor(Math.random() * 400);

    await page.evaluate((distance) => {
      window.scrollBy({ top: distance, behavior: 'smooth' });
    }, offset);

    // Random pause of 500-1500 ms between steps
    await pause(500 + Math.random() * 1000);
  }

  // Return to the top of the page and give the smooth scroll time to finish
  await page.evaluate(() => {
    window.scrollTo({ top: 0, behavior: 'smooth' });
  });
  await pause(1000);
}
|
||||||
|
|
||||||
|
/** One procurement announcement scraped from the SDIC list table. */
export interface SdiccResult {
  /** Announcement title (text of the first <span> cell in the table row). */
  title: string;
  /** Publication date parsed from the row's YYYY-MM-DD text. */
  publishDate: Date;
  /** Absolute URL of the announcement detail page, built from the row's two GUIDs. */
  url: string;
}

/**
 * Crawler for the SDIC e-procurement platform list page.
 *
 * `crawl` walks up to five list pages in the given browser and returns every
 * announcement found; `extract` is the pure HTML-to-results parser it uses.
 */
export const SdiccCrawler = {
  name: '国投集团电子采购平台',
  url: 'https://www.sdicc.com.cn/cgxx/ggList',
  baseUrl: 'https://www.sdicc.com.cn',

  /**
   * Crawls the list pages and returns all announcements collected.
   *
   * Errors raised while navigating or paginating are logged and the items
   * gathered so far are returned. The page opened here is always closed.
   *
   * @param browser an already launched puppeteer browser
   * @returns the announcements found, in page order
   */
  async crawl(browser: puppeteer.Browser): Promise<SdiccResult[]> {
    const logger = new Logger('SdiccCrawler');
    const page = await browser.newPage();

    // Answer proxy authentication challenges when credentials are configured.
    const username = process.env.PROXY_USERNAME;
    const password = process.env.PROXY_PASSWORD;
    if (username && password) {
      await page.authenticate({ username, password });
    }

    // Override navigator properties that commonly give away an automated browser.
    await page.evaluateOnNewDocument(() => {
      Object.defineProperty(navigator, 'webdriver', { get: () => false });
      Object.defineProperty(navigator, 'language', { get: () => "zh-CN"});
      Object.defineProperty(navigator, 'plugins', { get: () => [1,2,3,4,5]});
    });

    await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/143.0.0.0 Safari/537.36');
    await page.setViewport({ width: 1920, height: 1080 });

    const allResults: SdiccResult[] = [];
    let currentPage = 1;
    const maxPages = 5;

    try {
      logger.log(`Navigating to ${this.url}...`);
      await page.goto(this.url, { waitUntil: 'networkidle2', timeout: 60000 });

      // Simulate human behaviour before reading the first page
      logger.log('Simulating human mouse movements...');
      await simulateHumanMouseMovement(page);

      logger.log('Simulating human scrolling...');
      await simulateHumanScrolling(page);

      // Wait for the table rows; a timeout is tolerated and the loop below
      // simply works with whatever the page has rendered by then.
      logger.log('Waiting for table to load...');
      await page.waitForSelector('.tbody table tbody tr', { timeout: 30000 }).catch(() => {
        logger.warn('Table rows not found within 30s, continuing with the current page content.');
      });

      while (currentPage <= maxPages) {
        logger.log(`Processing page ${currentPage}...`);

        const content = await page.content();
        const pageResults = this.extract(content);

        if (pageResults.length === 0) {
          logger.warn(`No results found on page ${currentPage}, stopping.`);
          break;
        }

        allResults.push(...pageResults);
        logger.log(`Extracted ${pageResults.length} items from page ${currentPage}`);

        // Simulate human behaviour before paginating
        logger.log('Simulating human mouse movements before pagination...');
        await simulateHumanMouseMovement(page);

        logger.log('Simulating human scrolling before pagination...');
        await simulateHumanScrolling(page);

        // Locate the "next page" button
        const nextButtonSelector = '#page_btnLas';
        const nextButton = await page.$(nextButtonSelector);

        if (!nextButton) {
          logger.log('Next page button not found. Reached end of list.');
          break;
        }

        logger.log(`Navigating to page ${currentPage + 1}...`);

        try {
          // Start waiting before clicking so a navigation triggered by the
          // click cannot be missed. A navigation timeout is deliberately
          // ignored (best effort), as before.
          await Promise.all([
            page.waitForNavigation({ waitUntil: 'networkidle2', timeout: 60000 }).catch(() => {}),
            nextButton.click(),
          ]);
          await new Promise(r => setTimeout(r, 2000)); // extra wait so the data has finished loading
        } catch (navError) {
          logger.error(`Navigation to page ${currentPage + 1} failed: ${navError.message}`);
          break;
        }

        currentPage++;

        // Simulate human behaviour after paginating
        logger.log('Simulating human mouse movements after pagination...');
        await simulateHumanMouseMovement(page);

        logger.log('Simulating human scrolling after pagination...');
        await simulateHumanScrolling(page);

        // Random delay of 1000-3000 ms between pages
        const delay = Math.floor(Math.random() * (3000 - 1000 + 1)) + 1000;
        await new Promise(resolve => setTimeout(resolve, delay));
      }

      return allResults;

    } catch (error) {
      logger.error(`Failed to crawl ${this.name}: ${error.message}`);
      return allResults;
    } finally {
      await page.close();
    }
  },

  /**
   * Parses one list page and returns its announcements.
   *
   * Regex groups:
   *   1: first argument of the row's urlChange(...) handler (used as ggGuid)
   *   2: second argument of the row's urlChange(...) handler (used as gcGuid)
   *   3: Title (announcement / project name)
   *   4: Date (publication date, format 2026-01-09)
   *
   * NOTE(review): which urlChange argument is the project GUID (gcGuid) and
   * which is the announcement GUID (ggGuid) cannot be told from this file -
   * confirm against the site's urlChange implementation.
   *
   * Example of the expected HTML structure:
   * <tr onclick="urlChange('a853e226-09bd-441c-8f05-badb945932f0','ec2ccdd8-1464-4a96-ad99-24a5396d028c')">
   *   <td colspan="1" rowspan="1" style="text-align: center;">1</td>
   *   <td colspan="1" rowspan="1"><span style="margin-right: 5px;">国投罗钾公司硫酸钾厂球磨机控制系统升级项目公开招标公告</span></td>
   *   <td colspan="1" rowspan="1"><span>服务</span></td>
   *   <td colspan="1" rowspan="1"><span> 2026-01-09 </span></td>
   * </tr>
   *
   * @param html full HTML source of a list page
   * @returns one result per matching table row
   */
  extract(html: string): SdiccResult[] {
    const results: SdiccResult[] = [];
    const regex = /<tr[^>]*onclick="urlChange\('([^']+)','([^']+)'\)"[^>]*>[\s\S]*?<td[^>]*><span[^>]*>([^<]+)<\/span><\/td>[\s\S]*?<td[^>]*><span[^>]*>\s*(\d{4}-\d{2}-\d{2})\s*<\/span><\/td>[\s\S]*?<\/tr>/gs;

    let match;
    while ((match = regex.exec(html)) !== null) {
      const ggGuid = match[1]?.trim();
      const gcGuid = match[2]?.trim();
      const title = match[3]?.trim();
      const dateStr = match[4]?.trim();

      if (title && ggGuid && gcGuid) {
        results.push({
          title,
          // A date-only ISO string is parsed as midnight UTC.
          publishDate: dateStr ? new Date(dateStr) : new Date(),
          url: `${this.baseUrl}/cgxx/ggDetail?gcGuid=${gcGuid}&ggGuid=${ggGuid}`
        });
      }
    }

    return results;
  }
};
|
||||||
@@ -4,10 +4,32 @@ import * as puppeteer from 'puppeteer';
|
|||||||
// Increase timeout to 120 seconds for manual inspection and slow sites
|
// Increase timeout to 120 seconds for manual inspection and slow sites
|
||||||
jest.setTimeout(120000);
|
jest.setTimeout(120000);
|
||||||
|
|
||||||
|
/**
 * Builds the Chromium launch arguments for the optional outbound proxy.
 *
 * Reads PROXY_HOST and PROXY_PORT from the environment and, when both are
 * set, returns a single `--proxy-server=<host>:<port>` argument.
 *
 * PROXY_USERNAME / PROXY_PASSWORD are intentionally not turned into a launch
 * flag: Chromium has no `--proxy-auth` switch, so such a flag is ignored by
 * the browser while still exposing the password on the process command line
 * and in the "Using proxy:" console output. Authenticated proxies must be
 * handled per page with `page.authenticate({ username, password })`.
 *
 * @returns The proxy launch arguments, or an empty array when no proxy is
 *          configured.
 */
const getProxyArgs = (): string[] => {
  const { PROXY_HOST: proxyHost, PROXY_PORT: proxyPort } = process.env;

  if (!proxyHost || !proxyPort) {
    return [];
  }

  return [`--proxy-server=${proxyHost}:${proxyPort}`];
};
|
||||||
|
|
||||||
describe('SzecpCrawler Real Site Test', () => {
|
describe('SzecpCrawler Real Site Test', () => {
|
||||||
let browser: puppeteer.Browser;
|
let browser: puppeteer.Browser;
|
||||||
|
|
||||||
beforeAll(async () => {
|
beforeAll(async () => {
|
||||||
|
const proxyArgs = getProxyArgs();
|
||||||
|
if (proxyArgs.length > 0) {
|
||||||
|
console.log('Using proxy:', proxyArgs.join(' '));
|
||||||
|
}
|
||||||
|
|
||||||
browser = await puppeteer.launch({
|
browser = await puppeteer.launch({
|
||||||
headless: false, // Run in non-headless mode
|
headless: false, // Run in non-headless mode
|
||||||
args: [
|
args: [
|
||||||
@@ -16,6 +38,7 @@ describe('SzecpCrawler Real Site Test', () => {
|
|||||||
'--disable-blink-features=AutomationControlled',
|
'--disable-blink-features=AutomationControlled',
|
||||||
'--window-size=1920,1080',
|
'--window-size=1920,1080',
|
||||||
'--disable-infobars',
|
'--disable-infobars',
|
||||||
|
...proxyArgs,
|
||||||
],
|
],
|
||||||
defaultViewport: null
|
defaultViewport: null
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -14,8 +14,8 @@ export class BidCrawlTask {
|
|||||||
|
|
||||||
@Cron(CronExpression.EVERY_DAY_AT_MIDNIGHT)
|
@Cron(CronExpression.EVERY_DAY_AT_MIDNIGHT)
|
||||||
async handleCron() {
|
async handleCron() {
|
||||||
this.logger.debug('Scheduled crawl task started');
|
// this.logger.debug('Scheduled crawl task started');
|
||||||
await this.crawlerService.crawlAll();
|
// await this.crawlerService.crawlAll();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Cron(CronExpression.EVERY_DAY_AT_MIDNIGHT)
|
@Cron(CronExpression.EVERY_DAY_AT_MIDNIGHT)
|
||||||
|
|||||||
57
src/scripts/update-source.ts
Normal file
57
src/scripts/update-source.ts
Normal file
@@ -0,0 +1,57 @@
|
|||||||
|
import { NestFactory } from '@nestjs/core';
|
||||||
|
import { AppModule } from '../app.module';
|
||||||
|
import { getRepositoryToken } from '@nestjs/typeorm';
|
||||||
|
import { Repository } from 'typeorm';
|
||||||
|
import { BidItem } from '../bids/entities/bid-item.entity';
|
||||||
|
import { CustomLogger } from '../common/logger/logger.service';
|
||||||
|
|
||||||
|
/**
 * One-off maintenance script: rewrites the `source` column of BidItem rows
 * from the old platform name to the new one, then terminates the process.
 *
 * Exit code is 0 when the update succeeds or there is nothing to update,
 * and 1 when any step inside the try block throws.
 */
async function updateSource() {
  const app = await NestFactory.createApplicationContext(AppModule);

  // Install the project's custom logger on the application context.
  const logger = await app.resolve(CustomLogger);
  app.useLogger(logger);
  logger.setContext('UpdateSourceScript');

  // Closes the application context, then ends the process with `code`.
  const shutdown = async (code: number) => {
    await app.close();
    process.exit(code);
  };

  try {
    // Repository for the BidItem entity.
    const repo = app.get<Repository<BidItem>>(getRepositoryToken(BidItem));

    const oldSource = '北京电力交易平台';
    const newSource = '北京京能电子商务平台';

    logger.log(`开始更新 source 字段: "${oldSource}" -> "${newSource}"`);

    // Count the rows that still carry the old source name.
    const pending = await repo.count({
      where: { source: oldSource },
    });

    logger.log(`找到 ${pending} 条需要更新的记录`);

    if (pending === 0) {
      logger.log('没有需要更新的记录');
      await shutdown(0);
    }

    // Perform the bulk update.
    const updateQuery = repo
      .createQueryBuilder()
      .update(BidItem)
      .set({ source: newSource })
      .where('source = :oldSource', { oldSource });
    const { affected } = await updateQuery.execute();

    logger.log(`成功更新 ${affected} 条记录`);

    await shutdown(0);
  } catch (error) {
    logger.error('更新失败:', error);
    await shutdown(1);
  }
}
|
||||||
|
|
||||||
|
updateSource();
|
||||||
9
test/jest-e2e.json
Normal file
9
test/jest-e2e.json
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
{
|
||||||
|
"moduleFileExtensions": ["js", "json", "ts"],
|
||||||
|
"rootDir": ".",
|
||||||
|
"testEnvironment": "node",
|
||||||
|
"testRegex": ".e2e-spec.ts$",
|
||||||
|
"transform": {
|
||||||
|
"^.+\\.(t|j)s$": "ts-jest"
|
||||||
|
}
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user