feat: add Tailwind CSS support and fix URL concatenation

- Add Tailwind CSS and its related configuration
- Fix URL concatenation in several crawler services to avoid double slashes
- Reorder the frontend navigation menu items
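The URL fix in the hunks below amounts to joining each crawler's baseUrl and a relative path without producing a double slash. A minimal standalone sketch of that idea, under stated assumptions: the helper name joinUrl and the sample paths are illustrative, not part of this commit. Note that a bare replace(/\/\//g, '/') on an absolute URL would also collapse the "//" in "https://", so the sketch trims slashes at the seam instead:

    // Hypothetical helper sketching the double-slash fix; not from this commit.
    // Joins a base URL and a path with exactly one slash at the seam,
    // leaving the protocol separator ("https://") untouched.
    function joinUrl(baseUrl: string, path: string): string {
      if (path.startsWith('http')) return path; // already absolute, leave as-is
      return baseUrl.replace(/\/+$/, '') + '/' + path.replace(/^\/+/, '');
    }

    // Both calls yield 'https://tang.cdt-ec.com/notice/1.html' (sample path):
    joinUrl('https://tang.cdt-ec.com/', '/notice/1.html');
    joinUrl('https://tang.cdt-ec.com', 'notice/1.html');

For relative paths — the case the startsWith('http') guards in the hunks below funnel through — the committed replace(/\/\//g, '/') and this seam-trimming approach agree, since a scheme-less path contains no protocol "//".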
@@ -55,7 +55,7 @@ export interface CdtResult {
 export const CdtCrawler = {
   name: '中国大唐集团电子商务平台',
   url: 'https://tang.cdt-ec.com/home/index.html',
-  baseUrl: 'https://tang.cdt-ec.com/',
+  baseUrl: 'https://tang.cdt-ec.com',

   async crawl(browser: puppeteer.Browser): Promise<CdtResult[]> {
     const logger = new Logger('CdtCrawler');
@@ -252,10 +252,11 @@ export const CdtCrawler = {
       const dateStr = match[3]?.trim();

       if (title && url) {
+        const fullUrl = url.startsWith('http') ? url : this.baseUrl + url;
         results.push({
           title,
           publishDate: dateStr ? new Date(dateStr) : new Date(),
-          url: url.startsWith('http') ? url : this.baseUrl + url
+          url: fullUrl.replace(/\/\//g, '/')
         });
       }
     }
@@ -132,7 +132,7 @@ export const CeicCrawler = {
       allResults.push(...pageResults.map(r => ({
         title: r.title,
         publishDate: r.dateStr ? new Date(r.dateStr) : new Date(),
-        url: r.url
+        url: r.url.replace(/\/\//g, '/')
       })));

       logger.log(`Extracted ${pageResults.length} items.`);
@@ -190,10 +190,11 @@ export const CgnpcCrawler = {
       const dateStr = match[3]?.trim();

       if (title && url) {
+        const fullUrl = url.startsWith('http') ? url : this.baseUrl + url;
         results.push({
           title,
           publishDate: dateStr ? new Date(dateStr) : new Date(),
-          url: url.startsWith('http') ? url : this.baseUrl + url
+          url: fullUrl.replace(/\/\//g, '/')
         });
       }
     }
@@ -105,10 +105,11 @@ export const ChdtpCrawler = {
       const dateStr = match[5]?.trim();

       if (title && urlSuffix) {
+        const fullUrl = this.baseUrl + urlSuffix;
         results.push({
           title,
           publishDate: dateStr ? new Date(dateStr) : new Date(),
-          url: this.baseUrl + urlSuffix
+          url: fullUrl.replace(/\/\//g, '/')
         });
       }
     }
@@ -206,7 +206,7 @@ export const ChngCrawler = {
       allResults.push(...pageResults.map(r => ({
         title: r!.title,
         publishDate: new Date(r!.dateStr),
-        url: r!.url
+        url: r!.url.replace(/\/\//g, '/')
       })));

       logger.log(`Extracted ${pageResults.length} items.`);
@@ -181,10 +181,11 @@ export const CnncecpCrawler = {
       const title = match[3]?.trim();

       if (title && url) {
+        const fullUrl = url.startsWith('http') ? url : this.baseUrl + url;
         results.push({
           title,
           publishDate: dateStr ? new Date(dateStr) : new Date(),
-          url: url.startsWith('http') ? url : this.baseUrl + url
+          url: fullUrl.replace(/\/\//g, '/')
         });
       }
     }
@@ -182,10 +182,11 @@ export const CnoocCrawler = {
       const dateStr = match[3]?.trim();

       if (title && url) {
+        const fullUrl = url.startsWith('http') ? url : this.baseUrl + url;
         results.push({
           title,
           publishDate: dateStr ? new Date(dateStr) : new Date(),
-          url: url.startsWith('http') ? url : this.baseUrl + url
+          url: fullUrl.replace(/\/\//g, '/')
         });
       }
     }
@@ -188,10 +188,11 @@ export const EpsCrawler = {
       const dateStr = match[3]?.trim();

       if (title && url) {
+        const fullUrl = url.startsWith('http') ? url : this.baseUrl + url;
         results.push({
           title,
           publishDate: dateStr ? new Date(dateStr) : new Date(),
-          url: url.startsWith('http') ? url : this.baseUrl + url
+          url: fullUrl.replace(/\/\//g, '/')
         });
       }
     }
@@ -234,10 +234,11 @@ export const EspicCrawler = {
       const dateStr = match[3]?.trim();

       if (title && url) {
+        const fullUrl = url.startsWith('http') ? url : this.baseUrl + url;
         results.push({
           title,
           publishDate: dateStr ? new Date(dateStr) : new Date(),
-          url: url.startsWith('http') ? url : this.baseUrl + url
+          url: fullUrl.replace(/\/\//g, '/')
         });
       }
     }
@@ -185,10 +185,11 @@ export const PowerbeijingCrawler = {
       const dateStr = match[3]?.trim();

       if (title && url) {
+        const fullUrl = url.startsWith('http') ? url : this.baseUrl + url;
         results.push({
           title,
           publishDate: dateStr ? new Date(dateStr) : new Date(),
-          url: url.startsWith('http') ? url : this.baseUrl + url
+          url: fullUrl.replace(/\/\//g, '/')
         });
       }
     }
@@ -190,10 +190,11 @@ export const SdiccCrawler = {
       const dateStr = match[4]?.trim();

       if (title && ggGuid && gcGuid) {
+        const fullUrl = `${this.baseUrl}/cgxx/ggDetail?gcGuid=${gcGuid}&ggGuid=${ggGuid}`;
         results.push({
           title,
           publishDate: dateStr ? new Date(dateStr) : new Date(),
-          url: `${this.baseUrl}/cgxx/ggDetail?gcGuid=${gcGuid}&ggGuid=${ggGuid}`
+          url: fullUrl.replace(/\/\//g, '/')
         });
       }
     }
@@ -134,7 +134,7 @@ export const SzecpCrawler = {
       allResults.push(...pageResults.map(r => ({
         title: r!.title,
         publishDate: new Date(r!.dateStr),
-        url: r!.url
+        url: r!.url.replace(/\/\//g, '/')
       })));

       logger.log(`Extracted ${pageResults.length} items.`);