// logo-extractor.ts (9.49 kB)
import axios from 'axios';
import * as cheerio from 'cheerio';
import parseUrl from 'url-parse';
import sizeOf from 'image-size';
/**
* A single image reference found on a page that may be the site's logo.
*/
export interface LogoCandidate {
/** URL of the candidate image. */
url: string;
/** Which extraction strategy produced this candidate. */
type: 'favicon' | 'apple-touch-icon' | 'og:image' | 'logo-class' | 'brand-image';
/** Image dimensions as reported by image-size; absent when they could not be read. */
size?: { width: number; height: number };
/** Ranking score used to pick the best logo; candidates are sorted so the highest score wins. */
score: number;
}
/**
* Result of analysing one site for logo candidates.
*/
export interface LogoAnalysis {
/** The URL exactly as it was passed to the analysis (before normalisation). */
url: string;
/** Candidates that survived validation, sorted by score in descending order. */
candidates: LogoCandidate[];
/** The top-ranked candidate, or undefined when no candidate survived. */
bestCandidate?: LogoCandidate;
/** Elapsed wall-clock time of the analysis in milliseconds (Date.now() difference). */
extractionTime: number;
/**
* 'success' when a best candidate was chosen, 'no-logo' when the analysis
* completed without any usable candidate, 'failed' when it threw.
*/
status: 'success' | 'failed' | 'no-logo';
}
/**
 * Logo extractor.
 *
 * Fetches a web page, collects image references that are likely to be the
 * site's logo (favicons, Apple touch icons, Open Graph images, images marked
 * with "logo"/"brand" in their attributes), downloads each one to validate
 * and measure it, scores them, and picks the best.
 *
 * NOTE(security): this class requests whatever URL the caller supplies and
 * whatever image URLs the fetched page references. When the input is
 * untrusted, callers should restrict it to public hosts themselves.
 */
export class LogoExtractor {
  private readonly userAgent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36';
  /** Timeout in milliseconds for page and image requests (10 seconds). */
  private readonly timeout = 10000;
  /** Upper bound on a single image download so a hostile server cannot exhaust memory. */
  private readonly maxImageBytes = 10 * 1024 * 1024;
  /** How many candidate images are downloaded at the same time. */
  private readonly maxConcurrentDownloads = 4;
  /** Base score per candidate type; also decides which type wins when one URL is found twice. */
  private readonly typeScores: Record<LogoCandidate['type'], number> = {
    'logo-class': 100,
    'apple-touch-icon': 80,
    'brand-image': 70,
    favicon: 60,
    'og:image': 40,
  };

  /**
   * Extracts the best logo of the site at `url`.
   *
   * @param url Site URL; a missing scheme defaults to https.
   * @returns The image bytes of the best candidate, or null when the analysis
   *          failed, found no logo, or the final download failed.
   */
  async extractLogo(url: string): Promise<Buffer | null> {
    try {
      const analysis = await this.analyzeLogo(url);
      if (analysis.status === 'failed' || !analysis.bestCandidate) {
        return null;
      }
      // Download the winning candidate.
      return await this.downloadImage(analysis.bestCandidate.url);
    } catch (error: unknown) {
      console.error('Logo提取失败:', error);
      return null;
    }
  }

  /**
   * Analyses the site at `url` and returns every validated logo candidate.
   *
   * Never throws: any error is logged and reported as status 'failed'.
   *
   * @param url Site URL; a missing scheme defaults to https.
   * @returns The analysis, with candidates sorted by descending score.
   */
  async analyzeLogo(url: string): Promise<LogoAnalysis> {
    const startTime = Date.now();
    const analysis: LogoAnalysis = {
      url,
      candidates: [],
      extractionTime: 0,
      status: 'failed',
    };
    try {
      const pageUrl = this.normalizeUrl(url);
      const html = await this.fetchHtml(pageUrl);
      const $ = cheerio.load(html);

      // Collect candidates from every strategy; references are resolved
      // against the page URL so page-relative paths work.
      const candidates: LogoCandidate[] = [];
      this.extractFavicons($, pageUrl, candidates);
      this.extractAppleTouchIcons($, pageUrl, candidates);
      this.extractOpenGraphImages($, pageUrl, candidates);
      this.extractLogoByClass($, pageUrl, candidates);
      this.extractBrandImages($, pageUrl, candidates);

      analysis.candidates = await this.filterAndScoreCandidates(candidates);
      analysis.bestCandidate = this.selectBestCandidate(analysis.candidates);
      analysis.status = analysis.bestCandidate ? 'success' : 'no-logo';
    } catch (error: unknown) {
      console.error('Logo分析失败:', error);
    }
    analysis.extractionTime = Date.now() - startTime;
    return analysis;
  }

  /** Trims the input and prefixes `https://` when no http(s) scheme is present (case-insensitive). */
  private normalizeUrl(url: string): string {
    const trimmed = url.trim();
    return /^https?:\/\//i.test(trimmed) ? trimmed : `https://${trimmed}`;
  }

  /**
   * Downloads the page markup as text.
   *
   * @throws When the request fails or the body is not delivered as a string.
   */
  private async fetchHtml(url: string): Promise<string> {
    const response = await axios.get<unknown>(url, {
      headers: { 'User-Agent': this.userAgent },
      timeout: this.timeout,
      maxRedirects: 5,
      // Ask for raw text so a JSON content type is not parsed into an object.
      responseType: 'text',
    });
    if (typeof response.data !== 'string') {
      throw new TypeError(`Expected a text response from ${url}`);
    }
    return response.data;
  }

  /** Returns `protocol//host` of the given URL. */
  private getBaseUrl(url: string): string {
    const parsed = parseUrl(url);
    return `${parsed.protocol}//${parsed.host}`;
  }

  /** Resolves `rawUrl` against the page and appends it as a candidate when it is a usable http(s) URL. */
  private addCandidate(
    candidates: LogoCandidate[],
    rawUrl: string | undefined,
    type: LogoCandidate['type'],
    pageUrl: string,
  ): void {
    if (!rawUrl) return;
    const resolved = this.resolveUrl(rawUrl, pageUrl);
    if (resolved) {
      candidates.push({ url: resolved, type, score: 0 }); // scored later
    }
  }

  /** Collects `<link rel="...icon...">` references plus the conventional `/favicon.ico`. */
  private extractFavicons($: cheerio.CheerioAPI, baseUrl: string, candidates: LogoCandidate[]): void {
    $('link[rel*="icon"]').each((_, element) => {
      const rel = ($(element).attr('rel') ?? '').toLowerCase();
      // The selector also matches apple-touch-icon links; those are collected
      // with their own type by extractAppleTouchIcons.
      if (rel.includes('apple-touch-icon')) return;
      this.addCandidate(candidates, $(element).attr('href'), 'favicon', baseUrl);
    });
    // Default location, tried even when the page declares no icon.
    candidates.push({
      url: `${this.getBaseUrl(baseUrl)}/favicon.ico`,
      type: 'favicon',
      score: 0,
    });
  }

  /** Collects `<link rel="apple-touch-icon...">` references. */
  private extractAppleTouchIcons($: cheerio.CheerioAPI, baseUrl: string, candidates: LogoCandidate[]): void {
    $('link[rel*="apple-touch-icon"]').each((_, element) => {
      this.addCandidate(candidates, $(element).attr('href'), 'apple-touch-icon', baseUrl);
    });
  }

  /** Collects `<meta property="og:image">` references. */
  private extractOpenGraphImages($: cheerio.CheerioAPI, baseUrl: string, candidates: LogoCandidate[]): void {
    $('meta[property="og:image"]').each((_, element) => {
      this.addCandidate(candidates, $(element).attr('content'), 'og:image', baseUrl);
    });
  }

  /** Collects images whose own or ancestor attributes follow common logo naming. */
  private extractLogoByClass($: cheerio.CheerioAPI, baseUrl: string, candidates: LogoCandidate[]): void {
    // The `i` flag makes the attribute match case-insensitive ("Logo", "LOGO").
    const logoSelectors = [
      'img[class*="logo" i]',
      'img[id*="logo" i]',
      'img[alt*="logo" i]',
      '.logo img',
      '#logo img',
      '.brand img',
      '.header-logo img',
      '.navbar-brand img',
    ];
    for (const selector of logoSelectors) {
      $(selector).each((_, element) => {
        this.addCandidate(candidates, $(element).attr('src'), 'logo-class', baseUrl);
      });
    }
  }

  /** Collects images whose alt text, class or src mentions "brand" (case-insensitive). */
  private extractBrandImages($: cheerio.CheerioAPI, baseUrl: string, candidates: LogoCandidate[]): void {
    $('img').each((_, element) => {
      const src = $(element).attr('src');
      if (!src) return;
      const alt = $(element).attr('alt') ?? '';
      const className = $(element).attr('class') ?? '';
      const mentionsBrand = [alt, className, src].some((text) => text.toLowerCase().includes('brand'));
      if (mentionsBrand) {
        this.addCandidate(candidates, src, 'brand-image', baseUrl);
      }
    });
  }

  /**
   * Resolves a possibly relative reference against `baseUrl` using standard
   * URL resolution (page-relative, root-relative, protocol-relative, absolute).
   *
   * @returns The absolute URL, or null when the reference is malformed or does
   *          not resolve to http(s) (for example `data:` or `javascript:`).
   */
  private resolveUrl(url: string, baseUrl: string): string | null {
    try {
      const resolved = new URL(url.trim(), baseUrl);
      const isHttp = resolved.protocol === 'http:' || resolved.protocol === 'https:';
      return isHttp ? resolved.href : null;
    } catch {
      return null;
    }
  }

  /**
   * Removes duplicate URLs, keeping for each URL the candidate whose type has
   * the highest base score. First-seen order of URLs is preserved.
   */
  private dedupeCandidates(candidates: LogoCandidate[]): LogoCandidate[] {
    const byUrl = new Map<string, LogoCandidate>();
    for (const candidate of candidates) {
      const existing = byUrl.get(candidate.url);
      if (!existing || this.typeScores[candidate.type] > this.typeScores[existing.type]) {
        byUrl.set(candidate.url, candidate);
      }
    }
    return Array.from(byUrl.values());
  }

  /**
   * Downloads one candidate, checks that it is an image, measures it and scores it.
   *
   * @returns A scored copy of the candidate, or null when it is unreachable,
   *          not served as `image/*`, or cannot be parsed by image-size.
   */
  private async scoreCandidate(candidate: LogoCandidate): Promise<LogoCandidate | null> {
    try {
      const image = await this.fetchImage(candidate.url);
      if (!image.contentType.toLowerCase().startsWith('image/')) {
        return null;
      }
      const dimensions = sizeOf(image.data);
      const scored: LogoCandidate = { ...candidate };
      if (dimensions.width && dimensions.height) {
        scored.size = { width: dimensions.width, height: dimensions.height };
      }
      scored.score = this.calculateScore(scored);
      return scored;
    } catch {
      // Invalid candidates are expected (e.g. a missing /favicon.ico); drop them quietly.
      return null;
    }
  }

  /**
   * De-duplicates the candidates, validates and scores them in small parallel
   * batches, and returns the survivors sorted by descending score.
   */
  private async filterAndScoreCandidates(candidates: LogoCandidate[]): Promise<LogoCandidate[]> {
    const unique = this.dedupeCandidates(candidates);
    const valid: LogoCandidate[] = [];
    for (let start = 0; start < unique.length; start += this.maxConcurrentDownloads) {
      const batch = unique.slice(start, start + this.maxConcurrentDownloads);
      const results = await Promise.all(batch.map((candidate) => this.scoreCandidate(candidate)));
      for (const result of results) {
        if (result) valid.push(result);
      }
    }
    return valid.sort((a, b) => b.score - a.score);
  }

  /**
   * Scores a candidate: a base score by type, +20 for a near-square image
   * (aspect ratio <= 1.5), and up to +50 for a shorter side between 32 and 512.
   */
  private calculateScore(candidate: LogoCandidate): number {
    // Unknown types (not expressible in the type, but possible at runtime) score 0.
    let score = this.typeScores[candidate.type] ?? 0;
    if (candidate.size) {
      const { width, height } = candidate.size;
      const minDimension = Math.min(width, height);
      const maxDimension = Math.max(width, height);
      // Prefer square or near-square images.
      if (minDimension > 0 && maxDimension / minDimension <= 1.5) {
        score += 20;
      }
      // Prefer moderately sized images.
      if (minDimension >= 32 && minDimension <= 512) {
        score += Math.min(minDimension / 4, 50);
      }
    }
    return score;
  }

  /** Returns the first (highest-scoring) candidate, or undefined for an empty list. */
  private selectBestCandidate(candidates: LogoCandidate[]): LogoCandidate | undefined {
    return candidates.length > 0 ? candidates[0] : undefined;
  }

  /**
   * Performs a single GET for an image and returns its bytes and Content-Type.
   *
   * @throws When the request fails or the body exceeds `maxImageBytes`.
   */
  private async fetchImage(url: string): Promise<{ data: Buffer; contentType: string }> {
    const response = await axios.get<ArrayBuffer>(url, {
      responseType: 'arraybuffer',
      timeout: this.timeout,
      maxRedirects: 5,
      maxContentLength: this.maxImageBytes,
      headers: { 'User-Agent': this.userAgent },
    });
    const header: unknown = response.headers['content-type'];
    return {
      data: Buffer.from(response.data),
      contentType: typeof header === 'string' ? header : '',
    };
  }

  /** Downloads an image; logs and returns null on any failure. */
  private async downloadImage(url: string): Promise<Buffer | null> {
    try {
      const image = await this.fetchImage(url);
      return image.data;
    } catch (error: unknown) {
      console.error('图像下载失败:', url, error);
      return null;
    }
  }
}