// logo-extractor.ts
import axios from 'axios';
import * as cheerio from 'cheerio';
import urlParse from 'url-parse';
import { URL } from 'url';
import { Buffer } from 'buffer';
/**
 * One possible logo image discovered for a website, together with the
 * metadata used to rank it against the other candidates.
 */
export interface LogoCandidate {
  /** URL of the candidate image (resolved against the page URL when possible). */
  url: string;
  /** Category of the candidate, i.e. which kind of resource it was found as. */
  type:
    | 'favicon'
    | 'apple-touch-icon'
    | 'og-image'
    | 'logo-image'
    | 'brand-image';
  /** Human-readable label of the extraction strategy that produced this candidate. */
  source: string;
  /** Ranking score; candidates are ordered from highest to lowest score. */
  score: number;
  /** Raw attributes captured alongside the candidate (e.g. `sizes`, `rel`, `alt`). */
  attributes: { [key: string]: any };
}
/**
 * A downloaded logo: the raw image bytes plus what could be determined
 * about them.
 */
export interface LogoData {
  /** Raw bytes of the downloaded image. */
  buffer: Buffer;
  /** Short format name such as `'svg'` or `'png'`; `'unknown'` when it could not be determined. */
  format: string;
  /** Pixel dimensions of the image, when they could be determined. */
  originalSize?: {
    width: number;
    height: number;
  };
}
/**
 * Discovers, ranks and downloads logo images for a website.
 *
 * Typical flow:
 *  1. `extractLogoCandidates(url)` fetches the page and collects candidates
 *     from `<link rel="icon">`, Apple touch icons, OpenGraph images, `<img>`
 *     elements that look like logos, and a list of well-known file paths.
 *  2. `selectBestLogo(candidates)` picks the preferred candidate.
 *  3. `downloadLogo(candidate)` fetches the image bytes.
 */
export class LogoExtractor {
  /** Browser-like User-Agent sent with every HTTP request. */
  private readonly userAgent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36';

  /** CSS selectors for `<img>` elements that are likely to be a logo. */
  private static readonly logoSelectors: readonly string[] = [
    'img[alt*="logo" i]',
    'img[src*="logo" i]',
    'img[class*="logo" i]',
    'img[id*="logo" i]',
    '.logo img',
    '#logo img',
    '.brand img',
    '.header img',
    '.navbar-brand img',
  ];

  /** Well-known paths that are probed with HEAD requests. */
  private static readonly commonPaths: readonly string[] = [
    '/favicon.ico',
    '/favicon.png',
    '/apple-touch-icon.png',
    '/apple-touch-icon-precomposed.png',
    '/logo.png',
    '/logo.svg',
    '/assets/logo.png',
    '/assets/logo.svg',
    '/images/logo.png',
    '/images/logo.svg',
  ];

  /** Scores for well-known paths; paths not listed here get a default score of 60. */
  private static readonly commonPathScores: Readonly<Record<string, number>> = {
    '/favicon.ico': 70,
    '/favicon.png': 75,
    '/apple-touch-icon.png': 85,
    '/logo.png': 90,
    '/logo.svg': 95,
    '/assets/logo.png': 85,
    '/assets/logo.svg': 90,
  };

  /** Substrings of a Content-Type header mapped to a format name, checked in order. */
  private static readonly mimeFormats: ReadonlyArray<readonly [string, string]> = [
    ['svg', 'svg'],
    ['png', 'png'],
    ['jpeg', 'jpeg'],
    ['jpg', 'jpeg'],
    ['gif', 'gif'],
    ['webp', 'webp'],
    ['ico', 'ico'],
  ];

  /** File extensions mapped to a format name (used when the Content-Type is not informative). */
  private static readonly extensionFormats: Readonly<Record<string, string>> = {
    svg: 'svg',
    png: 'png',
    jpg: 'jpeg',
    jpeg: 'jpeg',
    gif: 'gif',
    webp: 'webp',
    ico: 'ico',
  };

  /**
   * Fetches `websiteUrl` and returns every logo candidate found, de-duplicated
   * by URL and sorted from highest to lowest score.
   *
   * @param websiteUrl Site address; the scheme may be omitted (`https://` is assumed).
   * @returns The ranked candidates, or an empty array when the page could not
   *          be fetched or parsed (the error is logged, not thrown).
   */
  async extractLogoCandidates(websiteUrl: string): Promise<LogoCandidate[]> {
    try {
      const normalizedUrl = this.normalizeUrl(websiteUrl);
      const response = await axios.get(normalizedUrl, {
        headers: { 'User-Agent': this.userAgent },
        timeout: 10000,
        maxRedirects: 5,
      });
      const $ = cheerio.load(response.data);
      const candidates: LogoCandidate[] = [];

      // After redirects, relative links must be resolved against the URL that
      // actually served the page, not the one that was requested.
      const baseUrl = this.resolveBaseUrl(normalizedUrl, response.request?.res?.responseUrl);

      // 1. Favicon links
      this.extractFaviconCandidates($, baseUrl, candidates);
      // 2. Apple touch icons
      this.extractAppleTouchIcons($, baseUrl, candidates);
      // 3. OpenGraph images
      this.extractOpenGraphImages($, baseUrl, candidates);
      // 4. <img> elements that look like a logo
      this.extractLogoImages($, baseUrl, candidates);
      // 5. Well-known favicon/logo paths
      await this.extractCommonFaviconPaths(baseUrl, candidates);

      // De-duplicate and rank
      return this.deduplicateAndScore(candidates);
    } catch (error) {
      console.error(`提取Logo候选项时出错: ${error}`);
      return [];
    }
  }

  /**
   * Ensures the URL has an http(s) scheme. Surrounding whitespace is removed,
   * the scheme check is case-insensitive, and protocol-relative input
   * (`//host`) is upgraded to `https:`.
   */
  private normalizeUrl(url: string): string {
    const trimmed = url.trim();
    if (/^https?:\/\//i.test(trimmed)) {
      return trimmed;
    }
    if (trimmed.startsWith('//')) {
      return `https:${trimmed}`;
    }
    return `https://${trimmed}`;
  }

  /**
   * Picks the URL used to resolve relative links: the final URL reported by
   * the HTTP client when it is a valid absolute URL, otherwise the requested URL.
   */
  private resolveBaseUrl(requestedUrl: string, finalUrl: unknown): URL {
    if (typeof finalUrl === 'string' && finalUrl.length > 0) {
      try {
        return new URL(finalUrl);
      } catch {
        // Fall back to the requested URL below.
      }
    }
    return new URL(requestedUrl);
  }

  /**
   * Collects `<link rel="...icon...">` entries as `favicon` candidates.
   * Apple touch icons also match the selector but are skipped here because
   * `extractAppleTouchIcons` records them with their own type and score.
   */
  private extractFaviconCandidates($: cheerio.CheerioAPI, baseUrl: URL, candidates: LogoCandidate[]): void {
    $('link[rel*="icon" i]').each((_, element) => {
      const link = $(element);
      const href = link.attr('href');
      const rel = link.attr('rel');
      if (!href || rel?.toLowerCase().includes('apple-touch-icon')) {
        return;
      }
      const sizes = link.attr('sizes');
      const type = link.attr('type');
      candidates.push({
        url: this.resolveUrl(href, baseUrl),
        type: 'favicon',
        source: 'HTML link标签',
        score: this.calculateFaviconScore(sizes, type),
        attributes: { sizes, type, rel },
      });
    });
  }

  /** Collects `<link rel="apple-touch-icon...">` entries. */
  private extractAppleTouchIcons($: cheerio.CheerioAPI, baseUrl: URL, candidates: LogoCandidate[]): void {
    $('link[rel*="apple-touch-icon" i]').each((_, element) => {
      const link = $(element);
      const href = link.attr('href');
      if (!href) {
        return;
      }
      const sizes = link.attr('sizes');
      candidates.push({
        url: this.resolveUrl(href, baseUrl),
        type: 'apple-touch-icon',
        source: 'Apple Touch Icon',
        score: this.calculateAppleIconScore(sizes),
        attributes: { sizes, rel: link.attr('rel') },
      });
    });
  }

  /** Collects `og:image` meta tags (declared via either `property` or `name`). */
  private extractOpenGraphImages($: cheerio.CheerioAPI, baseUrl: URL, candidates: LogoCandidate[]): void {
    $('meta[property="og:image"], meta[name="og:image"]').each((_, element) => {
      const meta = $(element);
      const content = meta.attr('content');
      if (!content) {
        return;
      }
      candidates.push({
        url: this.resolveUrl(content, baseUrl),
        type: 'og-image',
        source: 'OpenGraph协议',
        score: 70, // medium priority
        attributes: { property: meta.attr('property') || meta.attr('name') },
      });
    });
  }

  /**
   * Collects `<img>` elements matched by the logo selectors. An image matched
   * by several selectors is pushed several times; duplicates are collapsed
   * later by `deduplicateAndScore`.
   */
  private extractLogoImages($: cheerio.CheerioAPI, baseUrl: URL, candidates: LogoCandidate[]): void {
    for (const selector of LogoExtractor.logoSelectors) {
      $(selector).each((_, element) => {
        const img = $(element);
        const src = img.attr('src');
        if (!src) {
          return;
        }
        const alt = img.attr('alt');
        const className = img.attr('class');
        candidates.push({
          url: this.resolveUrl(src, baseUrl),
          type: 'logo-image',
          source: 'Logo图像识别',
          score: this.calculateLogoImageScore(alt, className, src),
          attributes: { alt, class: className, selector },
        });
      });
    }
  }

  /**
   * Probes the well-known paths with HEAD requests and records the ones that
   * exist. Probes run in parallel; results are appended in the order of the
   * path list. Failed probes (404, timeout, network error) are ignored, and
   * so are responses served as `text/html`, which are error/fallback pages
   * rather than images.
   */
  private async extractCommonFaviconPaths(baseUrl: URL, candidates: LogoCandidate[]): Promise<void> {
    const probes = LogoExtractor.commonPaths.map(
      async (path): Promise<LogoCandidate | undefined> => {
        const url = new URL(path, baseUrl).toString();
        try {
          const response = await axios.head(url, {
            timeout: 3000,
            headers: { 'User-Agent': this.userAgent },
          });
          if (response.status !== 200) {
            return undefined;
          }
          const contentType = response.headers['content-type'];
          if (String(contentType ?? '').toLowerCase().includes('text/html')) {
            return undefined;
          }
          return {
            url,
            type: this.classifyCommonPath(path),
            source: '常见路径检测',
            score: this.calculateCommonPathScore(path),
            attributes: { path, contentType },
          };
        } catch {
          // Best-effort probe: a missing or unreachable path is expected.
          return undefined;
        }
      },
    );

    for (const found of await Promise.all(probes)) {
      if (found) {
        candidates.push(found);
      }
    }
  }

  /** Maps a well-known path to the candidate type it represents. */
  private classifyCommonPath(path: string): LogoCandidate['type'] {
    if (path.includes('apple-touch')) {
      return 'apple-touch-icon';
    }
    if (path.includes('logo')) {
      return 'logo-image';
    }
    return 'favicon';
  }

  /**
   * Returns the largest width declared in a `sizes` attribute, which may list
   * several `WxH` entries separated by spaces and may use an uppercase `X`.
   * Returns `undefined` when no `WxH` entry is present (e.g. `sizes="any"`).
   */
  private largestDeclaredSize(sizes?: string): number | undefined {
    if (!sizes) {
      return undefined;
    }
    let largest: number | undefined;
    for (const match of sizes.matchAll(/(\d+)x(\d+)/gi)) {
      const width = Number.parseInt(match[1], 10);
      if (largest === undefined || width > largest) {
        largest = width;
      }
    }
    return largest;
  }

  /** Scores a favicon link: base 60, plus bonuses for size and format, capped at 100. */
  private calculateFaviconScore(sizes?: string, type?: string): number {
    let score = 60; // base score

    const size = this.largestDeclaredSize(sizes);
    if (size !== undefined) {
      if (size >= 128) score += 30;
      else if (size >= 64) score += 20;
      else if (size >= 32) score += 10;
    }

    if (type) {
      const typeLower = type.toLowerCase();
      if (typeLower.includes('svg')) score += 20;
      else if (typeLower.includes('png')) score += 15;
    }

    return Math.min(score, 100);
  }

  /** Scores an Apple touch icon: base 80, plus a size bonus, capped at 100. */
  private calculateAppleIconScore(sizes?: string): number {
    let score = 80; // Apple icons are usually of good quality

    const size = this.largestDeclaredSize(sizes);
    if (size !== undefined) {
      if (size >= 180) score += 20;
      else if (size >= 120) score += 15;
      else if (size >= 76) score += 10;
    }

    return Math.min(score, 100);
  }

  /**
   * Scores an `<img>` candidate: base 50, plus bonuses for logo-related
   * keywords in `alt`, `class` and `src`, capped at 100.
   */
  private calculateLogoImageScore(alt?: string, className?: string, src?: string): number {
    let score = 50; // base score

    // alt text
    if (alt) {
      const altLower = alt.toLowerCase();
      if (altLower.includes('logo')) score += 25;
      if (altLower.includes('brand')) score += 15;
      if (altLower.includes('company')) score += 10;
    }

    // class attribute
    if (className) {
      const classLower = className.toLowerCase();
      if (classLower.includes('logo')) score += 20;
      if (classLower.includes('brand')) score += 15;
    }

    // src path
    if (src) {
      const srcLower = src.toLowerCase();
      if (srcLower.includes('logo')) score += 15;
      if (srcLower.includes('brand')) score += 10;
      if (srcLower.includes('.svg')) score += 10;
    }

    return Math.min(score, 100);
  }

  /** Returns the score for a well-known path, or 60 for paths without an explicit score. */
  private calculateCommonPathScore(path: string): number {
    return LogoExtractor.commonPathScores[path] ?? 60;
  }

  /** Resolves `href` against `baseUrl`; returns `href` unchanged when it cannot be resolved. */
  private resolveUrl(href: string, baseUrl: URL): string {
    try {
      return new URL(href, baseUrl).toString();
    } catch {
      return href;
    }
  }

  /**
   * Keeps one candidate per URL (the highest-scoring one; the first wins on a
   * tie) and returns them sorted from highest to lowest score.
   */
  private deduplicateAndScore(candidates: LogoCandidate[]): LogoCandidate[] {
    const uniqueCandidates = new Map<string, LogoCandidate>();
    for (const candidate of candidates) {
      const existing = uniqueCandidates.get(candidate.url);
      if (!existing || candidate.score > existing.score) {
        uniqueCandidates.set(candidate.url, candidate);
      }
    }
    return Array.from(uniqueCandidates.values()).sort((a, b) => b.score - a.score);
  }

  /**
   * Picks the preferred candidate: SVG candidates come before all others, and
   * within each group the higher score wins. The input array is not modified.
   *
   * @throws Error when `candidates` is empty.
   */
  selectBestLogo(candidates: LogoCandidate[]): LogoCandidate {
    if (candidates.length === 0) {
      throw new Error('没有可用的Logo候选项');
    }

    const isSvg = (candidate: LogoCandidate): boolean => {
      const declaredType: unknown = candidate.attributes?.type;
      return (
        candidate.url.toLowerCase().includes('.svg') ||
        (typeof declaredType === 'string' && declaredType.includes('svg'))
      );
    };

    // Sort a copy so the caller's array keeps its order.
    const sortedCandidates = [...candidates].sort((a, b) => {
      const aIsSvg = isSvg(a);
      const bIsSvg = isSvg(b);
      if (aIsSvg && !bIsSvg) return -1;
      if (!aIsSvg && bIsSvg) return 1;
      return b.score - a.score;
    });
    return sortedCandidates[0];
  }

  /**
   * Downloads the candidate's image.
   *
   * @returns The image bytes, the detected format and, when it could be
   *          determined, the image dimensions.
   * @throws Error when the download fails; the message contains the original error.
   */
  async downloadLogo(candidate: LogoCandidate): Promise<LogoData> {
    try {
      const response = await axios.get(candidate.url, {
        responseType: 'arraybuffer',
        headers: { 'User-Agent': this.userAgent },
        timeout: 10000,
      });
      const buffer = Buffer.from(response.data);
      const contentType = String(response.headers['content-type'] ?? '');
      const format = this.detectFormat(contentType, candidate.url);
      return {
        buffer,
        format,
        originalSize: await this.getImageSize(buffer, format),
      };
    } catch (error) {
      throw new Error(`下载Logo失败: ${error}`);
    }
  }

  /**
   * Determines the image format from the Content-Type header. When the header
   * is missing or generic (`octet-stream`), the file extension of `url` is
   * used instead. Returns `'unknown'` when neither is conclusive.
   */
  private detectFormat(contentType: string, url: string): string {
    const mime = contentType.toLowerCase();
    for (const [needle, format] of LogoExtractor.mimeFormats) {
      if (mime.includes(needle)) {
        return format;
      }
    }

    // Only trust the extension when the server did not state a real type;
    // e.g. an HTML error page served from a `.png` URL must stay 'unknown'.
    const isUninformative = mime.trim() === '' || mime.includes('octet-stream');
    if (!isUninformative) {
      return 'unknown';
    }

    let pathname: string;
    try {
      pathname = new URL(url).pathname;
    } catch {
      return 'unknown';
    }
    const extension = pathname.toLowerCase().match(/\.([a-z0-9]+)$/)?.[1];
    if (extension === undefined) {
      return 'unknown';
    }
    return LogoExtractor.extensionFormats[extension] ?? 'unknown';
  }

  /**
   * Returns the image dimensions when they can be determined. Only SVG is
   * currently supported; every other format yields `undefined` (a library
   * such as sharp could be used to support raster formats).
   */
  private async getImageSize(buffer: Buffer, format: string): Promise<{ width: number; height: number } | undefined> {
    try {
      if (format === 'svg') {
        return this.parseSvgSize(buffer.toString('utf-8'));
      }
      return undefined;
    } catch {
      return undefined;
    }
  }

  /**
   * Reads the size of an SVG document from its root `<svg>` tag: the `width`
   * and `height` attributes when both are plain numbers (optionally with a
   * `px` suffix), otherwise the width and height of the `viewBox`.
   * Attributes of nested elements and look-alikes such as `stroke-width`
   * are ignored.
   */
  private parseSvgSize(svgContent: string): { width: number; height: number } | undefined {
    const rootTag = svgContent.match(/<svg\b[^>]*>/i)?.[0];
    if (!rootTag) {
      return undefined;
    }

    const readLength = (name: 'width' | 'height'): number | undefined => {
      // The leading \s prevents matching e.g. `stroke-width="2"`.
      const pattern = new RegExp(`\\s${name}\\s*=\\s*["'](\\d+(?:\\.\\d+)?)(?:px)?["']`, 'i');
      const match = rootTag.match(pattern);
      return match ? Number.parseFloat(match[1]) : undefined;
    };

    const width = readLength('width');
    const height = readLength('height');
    if (width !== undefined && height !== undefined) {
      return { width, height };
    }

    const viewBox = rootTag.match(/\sviewBox\s*=\s*["']([^"']*)["']/i);
    if (viewBox) {
      const parts = viewBox[1].trim().split(/[\s,]+/).map(Number);
      if (
        parts.length === 4 &&
        parts.every((part) => Number.isFinite(part)) &&
        parts[2] > 0 &&
        parts[3] > 0
      ) {
        return { width: parts[2], height: parts[3] };
      }
    }
    return undefined;
  }
}