Koa.js 搜索引擎集成与全文搜索实战
引言
随着业务数据的增长,简单的关系型数据库查询已无法满足复杂的搜索需求。本文介绍如何在 Koa.js 项目中集成 Elasticsearch,实现高效的全文搜索、聚合分析和高亮显示。
技术架构
| 组件 | 作用 | 版本 |
|---|---|---|
| Elasticsearch | 分布式搜索引擎 | 8.x |
| IK 分词插件(analysis-ik) | 中文分词,提供 ik_smart(智能分词)与 ik_max_word(最细粒度分词)两种分词器 | 与 Elasticsearch 版本保持一致(8.x) |
| Kibana | 可视化管理和调试 | 8.x |
环境安装与配置
# Install the Elasticsearch client
npm install @elastic/elasticsearch

# Start Elasticsearch with Docker (development only: single node, security disabled)
docker run -d \
  --name elasticsearch \
  -p 9200:9200 \
  -p 9300:9300 \
  -e "discovery.type=single-node" \
  -e "xpack.security.enabled=false" \
  docker.elastic.co/elasticsearch/elasticsearch:8.11.0

# Install the Chinese analysis (IK) plugin.
# NOTE(review): analysis-ik is a community plugin; if installing by bare name
# fails, pass the download URL of the release matching the Elasticsearch
# version (8.11.0) instead — confirm the URL against the plugin's release page.
docker exec -it elasticsearch \
  elasticsearch-plugin install analysis-ik

# Plugins are only loaded at node startup, so restart the container afterwards.
docker restart elasticsearch
ES 客户端封装
// elasticsearch/client.js
import { Client } from '@elastic/elasticsearch';
import { koaResMiddleware } from './middleware/elasticsearch';
/**
 * Thin wrapper around the official Elasticsearch client exposing the handful
 * of operations this project uses (health check, index creation, single and
 * bulk indexing, search, delete).
 *
 * Write operations accept an optional `{ refresh }` option. It defaults to
 * `true` (the document is searchable as soon as the call returns); pass
 * `false` for high-volume writes to avoid forcing a refresh on every request.
 */
class ESClient {
  constructor() {
    this.client = new Client({
      node: process.env.ES_HOST || 'http://localhost:9200',
      // Configure authentication for production deployments:
      // auth: {
      //   username: 'elastic',
      //   password: 'your-password'
      // },
      requestTimeout: 30000,
      maxRetries: 3
    });
  }

  /**
   * Fetch the cluster health document.
   * @returns {Promise<object>} Response of the cluster health API.
   */
  async checkHealth() {
    return this.client.cluster.health();
  }

  /**
   * Create an index with the Chinese (IK) analyzers configured, unless it
   * already exists.
   * @param {string} indexName - Name of the index to create.
   * @param {object} mappings - Field mappings for the new index.
   * @returns {Promise<void>}
   */
  async createIndex(indexName, mappings) {
    const exists = await this.client.indices.exists({ index: indexName });
    if (exists) {
      console.log(`Index ${indexName} already exists`);
      return;
    }

    await this.client.indices.create({
      index: indexName,
      body: {
        settings: {
          analysis: {
            analyzer: {
              // Coarse-grained ("smart") Chinese analyzer
              ik_analyzer: {
                type: 'custom',
                tokenizer: 'ik_smart',
                filter: ['ascii_folding']
              },
              // Finest-grained Chinese analyzer
              ik_max_analyzer: {
                type: 'custom',
                tokenizer: 'ik_max_word',
                filter: ['ascii_folding']
              }
            },
            filter: {
              ascii_folding: {
                type: 'asciifolding',
                preserve_original: true
              }
            }
          },
          // Shard / replica layout
          index: {
            number_of_shards: 3,
            number_of_replicas: 1
          }
        },
        mappings
      }
    });
  }

  /**
   * Index (create or replace) a single document.
   * @param {string} indexName - Target index.
   * @param {string} id - Document id.
   * @param {object} document - Document source.
   * @param {{refresh?: boolean|string}} [options] - `refresh` defaults to `true`.
   * @returns {Promise<object>} Response of the index API.
   */
  async index(indexName, id, document, { refresh = true } = {}) {
    return this.client.index({
      index: indexName,
      id,
      document,
      refresh
    });
  }

  /**
   * Index many documents with one bulk request. Each document's `id`
   * property is used as the Elasticsearch `_id`.
   * @param {string} indexName - Target index.
   * @param {Array<object>} documents - Documents to index.
   * @param {{refresh?: boolean|string}} [options] - `refresh` defaults to `true`.
   * @returns {Promise<object>} Response of the bulk API.
   * @throws {Error} When the bulk response reports failed items; the error
   *   carries `failures` (the failed item results) and `result` (the full response).
   */
  async bulkIndex(indexName, documents, { refresh = true } = {}) {
    // An empty bulk request is rejected by Elasticsearch; there is nothing to do anyway.
    if (documents.length === 0) {
      return { took: 0, errors: false, items: [] };
    }

    const operations = documents.flatMap((doc) => [
      { index: { _index: indexName, _id: doc.id } },
      doc
    ]);
    const result = await this.client.bulk({ operations, refresh });

    // The bulk API does not fail the request when individual items fail;
    // failures are only visible through the `errors` flag of the response.
    if (result.errors) {
      const failures = (result.items ?? [])
        .map((item) => Object.values(item)[0])
        .filter((action) => action?.error);
      const error = new Error(
        `Bulk indexing into ${indexName} failed for ${failures.length} of ${documents.length} documents`
      );
      error.failures = failures;
      error.result = result;
      throw error;
    }
    return result;
  }

  /**
   * Run a search request.
   * @param {string} indexName - Index to search.
   * @param {object} query - Search request body (query, aggs, sort, ...).
   * @returns {Promise<object>} Response of the search API.
   */
  async search(indexName, query) {
    return this.client.search({
      index: indexName,
      body: query
    });
  }

  /**
   * Delete a document by id.
   * @param {string} indexName - Index holding the document.
   * @param {string} id - Document id.
   * @param {{refresh?: boolean|string}} [options] - `refresh` defaults to `true`.
   * @returns {Promise<void>}
   */
  async delete(indexName, id, { refresh = true } = {}) {
    await this.client.delete({ index: indexName, id, refresh });
  }

  /**
   * Build the Koa middleware bound to the underlying client.
   * @returns {*} Whatever `koaResMiddleware` returns for this client.
   */
  getMiddleware() {
    return koaResMiddleware(this.client);
  }
}
export default new ESClient();
商品搜索服务实现
// services/searchService.js
import esClient from '../elasticsearch/client';
/**
 * Product search built on the `products` Elasticsearch index: indexing,
 * keyword search with filters / highlighting / facets, prefix suggestions
 * and terms aggregations.
 */
class ProductSearchService {
  constructor() {
    this.indexName = 'products';
    // A constructor cannot await. Keep the promise so callers can wait on it,
    // and report a failed index creation instead of leaving an unhandled rejection.
    this.ready = this.initIndex().catch((error) => {
      console.error(`Failed to initialise index ${this.indexName}:`, error);
    });
  }

  /**
   * Create the product index (no-op when it already exists).
   * @returns {Promise<void>}
   */
  async initIndex() {
    const mappings = {
      properties: {
        // Basic product information
        productId: { type: 'keyword' },
        productName: {
          type: 'text',
          analyzer: 'ik_max_word',
          search_analyzer: 'ik_smart',
          fields: {
            keyword: { type: 'keyword' },
            // NOTE(review): the `pinyin` analyzer comes from a separate
            // pinyin analysis plugin; index creation fails if only the IK
            // plugin is installed — confirm the deployment installs it.
            pinyin: { type: 'text', analyzer: 'pinyin' }
          }
        },
        // Completion suggesters only work on `completion` fields, so
        // suggestions get a dedicated field. The `status` context is read from
        // the document's own `status` field. Indices created before this field
        // existed must be recreated and reindexed to gain it.
        suggest: {
          type: 'completion',
          contexts: [{ name: 'status', type: 'category', path: 'status' }]
        },
        description: {
          type: 'text',
          analyzer: 'ik_max_word'
        },
        category: { type: 'keyword' },
        categoryPath: { type: 'keyword' },
        // Pricing
        price: { type: 'scaled_float', scaling_factor: 100 },
        originalPrice: { type: 'scaled_float', scaling_factor: 100 },
        discount: { type: 'float' },
        // Stock and sales
        stock: { type: 'integer' },
        salesCount: { type: 'integer' },
        // Attributes
        brand: { type: 'keyword' },
        tags: { type: 'keyword' },
        attributes: {
          type: 'nested',
          properties: {
            name: { type: 'keyword' },
            value: { type: 'keyword' }
          }
        },
        // Timestamps
        createTime: { type: 'date' },
        updateTime: { type: 'date' },
        onShelfTime: { type: 'date' },
        // Status flags
        status: { type: 'keyword' },
        isHot: { type: 'boolean' },
        isNew: { type: 'boolean' }
      }
    };
    await esClient.createIndex(this.indexName, mappings);
  }

  /**
   * Convert a raw timestamp into a Date, or `null` when it is missing or
   * unparseable (`new Date(null)` would otherwise yield the 1970 epoch).
   * @param {*} value - Date, timestamp or date string.
   * @returns {Date|null}
   */
  normalizeDate(value) {
    if (value === undefined || value === null || value === '') {
      return null;
    }
    const date = value instanceof Date ? value : new Date(value);
    return Number.isNaN(date.getTime()) ? null : date;
  }

  /**
   * Index a single product, using `product.productId` as the document id.
   * @param {object} product - Product record.
   * @returns {Promise<object>} Result of `esClient.index`.
   */
  async indexProduct(product) {
    const document = {
      ...product,
      createTime: this.normalizeDate(product.createTime),
      updateTime: this.normalizeDate(product.updateTime),
      onShelfTime: this.normalizeDate(product.onShelfTime),
      // Feed the completion field used by suggest().
      ...(product.productName ? { suggest: { input: product.productName } } : {})
    };
    return esClient.index(this.indexName, product.productId, document);
  }

  /**
   * Search active products.
   * @param {object} [params]
   * @param {string} [params.keyword] - Full-text keyword.
   * @param {string} [params.category] - Matches `category` or `categoryPath`.
   * @param {number} [params.minPrice] - Inclusive lower price bound.
   * @param {number} [params.maxPrice] - Inclusive upper price bound.
   * @param {string} [params.brand] - Exact brand.
   * @param {string[]} [params.tags] - Matches products carrying any of the tags.
   * @param {number} [params.page=1] - 1-based page number.
   * @param {number} [params.size=20] - Page size.
   * @param {string} [params.sort='relevance'] - See buildSort().
   * @param {boolean} [params.highlight=true] - Whether to request highlights.
   * @returns {Promise<{total: number, products: object[], aggregations: object}>}
   */
  async searchProducts(params = {}) {
    const {
      keyword,
      category,
      minPrice,
      maxPrice,
      brand,
      tags = [],
      page = 1,
      size = 20,
      sort = 'relevance',
      highlight = true
    } = params;

    const toInt = (value, fallback, min) => {
      const n = Math.trunc(Number(value));
      return Number.isFinite(n) && n >= min ? n : fallback;
    };
    const isNumeric = (value) =>
      value !== undefined && value !== null && value !== '' && Number.isFinite(Number(value));

    // Invalid paging values would otherwise produce a negative or NaN offset.
    const pageNumber = toInt(page, 1, 1);
    const pageSize = toInt(size, 20, 0);

    // Exact-match conditions go into `filter`: they do not need scoring and
    // filter clauses can be cached. Only the keyword clause affects relevance.
    const filter = [{ term: { status: 'active' } }];
    const must = [];

    if (keyword) {
      must.push({
        multi_match: {
          query: keyword,
          // The product name carries the highest weight
          fields: ['productName^3', 'description', 'brand', 'tags^2'],
          type: 'best_fields',
          fuzziness: 'AUTO'
        }
      });
    }

    if (category) {
      filter.push({
        bool: {
          should: [
            { term: { category } },
            { term: { categoryPath: category } }
          ],
          minimum_should_match: 1
        }
      });
    }

    const priceRange = {};
    if (isNumeric(minPrice)) priceRange.gte = Number(minPrice);
    if (isNumeric(maxPrice)) priceRange.lte = Number(maxPrice);
    if (Object.keys(priceRange).length > 0) {
      filter.push({ range: { price: priceRange } });
    }

    if (brand) {
      filter.push({ term: { brand } });
    }

    // Any one of the tags is enough
    if (Array.isArray(tags) && tags.length > 0) {
      filter.push({ terms: { tags } });
    }

    const query = {
      from: (pageNumber - 1) * pageSize,
      size: pageSize,
      query: {
        bool: { must, filter }
      },
      sort: this.buildSort(sort, keyword),
      // Facets
      aggs: {
        category_agg: { terms: { field: 'category', size: 20 } },
        brand_agg: { terms: { field: 'brand', size: 30 } },
        price_stats: { stats: { field: 'price' } }
      }
    };
    if (highlight) {
      query.highlight = {
        // Empty tags would make matches indistinguishable from the rest of the text.
        pre_tags: ['<em>'],
        post_tags: ['</em>'],
        fields: {
          productName: { number_of_fragments: 0 },
          description: { fragment_size: 100 }
        }
      };
    }

    const result = await esClient.search(this.indexName, query);
    const total = result.hits.total;

    return {
      total: typeof total === 'number' ? total : total?.value ?? 0,
      products: result.hits.hits.map((hit) => ({
        ...hit._source,
        _score: hit._score,
        _highlight: hit.highlight
      })),
      aggregations: {
        categories: result.aggregations.category_agg.buckets,
        brands: result.aggregations.brand_agg.buckets,
        priceStats: result.aggregations.price_stats
      }
    };
  }

  /**
   * Translate a sort key into an Elasticsearch sort clause.
   * @param {string} sort - 'price_asc' | 'price_desc' | 'sales_desc' | 'newest' | 'relevance'.
   * @param {*} hasKeyword - Truthy when a keyword is part of the query.
   * @returns {Array} Sort clause.
   */
  buildSort(sort, hasKeyword) {
    switch (sort) {
      case 'price_asc':
        return [{ price: 'asc' }, '_doc'];
      case 'price_desc':
        return [{ price: 'desc' }, '_doc'];
      case 'sales_desc':
        return [{ salesCount: 'desc' }, '_doc'];
      case 'newest':
        return [{ onShelfTime: 'desc' }, '_doc'];
      case 'relevance':
      default:
        // With a keyword sort by relevance, otherwise by sales
        return hasKeyword ? ['_score', { salesCount: 'desc' }] : [{ salesCount: 'desc' }, '_doc'];
    }
  }

  /**
   * Prefix suggestions for active products.
   * @param {string} keyword - Prefix typed by the user.
   * @param {number} [size=10] - Maximum number of suggestions.
   * @returns {Promise<Array<{text: string, productId: string, score: number}>>}
   */
  async suggest(keyword, size = 10) {
    const result = await esClient.search(this.indexName, {
      size: 0,
      _source: ['productName'],
      suggest: {
        product_suggest: {
          prefix: keyword,
          completion: {
            // Must be a `completion` field; a keyword sub-field is rejected.
            field: 'suggest',
            size,
            skip_duplicates: true,
            contexts: {
              status: ['active']
            }
          }
        }
      }
    });

    const [entry] = result.suggest?.product_suggest ?? [];
    return (entry?.options ?? []).map((opt) => ({
      text: opt._source.productName,
      productId: opt._id,
      score: opt._score
    }));
  }

  /**
   * Terms aggregation over one field. `attributes` (a nested field) is
   * aggregated by attribute name through a nested aggregation.
   * @param {string} field - Field to aggregate on.
   * @param {number} [size=50] - Maximum number of buckets.
   * @returns {Promise<object[]>} Buckets ordered by document count, descending.
   */
  async aggregateProducts(field, size = 50) {
    const nestedPath = 'attributes';
    const isNested = field === nestedPath || field.startsWith(`${nestedPath}.`);
    const terms = {
      field: field === nestedPath ? `${nestedPath}.name` : field,
      size,
      order: { _count: 'desc' }
    };

    // esClient.search already sends its second argument as the request body,
    // so the aggregation must not be wrapped in another `body` key.
    const result = await esClient.search(this.indexName, {
      size: 0,
      aggs: {
        field_agg: isNested
          ? { nested: { path: nestedPath }, aggs: { values: { terms } } }
          : { terms }
      }
    });

    const aggregation = result.aggregations.field_agg;
    return isNested ? aggregation.values.buckets : aggregation.buckets;
  }

  /**
   * Remove a product from the index.
   * @param {string} productId - Document id of the product.
   * @returns {Promise<void>}
   */
  async deleteProduct(productId) {
    await esClient.delete(this.indexName, productId);
  }
}
export default new ProductSearchService();
API 路由集成
// routes/search.js
import Router from 'koa-router';
import productSearchService from '../services/searchService';
const router = new Router({ prefix: '/api/search' });
// Product search
router.get('/products', async (ctx) => {
  const {
    keyword,
    category,
    minPrice,
    maxPrice,
    brand,
    tags,
    page = 1,
    size = 20,
    sort = 'relevance'
  } = ctx.query;

  // Query-string values are strings (or arrays when a key is repeated), so
  // every numeric input is parsed explicitly and invalid values are dropped.
  const toPrice = (raw) => {
    const value = Number.parseFloat(raw);
    return Number.isFinite(value) ? value : undefined;
  };
  const toInt = (raw, fallback) => {
    const value = Number.parseInt(raw, 10);
    return Number.isNaN(value) ? fallback : value;
  };

  // Accept both `tags=a,b` and `tags=a&tags=b`.
  const tagList = [].concat(tags ?? [])
    .flatMap((entry) => String(entry).split(','))
    .map((entry) => entry.trim())
    .filter(Boolean);

  try {
    const result = await productSearchService.searchProducts({
      keyword,
      category,
      minPrice: toPrice(minPrice),
      maxPrice: toPrice(maxPrice),
      brand,
      tags: tagList.length > 0 ? tagList : undefined,
      // Page is 1-based; size is clamped to 1..100.
      page: Math.max(1, toInt(page, 1)),
      size: Math.min(Math.max(1, toInt(size, 20)), 100),
      sort
    });
    ctx.body = {
      code: 0,
      data: result
    };
  } catch (error) {
    // Report the failure through Koa's error event, but do not echo the raw
    // Elasticsearch error (which can expose query and cluster details) to the client.
    ctx.app.emit('error', error, ctx);
    ctx.status = 500;
    ctx.body = {
      code: 500,
      message: 'Search failed'
    };
  }
});
// Search suggestions
router.get('/suggest', async (ctx) => {
  const { keyword, size = 10 } = ctx.query;
  // A repeated `keyword` key arrives as an array; use the first value.
  const prefix = Array.isArray(keyword) ? keyword[0] : keyword;
  if (!prefix) {
    ctx.body = { code: 0, data: [] };
    return;
  }

  // Clamp the requested size to 1..50 so a client cannot request an
  // unbounded (or NaN) number of suggestions.
  const requested = Number.parseInt(size, 10);
  const limit = Number.isNaN(requested) ? 10 : Math.min(Math.max(requested, 1), 50);

  try {
    const suggestions = await productSearchService.suggest(prefix, limit);
    ctx.body = { code: 0, data: suggestions };
  } catch (error) {
    // Same error contract as the product search endpoint.
    ctx.app.emit('error', error, ctx);
    ctx.status = 500;
    ctx.body = {
      code: 500,
      message: 'Suggest failed'
    };
  }
});
// Aggregation statistics
router.get('/aggregations/:field', async (ctx) => {
  // Only these fields may be aggregated on; anything else is a client error.
  const allowedFields = ['category', 'brand', 'tags', 'attributes'];
  const requestedField = ctx.params.field;
  const isAllowed = allowedFields.includes(requestedField);

  if (isAllowed === false) {
    ctx.throw(400, 'Invalid aggregation field');
  }

  const buckets = await productSearchService.aggregateProducts(requestedField);
  ctx.body = { code: 0, data: buckets };
});
// Rebuild the index (administrative endpoint)
// NOTE(review): no authentication is visible on this route — confirm it is
// protected by middleware before exposing it.
router.post('/rebuild', async (ctx) => {
  // Sync every active product from the database into Elasticsearch.
  // In production, read the source table in batches rather than loading
  // everything in one query.
  // NOTE(review): `Product` is not imported in this file — presumably the ORM
  // model for products; confirm and add the import.
  const products = await Product.findAll({ where: { status: 'active' } });

  // The search service exposes indexProduct() (one document at a time) and no
  // bulk method, so index in bounded batches to cap the number of in-flight requests.
  const BATCH_SIZE = 50;
  for (let start = 0; start < products.length; start += BATCH_SIZE) {
    const batch = products.slice(start, start + BATCH_SIZE);
    await Promise.all(
      batch.map((p) => {
        const doc = p.toJSON();
        // indexProduct() uses `productId` as the document id; fall back to the
        // model's primary key when the record does not carry one.
        return productSearchService.indexProduct({
          ...doc,
          productId: doc.productId ?? p.id
        });
      })
    );
  }

  ctx.body = {
    code: 0,
    message: `Indexed ${products.length} products`
  };
});
export default router;
性能优化技巧
- 分页优化:from + size 默认不能超过 10000(index.max_result_window),且页码越深开销越大;深度分页应改用 search_after
- 索引优化:合理设置分片数和副本数
- 查询优化:不需要参与相关度评分的精确条件(状态、分类、品牌、价格区间等)放入 filter 而不是 must,既省去评分计算,又能利用查询缓存
- 写入优化:批量写入,使用 refresh: false 延迟刷新
- 分词器选择:查询时用 ik_smart,索引时用 ik_max_word
总结
Koa.js 集成 Elasticsearch 实现全文搜索的核心要点:
- 中文分词:安装 ik 分词器,实现智能中文分词
- 查询构建:根据业务需求灵活组合多条件查询
- 高亮显示:搜索关键词高亮展示,提升用户体验
- 聚合统计:利用 ES 聚合能力实现分类统计
- 性能优化:避免深度分页,合理配置索引参数
通过 Elasticsearch 的强大搜索能力,可以实现类似电商平台的高性能商品搜索体验。