
Elasticsearch:全文搜索与分析
索引映射设计
import { Client } from '@elastic/elasticsearch';
const client = new Client({ node: 'http://localhost:9200' });
// Create the `products` index with an explicit mapping.

// Custom analyzer: `standard` tokenizer followed by the `lowercase`, `stop`
// and `stemmer` token filters. Referenced by name from the text fields below.
const productAnalyzer = {
  type: 'custom',
  tokenizer: 'standard',
  filter: ['lowercase', 'stop', 'stemmer'],
};

// Index-level settings: shard/replica counts plus the analyzer registration.
const productIndexSettings = {
  number_of_shards: 3,
  number_of_replicas: 1,
  analysis: {
    analyzer: { product_analyzer: productAnalyzer },
  },
};

// `name` is analyzed text with two sub-fields.
const productNameField = {
  type: 'text',
  analyzer: 'product_analyzer',
  fields: {
    keyword: { type: 'keyword' }, // for sorting / aggregations
    suggest: { type: 'completion' }, // for autocomplete
  },
};

// Field-by-field mapping for product documents.
const productIndexMappings = {
  properties: {
    id: { type: 'keyword' },
    name: productNameField,
    description: { type: 'text', analyzer: 'product_analyzer' },
    price: { type: 'double' },
    category: { type: 'keyword' },
    tags: { type: 'keyword' },
    brand: { type: 'keyword' },
    inStock: { type: 'boolean' },
    rating: { type: 'float' },
    createdAt: { type: 'date' },
  },
};

await client.indices.create({
  index: 'products',
  body: {
    settings: productIndexSettings,
    mappings: productIndexMappings,
  },
});

全文搜索
// Basic multi-field search.

// Full-text clause searched across `name`, `description` and `tags`.
const multiFieldQuery = {
  multi_match: {
    query: 'wireless noise cancelling headphones',
    fields: ['name^3', 'description', 'tags^2'], // boost name and tags
    type: 'best_fields',
    fuzziness: 'AUTO', // tolerate typos
  },
};

// Highlighting is requested for `name` and `description`; description
// fragments use a fragment_size of 150.
const highlightOptions = {
  fields: { name: {}, description: { fragment_size: 150 } },
};

const results = await client.search({
  index: 'products',
  body: {
    query: multiFieldQuery,
    highlight: highlightOptions,
    size: 20,
    from: 0,
  },
});
// Boolean query with filters.

// `must`: the full-text clause over `name` and `description`.
const headphonesTextClause = {
  multi_match: { query: 'headphones', fields: ['name', 'description'] },
};

// `filter`: stock flag, category whitelist, price range and minimum rating.
const productFilterClauses = [
  { term: { inStock: true } },
  { terms: { category: ['electronics', 'audio'] } },
  { range: { price: { gte: 50, lte: 500 } } },
  { range: { rating: { gte: 4.0 } } },
];

// `should`: boost Sony products.
const brandBoostClauses = [{ term: { brand: 'Sony' } }];

// Results are ordered by `_score` first, then by `rating`, both descending.
const filteredSort = [
  { _score: { order: 'desc' } },
  { rating: { order: 'desc' } },
];

const filtered = await client.search({
  index: 'products',
  body: {
    query: {
      bool: {
        must: [headphonesTextClause],
        filter: productFilterClauses,
        should: brandBoostClauses,
        minimum_should_match: 0,
      },
    },
    sort: filteredSort,
  },
});

用于分面搜索的聚合
// Faceted search: one `match` query on `name` plus several aggregations
// requested in the same search call.

// Price buckets for the `price_ranges` range aggregation.
const priceBuckets = [
  { key: 'under_50', to: 50 },
  { key: '50_to_200', from: 50, to: 200 },
  { key: '200_to_500', from: 200, to: 500 },
  { key: 'over_500', from: 500 },
];

// Aggregations keyed by the name they are requested under.
const facetAggregations = {
  categories: { terms: { field: 'category', size: 10 } },
  brands: { terms: { field: 'brand', size: 10 } },
  price_ranges: { range: { field: 'price', ranges: priceBuckets } },
  avg_rating: { avg: { field: 'rating' } },
  stats: { stats: { field: 'price' } },
};

const facets = await client.search({
  index: 'products',
  body: {
    query: { match: { name: 'headphones' } },
    aggs: facetAggregations,
    size: 20,
  },
});

使用 Completion Suggester 实现自动补全
// Query the `name.suggest` completion field for suggestions matching a prefix.
const productSuggestRequest = {
  prefix: 'wireles',
  completion: {
    field: 'name.suggest',
    size: 5,
    fuzzy: { fuzziness: 1 },
  },
};

const suggest = await client.search({
  index: 'products',
  body: {
    suggest: { product_suggest: productSuggestRequest },
  },
});

// Take the options of the first `product_suggest` entry and reduce each one
// to its suggested text and the `id` of its source document.
const firstSuggestEntry = suggest.suggest.product_suggest[0];
const suggestions = firstSuggestEntry.options.map(({ text, _source }) => {
  return { text, id: _source.id };
});
索引管理
# NOTE: URLs are quoted because `?` is a shell glob character; unquoted,
# zsh (and bash with `failglob`) aborts with "no matches found".

# Check cluster health
curl 'http://localhost:9200/_cluster/health?pretty'
# Index statistics
curl 'http://localhost:9200/products/_stats?pretty'
# Force-merge a read-heavy index (max_num_segments=1)
curl -X POST 'http://localhost:9200/products/_forcemerge?max_num_segments=1'
# Index lifecycle management (ILM)
# Automatically roll over large indices
Elasticsearch 在全文搜索方面表现出色;对于简单用例,可以使用 PostgreSQL 的 tsvector。