搜索引擎集成与查询优化
大约 14 分钟约 4085 字
搜索引擎集成与查询优化
简介
搜索是用户与应用交互的核心入口之一。一个高质量的搜索系统不仅需要快速返回结果,还需要理解用户意图、处理中文分词、支持模糊匹配和拼写纠错。Elasticsearch 是当前最流行的全文搜索引擎,在 .NET 生态中通过 NEST 或 Elastic.Clients.Elasticsearch 客户端库与之集成。
本文将系统讲解如何在 ASP.NET Core 应用中集成 Elasticsearch,从索引设计、中文分词、查询优化到生产监控,覆盖搜索系统的完整生命周期。
特点
- 全文检索:基于倒排索引,支持毫秒级全文搜索
- 中文分词:IK Analyzer 支持 Smart 和 Max Word 两种分词模式
- 相关性调优:通过 Boost、Function Score 等机制精确控制排序
- 聚合分析:支持多维度的统计分析,比 SQL GROUP BY 更灵活
- 近实时:文档写入后约 1 秒即可被搜索到
核心实现
一、客户端初始化与连接管理
// ============ 安装 NuGet: Elastic.Clients.Elasticsearch ============
// Program.cs
builder.Services.AddSingleton<IElasticClient>(sp =>
{
var config = sp.GetRequiredService<IConfiguration>();
var settings = new ElasticsearchClientSettings(new Uri(config["Elasticsearch:Url"]!))
.DefaultIndex("products")
.BasicAuthentication(config["Elasticsearch:Username"], config["Elasticsearch:Password"])
.ConnectionLimit(10) // 连接池大小
.RequestTimeout(TimeSpan.FromSeconds(30)) // 请求超时
.EnableDebugMode(() => config.GetValue("Elasticsearch:Debug", false))
.PingTimeout(TimeSpan.FromSeconds(5)) // Ping 超时
.DeadTimeout(TimeSpan.FromSeconds(10)) // 死节点超时
.SniffLifeSpan(TimeSpan.FromMinutes(2)) // 嗅探间隔
.OnRequestCompleted(callDetails =>
{
// 请求完成回调,用于日志记录
var logger = sp.GetRequiredService<ILogger<Program>>();
if (callDetails.HttpStatusCode == null)
{
logger.LogWarning("Elasticsearch 请求失败: {Method} {Uri}",
callDetails.HttpMethod, callDetails.Uri);
}
else if (callDetails.HttpStatusCode >= 400)
{
logger.LogWarning("Elasticsearch 返回错误: {Status} {Method} {Uri}",
callDetails.HttpStatusCode, callDetails.HttpMethod, callDetails.Uri);
}
});
return new ElasticsearchClient(settings);
});二、索引设计
// ============ 文档模型定义 ============
public class ProductDocument
{
[PropertyName("id")]
public string Id { get; set; } = string.Empty;
[PropertyName("name")]
[Text(Analyzer = "ik_max_word", SearchAnalyzer = "ik_smart")]
public string Name { get; set; } = string.Empty;
[PropertyName("name_suggest")]
public CompletionField NameSuggest { get; set; } = new();
[PropertyName("description")]
[Text(Analyzer = "ik_max_word", SearchAnalyzer = "ik_smart")]
public string Description { get; set; } = string.Empty;
[PropertyName("category")]
[Keyword]
public string Category { get; set; } = string.Empty;
[PropertyName("brand")]
[Keyword]
public string Brand { get; set; } = string.Empty;
[PropertyName("price")]
public decimal Price { get; set; }
[PropertyName("sales_count")]
public int SalesCount { get; set; }
[PropertyName("rating")]
public double Rating { get; set; }
[PropertyName("tags")]
[Keyword]
public List<string> Tags { get; set; } = new();
[PropertyName("is_available")]
public bool IsAvailable { get; set; }
[PropertyName("created_at")]
public DateTime CreatedAt { get; set; }
[PropertyName("updated_at")]
public DateTime UpdatedAt { get; set; }
[PropertyName("popularity")]
public double Popularity { get; set; }
}
// ============ 索引创建与管理 ============
public class ElasticsearchIndexManager
{
private readonly IElasticClient _client;
private readonly ILogger<ElasticsearchIndexManager> _logger;
public ElasticsearchIndexManager(IElasticClient client, ILogger<ElasticsearchIndexManager> logger)
{
_client = client;
_logger = logger;
}
/// <summary>
/// 创建产品索引(带 IK 分词器配置)
/// </summary>
public async Task CreateProductIndexAsync(string indexName = "products")
{
var existsResponse = await _client.Indices.ExistsAsync(indexName);
if (existsResponse.Exists)
{
_logger.LogInformation("索引 {Index} 已存在", indexName);
return;
}
var createResponse = await _client.Indices.CreateAsync(indexName, c => c
.Settings(s => s
.NumberOfShards(3)
.NumberOfReplicas(1)
.RefreshInterval("1s")
.Analysis(a => a
.Analyzer(an => an
// 自定义分词器:拼音搜索
.Custom("pinyin_analyzer", ca => ca
.Tokenizer("standard")
.Filter("lowercase", "pinyin_filter"))
// 自定义分词器:同义词搜索
.Custom("synonym_analyzer", ca => ca
.Tokenizer("ik_max_word")
.Filter("lowercase", "synonym_filter"))
)
.TokenFilter(tf => tf
// 拼音过滤器(需安装 elasticsearch-analysis-pinyin 插件)
.Pinyin("pinyin_filter", pf => pf
.KeepFirstLetter(true)
.KeepSeparateFirstLetter(false)
.KeepFullPinyin(true)
.KeepOriginal(true)
.LimitFirstLetterLength(16)
.Lowercase(true))
// 同义词过滤器
.Synonym("synonym_filter", sf => sf
.Synonyms(
"手机,智能手机,电话",
"笔记本,笔记本电脑,手提电脑",
"耳机,耳麦,耳塞"))
)
)
)
.Map<ProductDocument>(m => m
.Properties(p => p
.Text(t => t
.Name(n => n.Name)
.Analyzer("ik_max_word")
.SearchAnalyzer("ik_smart")
.Fields(f => f
.Keyword(k => k.Name("keyword"))
.Completion(c => c.Name("suggest"))
.Text(tt => tt.Name("pinyin").Analyzer("pinyin_analyzer"))
))
.Text(t => t
.Name(n => n.Description)
.Analyzer("ik_max_word")
.SearchAnalyzer("ik_smart"))
.Keyword(k => k.Name(n => n.Category))
.Keyword(k => k.Name(n => n.Brand))
.Number(n => n.Name(nn => nn.Price).Type(NumberType.Double))
.Number(n => n.Name(nn => nn.SalesCount).Type(NumberType.Integer))
.Number(n => n.Name(nn => nn.Rating).Type(NumberType.Double))
.Keyword(k => k.Name(n => n.Tags))
.Boolean(b => b.Name(n => n.IsAvailable))
.Date(d => d.Name(n => n.CreatedAt))
.Date(d => d.Name(n => n.UpdatedAt))
.Number(n => n.Name(nn => nn.Popularity).Type(NumberType.Double))
)
)
);
if (createResponse.IsValid)
{
_logger.LogInformation("索引 {Index} 创建成功", indexName);
}
else
{
_logger.LogError("索引创建失败: {Error}", createResponse.DebugInformation);
}
}
}三、文档写入与更新
// ============ 文档批量操作服务 ============
public class ProductSearchService
{
private readonly IElasticClient _client;
private readonly ILogger<ProductSearchService> _logger;
private const string IndexName = "products";
public ProductSearchService(IElasticClient client, ILogger<ProductSearchService> logger)
{
_client = client;
_logger = logger;
}
/// <summary>
/// 单文档索引
/// </summary>
public async Task IndexAsync(ProductDocument product)
{
var response = await _client.IndexAsync(product, i => i
.Index(IndexName)
.Id(product.Id)
.Refresh(Refresh.WaitFor));
if (!response.IsValid)
{
_logger.LogError("索引文档失败: {Error}", response.DebugInformation);
}
}
/// <summary>
/// 批量索引(推荐)
/// </summary>
public async Task BulkIndexAsync(IEnumerable<ProductDocument> products)
{
var response = await _client.BulkAsync(b => b
.Index(IndexName)
.Refresh(Refresh.WaitFor)
.IndexMany(products, (descriptor, doc) => descriptor
.Id(doc.Id)
.Document(doc)));
if (response.Errors)
{
foreach (var item in response.ItemsWithErrors)
{
_logger.LogError("批量索引失败: Id={Id}, Error={Error}", item.Id, item.Error?.Reason);
}
}
_logger.LogInformation("批量索引完成: 成功 {Success}, 失败 {Failed}",
response.Items.Count(i => i.IsValid),
response.ItemsWithErrors.Count());
}
/// <summary>
/// 部分更新(仅更新指定字段)
/// </summary>
public async Task PartialUpdateAsync(string productId, decimal newPrice, int additionalSales)
{
var response = await _client.UpdateAsync<ProductDocument, object>(productId, u => u
.Index(IndexName)
.Refresh(Refresh.WaitFor)
.Doc(new
{
Price = newPrice,
SalesCount = additionalSales,
UpdatedAt = DateTime.UtcNow
})
.RetryOnConflict(3));
if (!response.IsValid)
{
_logger.LogError("更新文档失败: Id={Id}, Error={Error}", productId, response.DebugInformation);
}
}
/// <summary>
/// 使用脚本更新(原子递增)
/// </summary>
public async Task IncrementSalesAsync(string productId, int count = 1)
{
var response = await _client.UpdateAsync<ProductDocument, object>(productId, u => u
.Index(IndexName)
.Script(s => s
.Source("ctx._source.sales_count += params.count; ctx._source.updated_at = params.now")
.Params(p => p
.Add("count", count)
.Add("now", DateTime.UtcNow)))
.RetryOnConflict(3));
if (!response.IsValid)
{
_logger.LogError("脚本更新失败: Id={Id}", productId);
}
}
}四、查询 DSL 优化
// ============ 多条件组合搜索 ============
public class SearchQueryService
{
private readonly IElasticClient _client;
private const string IndexName = "products";
public SearchQueryService(IElasticClient client)
{
_client = client;
}
/// <summary>
/// 综合搜索:关键词 + 分类 + 价格区间 + 排序
/// </summary>
public async Task<SearchResult<ProductDocument>> SearchAsync(SearchRequest request)
{
var mustQueries = new List<Action<QueryDescriptor<ProductDocument>>>();
// 关键词搜索
if (!string.IsNullOrWhiteSpace(request.Keyword))
{
mustQueries.Add(q => q
.MultiMatch(mm => mm
.Fields(f => f
.Field(p => p.Name, boost: 3.0) // 名称权重最高
.Field(p => p.Description, boost: 1.0) // 描述权重较低
.Field("name.pinyin", boost: 2.0)) // 拼音匹配
.Query(request.Keyword)
.Type(TextQueryType.BestFields)
.TieBreaker(0.3)
.MinimumShouldMatch("70%")));
}
// 分类过滤
if (!string.IsNullOrWhiteSpace(request.Category))
{
mustQueries.Add(q => q.Term(t => t
.Field(p => p.Category)
.Value(request.Category)));
}
// 品牌过滤
if (request.Brands?.Any() == true)
{
mustQueries.Add(q => q.Terms(t => t
.Field(p => p.Brand)
.Terms(request.Brands)));
}
// 价格区间
if (request.MinPrice.HasValue || request.MaxPrice.HasValue)
{
mustQueries.Add(q => q.Range(r => r
.Field(p => p.Price)
.GreaterThanOrEquals((double?)request.MinPrice)
.LessThanOrEquals((double?)request.MaxPrice)));
}
// 仅展示可用商品
mustQueries.Add(q => q.Term(t => t.Field(p => p.IsAvailable).Value(true)));
var response = await _client.SearchAsync<ProductDocument>(s => s
.Index(IndexName)
.From((request.Page - 1) * request.PageSize)
.Size(request.PageSize)
.Query(q => q.Bool(b => b.Must(mustQueries.ToArray())))
.Sort(sort =>
{
switch (request.SortBy)
{
case "price_asc":
sort.Field(f => f.Field(p => p.Price).Order(SortOrder.Asc));
break;
case "price_desc":
sort.Field(f => f.Field(p => p.Price).Order(SortOrder.Desc));
break;
case "sales":
sort.Field(f => f.Field(p => p.SalesCount).Order(SortOrder.Desc));
break;
case "rating":
sort.Field(f => f.Field(p => p.Rating).Order(SortOrder.Desc));
break;
default: // relevance
sort.Score();
break;
}
return sort;
})
.Highlight(h => h
.Fields(f => f
.Field(p => p.Name)
.PreTags("<em class='highlight'>")
.PostTags("</em>")
.FragmentSize(100)
.NumberOfFragments(1)
))
.Source(src => src // 只返回需要的字段
.Includes(f => f
.Field(p => p.Id)
.Field(p => p.Name)
.Field(p => p.Price)
.Field(p => p.Category)
.Field(p => p.Brand)
.Field(p => p.Rating)
.Field(p => p.SalesCount)
)));
return new SearchResult<ProductDocument>
{
Total = response.Total,
Items = response.Documents.ToList(),
Highlights = response.Hits.Select(h => new HitHighlight
{
Id = h.Id,
Score = h.Score ?? 0,
Highlights = h.Highlight.ToDictionary(
kv => kv.Key,
kv => kv.Value.ToList())
}).ToList()
};
}
}// ============ Function Score 查询(个性化排序) ============
public async Task<SearchResult<ProductDocument>> SearchWithPersonalizationAsync(
string keyword, string userId)
{
var response = await _client.SearchAsync<ProductDocument>(s => s
.Index(IndexName)
.Size(20)
.Query(q => q
.FunctionScore(fs => fs
.Query(qq => qq
.MultiMatch(mm => mm
.Fields(f => f.Field(p => p.Name).Field(p => p.Description))
.Query(keyword)
.Type(TextQueryType.BestFields)))
.Functions(fn => fn
// 销量加权
.FieldValueFactor(fvf => fvf
.Field(p => p.SalesCount)
.Factor(0.1)
.Modifier(FieldValueFactorModifier.Log1p))
// 评分加权
.FieldValueFactor(fvf => fvf
.Field(p => p.Rating)
.Factor(0.5)
.Modifier(FieldValueFactorModifier.Square))
// 新品加权
.Gauss(g => g
.Date(d => d.Field(p => p.CreatedAt))
.Origin("now")
.Scale("30d")
.Decay(0.5))
// 随机打散(避免同一用户总看到相同结果)
.RandomScore(rs => rs.Seed(userId).Weight(0.1)))
.ScoreMode(FunctionScoreMode.Sum)
.BoostMode(FunctionBoostMode.Multiply)
)));
return new SearchResult<ProductDocument>
{
Total = response.Total,
Items = response.Documents.ToList()
};
}五、深度分页:Scroll 与 Search After
// ============ Search After(推荐实时分页方案) ============
public async Task<SearchResult<ProductDocument>> SearchAfterAsync(SearchAfterRequest request)
{
var response = await _client.SearchAsync<ProductDocument>(s =>
{
s.Index(IndexName)
.Size(request.PageSize)
.Query(q => q.MatchAll());
// 排序字段必须与 Search After 的值对应
s.Sort(sort => sort
.Field(f => f.Field(p => p.SalesCount).Order(SortOrder.Desc))
.Field(f => f.Field(p => p.Id).Order(SortOrder.Asc)));
// 传入上一页的排序值
if (request.SortValues?.Any() == true)
{
s.SearchAfter(request.SortValues.Cast<object>().ToArray());
}
return s;
});
// 提取最后一条的排序值,作为下一页的 Search After 参数
var lastHit = response.Hits.LastOrDefault();
var nextSortValues = lastHit?.Sorts?.ToList();
return new SearchResult<ProductDocument>
{
Total = response.Total,
Items = response.Documents.ToList(),
NextSortValues = nextSortValues
};
}
// ============ Scroll(适合全量数据导出) ============
public async IAsyncEnumerable<List<ProductDocument>> ScrollAllAsync(
int batchSize = 1000,
[EnumeratorCancellation] CancellationToken ct = default)
{
var scrollTime = "5m";
var initialResponse = await _client.SearchAsync<ProductDocument>(s => s
.Index(IndexName)
.Size(batchSize)
.Scroll(scrollTime)
.Query(q => q.MatchAll())
.Sort(sort => sort.Field(f => f.Field("_doc"))), ct);
var scrollId = initialResponse.ScrollId;
var documents = initialResponse.Documents.ToList();
yield return documents;
while (documents.Count > 0 && !ct.IsCancellationRequested)
{
var scrollResponse = await _client.ScrollAsync<ProductDocument>(scrollTime, scrollId, ct);
if (!scrollResponse.IsValid || scrollResponse.Documents.IsNullOrEmpty())
break;
scrollId = scrollResponse.ScrollId;
documents = scrollResponse.Documents.ToList();
yield return documents;
}
// 清理 Scroll 上下文
if (scrollId != null)
{
await _client.ClearScrollAsync(c => c.ScrollId(scrollId), ct);
}
}六、聚合分析
// ============ 搜索结果聚合(筛选条件统计) ============
public async Task<SearchAggregation> GetSearchAggregationsAsync(string keyword)
{
var response = await _client.SearchAsync<ProductDocument>(s => s
.Index(IndexName)
.Size(0) // 不返回文档,只返回聚合结果
.Query(q => q
.MultiMatch(mm => mm
.Fields(f => f.Field(p => p.Name).Field(p => p.Description))
.Query(keyword)))
.Aggregations(a => a
// 按分类聚合
.Terms("categories", t => t
.Field(p => p.Category)
.Size(50))
// 按品牌聚合
.Terms("brands", t => t
.Field(p => p.Brand)
.Size(100))
// 价格区间统计
.Stats("price_stats", st => st.Field(p => p.Price))
// 价格区间分布
.Range("price_ranges", r => r
.Field(p => p.Price)
.Ranges(
rr => rr.Key("0-100").To(100),
rr => rr.Key("100-500").From(100).To(500),
rr => rr.Key("500-1000").From(500).To(1000),
rr => rr.Key("1000+").From(1000)
))
// 平均评分
.Avg("avg_rating", av => av.Field(p => p.Rating))
// 标签云
.Terms("tags", t => t
.Field(p => p.Tags)
.Size(30))
));
var categories = response.Aggregations
.Terms("categories")?.Buckets
.Select(b => new BucketItem { Key = b.Key.Value?.ToString()!, Count = b.DocCount ?? 0 })
.ToList() ?? new();
var brands = response.Aggregations
.Terms("brands")?.Buckets
.Select(b => new BucketItem { Key = b.Key.Value?.ToString()!, Count = b.DocCount ?? 0 })
.ToList() ?? new();
var priceStats = response.Aggregations.Stats("price_stats");
return new SearchAggregation
{
Categories = categories,
Brands = brands,
PriceStats = new PriceStatistics
{
Min = (decimal)(priceStats?.Min ?? 0),
Max = (decimal)(priceStats?.Max ?? 0),
Avg = (decimal)(priceStats?.Avg ?? 0),
Count = priceStats?.Count ?? 0
},
AverageRating = response.Aggregations.Avg("avg_rating")?.Value ?? 0
};
}七、自动补全
// ============ Completion Suggester 实现 ============
public class AutoCompleteService
{
private readonly IElasticClient _client;
private const string IndexName = "products";
public AutoCompleteService(IElasticClient client)
{
_client = client;
}
/// <summary>
/// 搜索建议(自动补全)
/// </summary>
public async Task<List<SuggestItem>> SuggestAsync(string prefix, int size = 10)
{
var response = await _client.SearchAsync<ProductDocument>(s => s
.Index(IndexName)
.Size(0)
.Suggest(su => su
.Completion("name_suggest", c => c
.Field(f => f.NameSuggest)
.Prefix(prefix)
.Size(size)
.SkipDuplicates(true)
.Fuzzy(f => f
.Fuzziness(Fuzziness.Auto)
.MinLength(3)
.PrefixLength(1)))
.Completion("pinyin_suggest", c => c
.Field("name.pinyin")
.Prefix(prefix)
.Size(size))
));
var suggestions = new List<SuggestItem>();
// 处理名称建议
var nameSuggest = response.Suggest["name_suggest"]?.FirstOrDefault();
if (nameSuggest != null)
{
foreach (var option in nameSuggest.Options ?? Enumerable.Empty<BaseSuggestOption<ProductDocument>>())
{
suggestions.Add(new SuggestItem
{
Text = option.Text,
Score = option.Score ?? 0,
Source = option.Source
});
}
}
return suggestions.DistinctBy(s => s.Text).Take(size).ToList();
}
/// <summary>
/// 拼写纠错
/// </summary>
public async Task<List<string>> SpellCheckAsync(string misspelled)
{
var response = await _client.SearchAsync<ProductDocument>(s => s
.Index(IndexName)
.Size(0)
.Suggest(su => su
.Term("spell_correction", t => t
.Field(f => f.Name)
.Text(misspelled)
.Sort(SortBy.Frequency)
.SuggestMode(SuggestMode.Always))));
var corrections = new List<string>();
var termSuggest = response.Suggest["spell_correction"]?.FirstOrDefault();
if (termSuggest != null)
{
foreach (var option in termSuggest.Options ?? Enumerable.Empty<TermOption>())
{
corrections.Add(option.Text);
}
}
return corrections;
}
}
// 写入文档时设置 suggest 字段
public class ProductIndexHelper
{
public static ProductDocument EnrichForSuggest(ProductDocument doc)
{
// 使用 IK 分词结果作为 suggest 输入
var inputs = new List<string> { doc.Name };
// 可以添加拼音输入
inputs.Add(ToPinyin(doc.Name));
// 可以添加首字母缩写
inputs.Add(GetFirstLetters(doc.Name));
doc.NameSuggest = new CompletionField
{
Input = inputs.ToArray(),
Weight = doc.SalesCount // 销量越高,建议权重越高
};
return doc;
}
private static string ToPinyin(string text)
{
// 使用第三方拼音库转换
return text; // 简化示例
}
private static string GetFirstLetters(string text)
{
// 获取首字母缩写
return new string(text.Where(char.IsLetter).Select(char.ToUpper).ToArray());
}
}八、与数据库同步
// ============ 基于 Change Stream 的数据同步 ============
public class SearchSyncService : BackgroundService
{
private readonly IServiceProvider _serviceProvider;
private readonly IElasticClient _elasticClient;
private readonly ILogger<SearchSyncService> _logger;
public SearchSyncService(
IServiceProvider serviceProvider,
IElasticClient elasticClient,
ILogger<SearchSyncService> logger)
{
_serviceProvider = serviceProvider;
_elasticClient = elasticClient;
_logger = logger;
}
protected override async Task ExecuteAsync(CancellationToken stoppingToken)
{
while (!stoppingToken.IsCancellationRequested)
{
try
{
using var scope = _serviceProvider.CreateScope();
var dbContext = scope.ServiceProvider.GetRequiredService<AppDbContext>();
// 获取最近更新的记录
var lastSync = DateTime.UtcNow.AddMinutes(-5);
var updatedProducts = await dbContext.Products
.Where(p => p.UpdatedAt >= lastSync)
.ToListAsync(stoppingToken);
if (updatedProducts.Any())
{
var documents = updatedProducts.Select(MapToDocument).ToList();
await BulkIndexWithRetryAsync(documents, stoppingToken);
_logger.LogInformation("同步 {Count} 条产品数据到 Elasticsearch", documents.Count);
}
}
catch (Exception ex)
{
_logger.LogError(ex, "搜索数据同步失败");
}
await Task.Delay(TimeSpan.FromMinutes(1), stoppingToken);
}
}
private async Task BulkIndexWithRetryAsync(
List<ProductDocument> documents, CancellationToken ct, int maxRetries = 3)
{
for (var attempt = 1; attempt <= maxRetries; attempt++)
{
try
{
var response = await _elasticClient.BulkAsync(b => b
.Index("products")
.IndexMany(documents), ct);
if (response.IsValid) return;
_logger.LogWarning("批量索引部分失败(第 {Attempt} 次)", attempt);
}
catch (Exception ex)
{
_logger.LogError(ex, "批量索引异常(第 {Attempt}/{Max} 次)", attempt, maxRetries);
}
await Task.Delay(TimeSpan.FromSeconds(Math.Pow(2, attempt)), ct);
}
}
private static ProductDocument MapToDocument(Product product) => new()
{
Id = product.Id.ToString(),
Name = product.Name,
Description = product.Description,
Category = product.Category,
Brand = product.Brand,
Price = product.Price,
SalesCount = product.SalesCount,
Rating = product.AverageRating,
Tags = product.Tags?.Split(',').ToList() ?? new(),
IsAvailable = product.Status == ProductStatus.Active,
CreatedAt = product.CreatedAt,
UpdatedAt = product.UpdatedAt,
Popularity = product.SalesCount * product.AverageRating
};
}九、搜索监控
// ============ 慢查询日志 ============
// 在创建索引时配置慢查询阈值
await _client.Indices.PutSettingsAsync("products", p => p
.Settings(s => s
.Search(slow => slow
.Query(q => q
.ThresholdWarn("5s")
.ThresholdInfo("2s")
.ThresholdDebug("1s"))
.Fetch(f => f
.ThresholdWarn("5s")
.ThresholdInfo("2s")))
.Indexing(i => i
.Slowlog(sl => sl
.ThresholdWarn("10s")
.ThresholdInfo("5s")))
));
// ============ 搜索性能指标收集 ============
public class SearchMetricsMiddleware
{
private readonly RequestDelegate _next;
public SearchMetricsMiddleware(RequestDelegate next)
{
_next = next;
}
public async Task InvokeAsync(HttpContext context, IMetricsCollector metrics)
{
if (context.Request.Path.StartsWithSegments("/api/search"))
{
var stopwatch = Stopwatch.StartNew();
await _next(context);
stopwatch.Stop();
metrics.RecordTiming("search.request.duration", stopwatch.ElapsedMilliseconds);
metrics.Increment("search.request.total");
if (stopwatch.ElapsedMilliseconds > 1000)
{
metrics.Increment("search.request.slow");
}
}
else
{
await _next(context);
}
}
}优点
- 查询性能卓越:基于倒排索引,亿级数据毫秒级响应
- 相关性评分:BM25 算法 + Function Score,精确控制排序
- 丰富的聚合:支持多维度统计分析,减少对数据库聚合查询的依赖
- 水平扩展:通过增加节点线性提升存储和查询能力
缺点
- 学习曲线:查询 DSL 复杂,分词器配置需要深入理解
- 数据一致性:与数据库之间是最终一致,存在同步延迟
- 资源消耗:JVM 内存占用较高,建议至少 8GB 堆内存
- 运维复杂:集群管理、分片策略、故障恢复都需要专业知识
性能注意事项
- 分片数量:建议每个分片 10-50GB 数据,分片过多增加协调开销
- Refresh Interval:写入密集场景可增大到 30s,减少 Segment 合并压力
- Routing:按用户或租户路由可以减少查询需要扫描的分片数
- Source Filtering:只返回需要的字段,减少网络传输
- Filter Cache:Filter 查询结果会被缓存,适合用于不变的筛选条件
总结
搜索引擎是提升用户体验的关键基础设施。在 .NET 应用中集成 Elasticsearch 需要关注三个层面:索引设计(分词器、映射)、查询优化(相关性调优、深度分页)和数据同步(近实时、一致性)。IK 分词器是中文搜索的基础,Function Score 是排序调优的利器,Search After 是深度分页的最佳实践。
关键知识点
| 知识点 | 要点 |
|---|---|
| IK 分词器 | ik_max_word 索引分词,ik_smart 搜索分词 |
| 倒排索引 | 全文搜索的核心数据结构 |
| BM25 算法 | 默认的相关性评分算法 |
| Function Score | 通过自定义函数调整排序权重 |
| Search After | 基于排序值的深度分页方案 |
| Scroll | 全量数据遍历方案(不适合实时) |
| Completion Suggester | 自动补全的专用数据结构 |
| 聚合 | 类似 SQL GROUP BY 但更强大 |
常见误区
误区:Elasticsearch 可以替代数据库
- 事实:ES 不支持事务,不适合作为主数据存储
误区:from + size 可以无限翻页
- 事实:默认限制 10000 条,深度分页应使用 Search After 或 Scroll
误区:中文搜索直接用 standard 分词器就行
- 事实:standard 会把中文逐字拆分,必须使用 IK 等中文分词器
误区:索引时和搜索时用同一个分词器
- 事实:索引时用 ik_max_word(细粒度),搜索时用 ik_smart(粗粒度)
误区:Elasticsearch 数据是实时可搜的
- 事实:默认 1 秒刷新间隔,写入后约 1 秒才可被搜到
进阶路线
- 向量搜索:kNN Search 支持语义搜索和推荐系统
- 跨集群搜索(CCS):在多个 ES 集群间执行联合搜索
- 索引生命周期管理(ILM):自动滚动、归档、删除索引
- 机器学习(ML):ES 内置异常检测和预测功能
- OpenSearch:AWS 维护的 ES 分支,功能类似但许可证更宽松
适用场景
| 场景 | 推荐方案 |
|---|---|
| 电商商品搜索 | IK 分词 + Function Score + 聚合 |
| 日志分析 | ELK Stack(ES + Logstash + Kibana) |
| 全文搜索 | MultiMatch + 高亮 |
| 自动补全 | Completion Suggester |
| 推荐系统 | 向量搜索 + Function Score |
| 数据看板 | 聚合 + Date Histogram |
落地建议
- 第一步:环境搭建。部署 ES 集群,安装 IK 分词器插件
- 第二步:索引设计。定义 Mapping,配置分词器和字段类型
- 第三步:数据同步。实现增量同步机制
- 第四步:基础查询。实现 MultiMatch 搜索和过滤
- 第五步:排序调优。引入 Function Score 个性化排序
- 第六步:自动补全。添加 Completion Suggester
- 第七步:监控告警。配置慢查询日志和性能指标
排错清单
复盘问题
- 当前搜索系统的平均响应时间是多少?P99 延迟呢?
- 搜索结果的相关性用户满意吗?有没有做过 A/B 测试?
- 数据库与 ES 之间的同步延迟是多少?如何处理延迟导致的不一致?
- ES 集群的健康状态如何?分片分布是否均衡?
- 是否有慢查询监控?最慢的查询是哪些?
