自动补全、搜索建议

发布时间:2024-01-10 09:34:31 作者:粒子先生

作为系统的使用者,我希望在用户输入搜索词的过程中,系统能进行自动补全和搜索建议,协助用户输入更精准的关键词,提高后续全文搜索阶段文档匹配的准确度。

实现方案

  • 用户刚开始输入的过程中,使用Completion Suggester进行关键词前缀匹配,刚开始匹配项会比较多,随着用户输入字符增多,匹配项越来越少。
  • 如果Completion Suggester已经到了零匹配,可能是用户输入错误,尝试Phrase Suggester进行短语推荐。
  • 如果Phrase Suggester没有找到任何option,开始尝试Term Suggester进行单词推荐。
  • 自动补全字段是通过将查询字段分词后,去重,然后加入到suggest字段中。

自动补全

自动补全DSL示例

GET product_completion_index/_search
{
"from": 0,
"size": 100,
"suggest": {
 "czbk-suggest": {
  "prefix": "小米",
  "completion": {
   "field": "searchkey",
   "size": 20,
   "skip_duplicates": true
  }
 }
}
}

自动补全字段数据获取方法

/**
 * Builds the completion-suggest inputs for a house document and stores them on the template.
 *
 * <p>The title, layout description, round service, description, subway line name and subway
 * station name are analyzed with {@code ik_smart}. Each resulting term becomes one suggest
 * input, except numeric tokens and terms shorter than two characters. The district is added
 * verbatim as an extra input because it must not be tokenized. Inputs are de-duplicated so the
 * same text is never stored twice for one document.
 *
 * @param indexTemplate the document whose suggest list is populated
 * @return {@code true} once the suggest list has been set; {@code false} when the analyze
 *         response carries no token list
 */
private boolean updateSuggest(HouseIndexTemplate indexTemplate) {
    // Feed every text field to the analyzer in one request; ik_smart splits them into terms.
    AnalyzeRequestBuilder requestBuilder = new AnalyzeRequestBuilder(
            this.esClient, AnalyzeAction.INSTANCE, INDEX_NAME, indexTemplate.getTitle(),
            indexTemplate.getLayoutDesc(), indexTemplate.getRoundService(),
            indexTemplate.getDescription(), indexTemplate.getSubwayLineName(),
            indexTemplate.getSubwayStationName());
    requestBuilder.setAnalyzer("ik_smart");

    AnalyzeResponse response = requestBuilder.get();
    List<AnalyzeResponse.AnalyzeToken> tokens = response.getTokens();
    if (tokens == null) {
        logger.warn("Can not analyze token for house: " + indexTemplate.getHouseId());
        return false;
    }

    // Tracks inputs already emitted so repeated terms across fields are added only once.
    java.util.Set<String> seenInputs = new java.util.HashSet<>();
    List<HouseSuggest> suggests = new ArrayList<>();
    for (AnalyzeResponse.AnalyzeToken token : tokens) {
        String term = token.getTerm();
        // Exclude numeric tokens and terms shorter than two characters.
        if ("<NUM>".equals(token.getType()) || term == null || term.length() < 2) {
            continue;
        }
        if (!seenInputs.add(term)) {
            continue;
        }

        HouseSuggest suggest = new HouseSuggest();
        suggest.setInput(term);
        suggests.add(suggest);
    }

    // District-level completion: this field is not tokenized, so it is appended as-is.
    // A missing or empty district is skipped rather than stored as an unusable input.
    String district = indexTemplate.getDistrict();
    if (district != null && !district.isEmpty() && seenInputs.add(district)) {
        HouseSuggest districtSuggest = new HouseSuggest();
        districtSuggest.setInput(district);
        suggests.add(districtSuggest);
    }

    indexTemplate.setSuggest(suggests);
    return true;
}

自动补全代码示例

package com.oldlu.service;
import com.oldlu.commons.pojo.CommonEntity;
import org.elasticsearch.action.DocWriteResponse;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.rest.RestStatus;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.suggest.completion.CompletionSuggestion;
import java.util.List;
import java.util.Map;
/**
 * Document operation interface.
 *
 * <p>Package: com.oldlu.service<br>
 * Company: http://www.oldlu.com/
 */
public interface ElasticsearchDocumentService {

  /**
   * Auto-completion (completion suggest).
   *
   * @param commonEntity the request parameters for the suggest lookup
   * @return the list of suggestion strings
   * @throws Exception if the lookup fails
   */
  List<String> cSuggest(CommonEntity commonEntity) throws Exception;
}

Service

/*
  * @Description: 自动补全 根据用户的输入联想到可能的词或者短语
  * @Method: suggester
  * @Param: [commonEntity]
  * @Update:
  * @since: 1.0.0
  * @Return: org.elasticsearch.action.search.SearchResponse
  *
  */
  public List<String> cSuggest(CommonEntity commonEntity) throws Exception {
    //定义返回
    List<String> suggestList = new ArrayList<>();
    //构建查询请求
    SearchRequest searchRequest = new
SearchRequest(commonEntity.getIndexName());
    //通过查询构建器定义评分排序
    SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
    searchSourceBuilder.sort(new ScoreSortBuilder().order(SortOrder.DESC));
    //构造搜索建议语句,搜索条件字段
    CompletionSuggestionBuilder completionSuggestionBuilder =new
CompletionSuggestionBuilder(commonEntity.getSuggestFileld());
    //搜索关键字
    completionSuggestionBuilder.prefix(commonEntity.getSuggestValue());
    //去除重复
    completionSuggestionBuilder.skipDuplicates(true);
    //匹配数量
    completionSuggestionBuilder.size(commonEntity.getSuggestCount());
    searchSourceBuilder.suggest(new SuggestBuilder().addSuggestion("czbk-
suggest", completionSuggestionBuilder));
    //czbk-suggest为返回的字段,所有返回将在czbk-suggest里面,可写死,sort按照评分排
序
    searchRequest.source(searchSourceBuilder);
    //定义查找响应
    SearchResponse suggestResponse = client.search(searchRequest,
RequestOptions.DEFAULT);
    //定义完成建议对象
    CompletionSuggestion completionSuggestion =
suggestResponse.getSuggest().getSuggestion("czbk-suggest");
    List<CompletionSuggestion.Entry.Option> optionsList =
completionSuggestion.getEntries().get(0).getOptions();
    //从optionsList取出结果
    if (!CollectionUtils.isEmpty(optionsList)) {
      optionsList.forEach(item ->
suggestList.add(item.getText().toString()));
   }
    return suggestList;
 }

Controller

/**
   * Auto-completion endpoint.
   *
   * <p>Rejects the request with {@code ResultEnum.PARAM_ISNULL} when the index name, suggest
   * field or suggest value is empty; otherwise delegates to the document service and wraps
   * the suggestions, or {@code ResultEnum.ERROR} if the service throws.
   *
   * <p>NOTE(review): this is a GET mapping that reads its input from the request body;
   * confirm that every caller is able to send a body with GET.
   *
   * @param commonEntity the request parameters
   * @return the response wrapper carrying the suggestions or an error status
   */
  @GetMapping(value = "/csuggest")
  public ResponseData cSuggest(@RequestBody CommonEntity commonEntity) {
    ResponseData response = new ResponseData();

    // All three parameters are mandatory for a suggest lookup.
    boolean missingParam = StringUtils.isEmpty(commonEntity.getIndexName())
        || StringUtils.isEmpty(commonEntity.getSuggestFileld())
        || StringUtils.isEmpty(commonEntity.getSuggestValue());
    if (missingParam) {
      response.setResultEnum(ResultEnum.PARAM_ISNULL);
      return response;
    }

    try {
      // Ask the service for completion suggestions and report how many came back.
      List<String> suggestions = elasticsearchDocumentService.cSuggest(commonEntity);
      response.setResultEnum(suggestions, ResultEnum.SUCCESS, suggestions.size());
      logger.info(TipsEnum.CSUGGEST_GET_DOC_SUCCESS.getMessage());
    } catch (Exception e) {
      // Any failure is logged with its cause and surfaced as a generic error result.
      logger.error(TipsEnum.CSUGGEST_GET_DOC_FAIL.getMessage(), e);
      response.setResultEnum(ResultEnum.ERROR);
    }
    return response;
  }

拼写纠错

短语推荐DSL示例

GET product_completion_index/_search
{
"suggest": {
 "czbk-suggestion": {
  "text": "adidaas官方旗舰店",
  "phrase": {
   "field": "name",
   "size": 13
  }
 }
}
}

拼写纠错代码示例

Service

/*
  * @Description: 拼写纠错
  * @Method: psuggest
  * @Param: [commonEntity]
  * @Update:
  * @since: 1.0.0
  * @Return: java.util.List<java.lang.String>
  *
  */
  @Override
  public String pSuggest(CommonEntity commonEntity) throws Exception {
    //定义返回
    String pSuggestString = new String();
    //定义查询请求
SearchRequest searchRequest = new
SearchRequest(commonEntity.getIndexName());
    //定义查询条件构建器
    SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
    //定义排序器
    searchSourceBuilder.sort(new ScoreSortBuilder().order(SortOrder.DESC));
    //构造短语建议器对象(参数为匹配列)
    PhraseSuggestionBuilder pSuggestionBuilder = new
PhraseSuggestionBuilder(commonEntity.getSuggestFileld());
    //搜索关键字(被纠错的值)
    pSuggestionBuilder.text(commonEntity.getSuggestValue());
    //匹配数量
    pSuggestionBuilder.size(1);
    searchSourceBuilder.suggest(new SuggestBuilder().addSuggestion("czbk-
suggest", pSuggestionBuilder));
    searchRequest.source(searchSourceBuilder);
    //定义查找响应
    SearchResponse suggestResponse = client.search(searchRequest,
RequestOptions.DEFAULT);
    //定义短语建议对象
    PhraseSuggestion phraseSuggestion =
suggestResponse.getSuggest().getSuggestion("czbk-suggest");
    //获取返回数据
    List<PhraseSuggestion.Entry.Option> optionsList =
phraseSuggestion.getEntries().get(0).getOptions();
    //从optionsList取出结果
    if (!CollectionUtils.isEmpty(optionsList)
&&optionsList.get(0).getText()!=null) {
      pSuggestString = optionsList.get(0).getText().string().replaceAll("
","");
   }
    return pSuggestString;
 }

Controller

 
/**
  * Spelling-correction endpoint.
  *
  * <p>Rejects the request with {@code ResultEnum.PARAM_ISNULL} when the index name, suggest
  * field or suggest value is empty; otherwise delegates to the document service and wraps
  * the corrected phrase, or {@code ResultEnum.ERROR} if the service throws.
  *
  * <p>NOTE(review): this is a GET mapping that reads its input from the request body;
  * confirm that every caller is able to send a body with GET.
  *
  * @param commonEntity the request parameters
  * @return the response wrapper carrying the corrected phrase or an error status
  */
 @GetMapping(value = "/psuggest")
 public ResponseData pSuggest(@RequestBody CommonEntity commonEntity) {
   ResponseData response = new ResponseData();

   // All three parameters are mandatory for a suggest lookup.
   boolean missingParam = StringUtils.isEmpty(commonEntity.getIndexName())
       || StringUtils.isEmpty(commonEntity.getSuggestFileld())
       || StringUtils.isEmpty(commonEntity.getSuggestValue());
   if (missingParam) {
     response.setResultEnum(ResultEnum.PARAM_ISNULL);
     return response;
   }

   try {
     // Ask the service for the corrected phrase; no count is reported for a single string.
     String corrected = elasticsearchDocumentService.pSuggest(commonEntity);
     response.setResultEnum(corrected, ResultEnum.SUCCESS, null);
     logger.info(TipsEnum.PSUGGEST_GET_DOC_SUCCESS.getMessage());
   } catch (Exception e) {
     // Any failure is logged with its cause and surfaced as a generic error result.
     logger.error(TipsEnum.PSUGGEST_GET_DOC_FAIL.getMessage(), e);
     response.setResultEnum(ResultEnum.ERROR);
   }
   return response;
 }