使用java restclient连接elasticsearch

发布时间 2023-05-09 18:25:12作者: 月习

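Java 连接 ES 的方式有 Transport Client、REST Client(分 Low Level 和 High Level 两种)以及新的 Java API Client。官方已经把 Transport Client 和 High Level REST Client 标记为过时(Low Level REST Client 仍在维护),Java API Client 是 ES 7.15 之后推出的。这里用的是 ES 7,仍以 High Level REST Client 方式进行连接测试。下面开始使用 High Level Java REST Client。

maven依赖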

  • org.elasticsearch.client:elasticsearch-rest-high-level-client

  • org.elasticsearch.client:elasticsearch-rest-client

  • org.elasticsearch:elasticsearch

maven配置

    <properties>
    	<elasticsearch.version>7.12.1</elasticsearch.version>
    </properties>
    <dependencies>
            <dependency>
                <groupId>org.elasticsearch</groupId>
                <artifactId>elasticsearch</artifactId>
                <version>${elasticsearch.version}</version>
            </dependency>
            <dependency>
                <groupId>org.elasticsearch.client</groupId>
                <artifactId>elasticsearch-rest-client</artifactId>
                <version>${elasticsearch.version}</version>
            </dependency>
            <dependency>
                <groupId>org.elasticsearch.client</groupId>
                <artifactId>elasticsearch-rest-high-level-client</artifactId>
                <version>${elasticsearch.version}</version>
            </dependency>
            <dependency>
                <groupId>commons-logging</groupId>
                <artifactId>commons-logging</artifactId>
                <version>1.2</version>
            </dependency>
    
            <dependency>
                <groupId>com.alibaba</groupId>
                <artifactId>fastjson</artifactId>
            </dependency>
    
            <dependency>
                <groupId>junit</groupId>
                <artifactId>junit</artifactId>
                <scope>test</scope>
            </dependency>
            <dependency>
                <groupId>org.projectlombok</groupId>
                <artifactId>lombok</artifactId>
            </dependency>
        </dependencies>
    

准备条件

还是基于上篇文章预先创建一个测试用的test索引,mapping字段信息如下:

  "mappings":{
    "properties": {
      "title":{
        "type":"text",
        "analyzer": "ik_max_word",
        "search_analyzer": "ik_smart"
      },
      "actor":{
        "type": "text",
        "analyzer": "ik_max_word",
        "search_analyzer": "ik_smart"
      },
      "score":{
        "type":"double"
      }
     }
   }

创建一个数据对象

/**
 * Film document stored in the test index.
 *
 * <p>The field names match the properties of the index mapping ({@code title}, {@code actor},
 * {@code score}). Lombok generates the accessors, a no-args constructor and an all-args
 * constructor; the all-args constructor takes its arguments in field declaration order
 * (title, score, actor), so the field order must not be changed.
 */
@Data
@NoArgsConstructor
@AllArgsConstructor
public class Film implements Serializable {
    /** Explicit version id so the serialized form does not depend on a compiler-generated value. */
    private static final long serialVersionUID = 1L;

    /** Film title. */
    private String title;
    /** Rating score. */
    private double score;
    /** Cast, kept as one delimited string. */
    private String actor;
}

建立连接

这里使用junit进行测试,定义一个公共的RestHighLevelClient类型client连接。后续测试查询执行都基于该连接下。

//连接变量
RestHighLevelClient client;
//测试索引名称
public static String INDEX_NAME = "test";
@Before
public void init(){
  client = new RestHighLevelClient(RestClient.builder(
  //可以放置多个es地址
  new HttpHost("localhost",9200,"http"))
);

索引操作就不练习了,一般在可视化工具里初始化好,这里只练习查询操作。

添加文档 (IndexRequest)

/**
 * Indexes one {@code Film} document under id "1" and checks that the write succeeded.
 *
 * @throws IOException if the request cannot be sent or the response cannot be read
 */
@Test
public void index1() throws IOException {
  // Target index plus an explicit document id.
  IndexRequest request = new IndexRequest(INDEX_NAME).id("1");
  Film f = new Film("大话西游",9.5,"周星驰,吴孟达,朱茵");
  request.source(JSON.toJSONString(f), XContentType.JSON);
  IndexResponse response = client.index(request, RequestOptions.DEFAULT);
  // Result is "created" when the id did not exist yet and "updated" when an existing document was replaced.
  System.out.println(response.getResult().getLowercase());
  // A newly created document answers 201 (CREATED); overwriting an existing one answers 200 (OK).
  RestStatus status = response.status();
  Assert.assertTrue("unexpected status: " + status,
      status == RestStatus.CREATED || status == RestStatus.OK);
}

按ID查询文档(GetRequest)

/**
 * Fetches document "1" from the test index and prints its source, or a marker when it is absent.
 */
@Test
public void get1() throws IOException {
  // Address the document by index name and id.
  GetRequest lookup = new GetRequest(INDEX_NAME, "1");
  // A specific version could be requested here via lookup.version(11).
  GetResponse found = client.get(lookup, RequestOptions.DEFAULT);
  // Bail out early when the document is missing.
  if (!found.isExists()) {
    System.out.println("not exists");
    return;
  }
  // Print the _source content as a map.
  System.out.println(found.getSourceAsMap());
}
/**
 输出:{actor=周星驰,吴孟达,朱茵, score=9.5, title=大话西游}
*/

/**
 * Fetches document "1" but asks for the {@code title} field of its source only.
 */
@Test
public void get2() throws IOException {
  // Source filter: fetching enabled, include list holds "title", no exclude list.
  String[] includedFields = new String[]{"title"};
  FetchSourceContext sourceFilter = new FetchSourceContext(true, includedFields, null);

  GetRequest lookup = new GetRequest(INDEX_NAME, "1");
  lookup.fetchSourceContext(sourceFilter);

  GetResponse found = client.get(lookup, RequestOptions.DEFAULT);
  if (!found.isExists()) {
    System.out.println("not exists");
    return;
  }
  System.out.println(found.getSourceAsMap());
}
/**
 输出:{title=大话西游}
*/

    /**
     * Checks whether a document with id "11" exists in the test index and prints the answer.
     *
     * @throws IOException if the request cannot be sent or the response cannot be read
     */
    @Test
    public void exists() throws IOException {
        GetRequest request = new GetRequest(INDEX_NAME,"11");
        // Only a boolean is needed, so skip _source and stored fields to keep the request light.
        request.fetchSourceContext(new FetchSourceContext(false));
        request.storedFields("_none_");
        // True when the document exists.
        boolean exists = client.exists(request, RequestOptions.DEFAULT);
        System.out.println(exists);
    }

更新文档(UpdateRequest)

// Partial update of document "1" in the test index.
UpdateRequest updateRequest = new UpdateRequest(INDEX_NAME, "1");
// Variant 1: partial document passed as alternating key/value arguments.
updateRequest.doc("score", 9.6, "title", "大话西游1");
// Variant 2: partial document passed as a map.
// NOTE(review): this second doc(...) call presumably replaces the values set above,
// so only the map content would be sent — confirm against the client documentation.
Map<String, Object> partialDoc = new HashMap<>();
partialDoc.put("title", "大话西游2");
partialDoc.put("score", 9.7);
updateRequest.doc(partialDoc);
UpdateResponse updateResponse = client.update(updateRequest, RequestOptions.DEFAULT);
int httpStatus = updateResponse.status().getStatus();
Assert.assertEquals(200, httpStatus);

删除文档(DeleteRequest)

// Delete document "1" from the test index and expect an HTTP 200 answer.
DeleteRequest deleteRequest = new DeleteRequest(INDEX_NAME, "1");
DeleteResponse deleteResponse = client.delete(deleteRequest, RequestOptions.DEFAULT);
int httpStatus = deleteResponse.status().getStatus();
Assert.assertEquals(200, httpStatus);

按条件删除(DeleteByQueryRequest)

// Delete every document whose title field contains the term "大话".
TermQueryBuilder titleTerm = new TermQueryBuilder("title", "大话");
DeleteByQueryRequest deleteByQuery = new DeleteByQueryRequest(INDEX_NAME);
deleteByQuery.setQuery(titleTerm);
BulkByScrollResponse outcome = client.deleteByQuery(deleteByQuery, RequestOptions.DEFAULT);
// Print how many documents were deleted.
long deletedCount = outcome.getStatus().getDeleted();
System.out.println(deletedCount);

bulk批量操作(BulkRequest)

就是把多个indexrequest、updaterequest、deleterequest放到bulkrequest

// Collect several write operations and send them to the cluster in one request.
BulkRequest request = new BulkRequest();
request.add(new IndexRequest(INDEX_NAME).id("1").source(JSON.toJSONString(new Film("大话西游1",9.3,"周星驰")),XContentType.JSON))
       .add(new IndexRequest(INDEX_NAME).id("2").source(JSON.toJSONString(new Film("大话西游2",9.5,"周星驰")),XContentType.JSON))
       .add(new DeleteRequest(INDEX_NAME).id("3"));
BulkResponse responses = client.bulk(request, RequestOptions.DEFAULT);
// True when at least one of the operations failed.
System.out.println(responses.hasFailures());
responses.forEach(bulkItemResponse -> {
    // A failed item carries no response object, so report the failure instead of dereferencing it.
    if (bulkItemResponse.isFailed()) {
        System.out.println(bulkItemResponse.getOpType().name() + "-failed-" + bulkItemResponse.getFailureMessage());
        return;
    }
    System.out.println(bulkItemResponse.getOpType().name()+"-"+ bulkItemResponse.getResponse().getResult().getLowercase() +"-"+bulkItemResponse.getResponse().status().getStatus());
    // Narrow the generic item response to the concrete type of its operation.
    switch (bulkItemResponse.getOpType()){
        case INDEX:
        case CREATE:
            IndexResponse indexResp = bulkItemResponse.getResponse();
            break;
        case DELETE:
            DeleteResponse delResp = bulkItemResponse.getResponse();
            break;
        case UPDATE:
            UpdateResponse updateResp = bulkItemResponse.getResponse();
            break;
        default:
            break;
    }
});

检索(SearchRequest)

检索使用一个公共的SearchRequest请求。可以设置不同的querybuilder进行不同的条件查询。

1、matchAllQuery

// match_all: return every document of the index.
SearchSourceBuilder source = new SearchSourceBuilder();
source.query(QueryBuilders.matchAllQuery());
SearchRequest searchRequest = new SearchRequest(INDEX_NAME);
searchRequest.source(source);

SearchResponse result = client.search(searchRequest, RequestOptions.DEFAULT);
// HTTP status of the search call.
int httpStatus = result.status().getStatus();
System.out.println(httpStatus);
// One line per hit: the document source as a map.
result.getHits().forEach(hit -> System.out.println(hit.getSourceAsMap()));
/**
输出:这是全量测试数据,下面查询输出都针对这个数据基础检索
4:{actor=韩庚/唐嫣/吴京/莫文蔚, score=4.1, title=大话西游3}
5:{actor=六小龄童/迟重瑞/马德华/徐少华, score=9.7, title=西游记}
6:{actor=周星驰/元秋/元华/黄圣依/梁小龙/陈国坤, score=8.8, title=功夫}
7:{actor=周星驰/袁咏仪/罗家英/陈宝莲, score=8.7, title=国产凌凌漆}
8:{actor=周星驰, score=8.7, title=西游大话}
1:{actor=周星驰,吴孟达,朱茵, score=9.5, title=大话西游}
11:{actor=大话西游前传,周星驰, score=7.5, title=西游篇之1}
9:{actor=黄渤,文章, score=7.5, title=大话之西游}
2:{actor=黄渤,文章, score=8.7, title=西游降魔篇}
*/

2、TermQuery

// Term query: the value is not analyzed again; it is looked up in the index as one fixed token.
SearchSourceBuilder source = new SearchSourceBuilder();
source.query(new TermQueryBuilder("actor", "周星驰"));
// Paging: start at offset 0 and return at most 5 hits.
source.from(0);
source.size(5);
// Search timeout of 60 seconds.
source.timeout(new TimeValue(60, TimeUnit.SECONDS));

SearchRequest searchRequest = new SearchRequest(INDEX_NAME);
searchRequest.source(source);

SearchResponse result = client.search(searchRequest, RequestOptions.DEFAULT);
int httpStatus = result.status().getStatus();
System.out.println(httpStatus);
result.getHits().forEach(hit -> System.out.println(hit.getSourceAsMap()));
/**
输出:
{actor=周星驰, score=8.7, title=西游大话}
{actor=周星驰,吴孟达,朱茵, score=9.5, title=大话西游}
{actor=周星驰/袁咏仪/罗家英/陈宝莲, score=8.7, title=国产凌凌漆}
{actor=大话西游前传,周星驰, score=7.5, title=西游篇之1}
{actor=周星驰/元秋/元华/黄圣依/梁小龙/陈国坤, score=8.8, title=功夫}
*/

3、matchQuery

// Match query: the query text is analyzed; the operator decides how the resulting terms are combined.
SearchSourceBuilder source = new SearchSourceBuilder();
source.query(QueryBuilders.matchQuery("title", "大话西游").operator(Operator.OR));
/*
 * Sorting:
 *   ScoreSortBuilder sorts by relevance score,
 *   FieldSortBuilder sorts by a document field.
 * Per the original note, the default order is score descending, then id ascending.
 */
source.sort(new FieldSortBuilder("score").order(SortOrder.DESC));
// source.trackTotalHits(true) could be enabled here.
source.size(2);
// Fields to include in the returned source; no exclude list.
String[] includedFields = new String[]{"title", "score"};
source.fetchSource(includedFields, null);

SearchRequest searchRequest = new SearchRequest(INDEX_NAME);
searchRequest.source(source);

SearchResponse result = client.search(searchRequest, RequestOptions.DEFAULT);
System.out.println(result.status().getStatus());
// Total number of hits; usable as the total for paging.
// NOTE(review): this count appears to be capped by default unless trackTotalHits(true) is set — confirm.
long totalHits = result.getHits().getTotalHits().value;
System.out.println(totalHits);
// One line per hit: relevance score and source map.
result.getHits().forEach(hit -> System.out.println(hit.getScore()+":"+hit.getSourceAsMap()));
/** 因为加了sort,score没有值
输出:
NaN:{score=9.5, title=大话西游}
NaN:{score=4.1, title=大话西游3}
*/

这里检索“大话西游”,为什么“西游记”没有被查出来呢?因为 title 字段是这样定义的:

"title":{
  "type":"text",
  "analyzer": "ik_max_word",
  "search_analyzer": "ik_smart"
}

analyzer 指定索引创建时候使用ik_max_word,search_analyzer指定检索的时候使用ik_smart分词器。 _analyze执行下会发现ik_smart分词器对 大话西游的分词结果只有一个{大话西游 }词条。所以无法检索出西游记。ik_max_word分词结果是{大话西游、大话、西游}三个词条。

4、multiMatchQuery

多列匹配,检索关键字去匹配多个列字段

/*
 * multi_match: one query text is matched against several fields.
 * Here "大话西游" is matched against both the title and the actor field.
 */
SearchSourceBuilder source = new SearchSourceBuilder();
source.query(QueryBuilders.multiMatchQuery("大话西游", "title", "actor"));

SearchRequest searchRequest = new SearchRequest(INDEX_NAME);
searchRequest.source(source);

SearchResponse result = client.search(searchRequest, RequestOptions.DEFAULT);
System.out.println(result.status().getStatus());
// Total number of hits.
long totalHits = result.getHits().getTotalHits().value;
System.out.println(totalHits);
// One line per hit: relevance score and source map.
result.getHits().forEach(hit -> System.out.println(hit.getScore()+":"+hit.getSourceAsMap()));
/**
输出:actor和title包含 大话西游的都可以查出
1.9787104:{actor=大话西游前传,周星驰, score=7.5, title=西游篇之1}
1.1451323:{actor=周星驰,吴孟达,朱茵, score=9.5, title=大话西游}
1.0077165:{actor=韩庚/唐嫣/吴京/莫文蔚, score=4.1, title=大话西游3}
*/

5、boolQuery

多条件组合查询

/*
 * bool query, combining several conditions:
 *   must               - content the document has to contain
 *   mustNot            - content the document must not contain
 *   filter             - extra filter applied on top of the other conditions
 *   should             - optional conditions
 *   minimumShouldMatch - how many should conditions have to match at least
 */
BoolQueryBuilder combined = QueryBuilders.boolQuery();
combined.must(QueryBuilders.matchQuery("title", "大话西游"));
combined.filter(QueryBuilders.rangeQuery("score").gte(8d));
combined.should(QueryBuilders.matchQuery("actor", "周星驰"));
combined.minimumShouldMatch(1);

SearchSourceBuilder source = new SearchSourceBuilder();
source.query(combined);
SearchRequest searchRequest = new SearchRequest(INDEX_NAME);
searchRequest.source(source);

SearchResponse result = client.search(searchRequest, RequestOptions.DEFAULT);
System.out.println(result.status().getStatus());
// Total number of hits.
long totalHits = result.getHits().getTotalHits().value;
System.out.println(totalHits);
// One line per hit: relevance score and source map.
result.getHits().forEach(hit -> System.out.println(hit.getScore()+":"+hit.getSourceAsMap()));
/**
输出:
1.7510496:{actor=周星驰,吴孟达,朱茵, score=9.5, title=大话西游}
*/

6、matchPhraseQuery

/*
 * match_phrase: every term of the analyzed query has to match, and the terms have to
 * appear consecutively. Per the original note, titles such as "西游之大话" or "西游大话"
 * are therefore not matched.
 */
SearchSourceBuilder source = new SearchSourceBuilder();
source.query(QueryBuilders.matchPhraseQuery("title", "大话西游"));

SearchRequest searchRequest = new SearchRequest(INDEX_NAME);
searchRequest.source(source);

SearchResponse result = client.search(searchRequest, RequestOptions.DEFAULT);
System.out.println(result.status().getStatus());
// Total number of hits.
long totalHits = result.getHits().getTotalHits().value;
System.out.println(totalHits);
// One line per hit: relevance score and source map.
result.getHits().forEach(hit -> System.out.println(hit.getScore()+":"+hit.getSourceAsMap()));
/**
输出:
1.1451323:{actor=周星驰,吴孟达,朱茵, score=9.5, title=大话西游}
1.0077165:{actor=韩庚/唐嫣/吴京/莫文蔚, score=4.1, title=大话西游3}
*/

7、matchquery 高亮显示

命中关键词添加标签高亮显示

// Match query on the title; the operator decides how the analyzed terms are combined.
SearchSourceBuilder source = new SearchSourceBuilder();
source.query(QueryBuilders.matchQuery("title", "大话西游").operator(Operator.OR));

// Highlighting: wrap matched terms of the title field in the given tags.
HighlightBuilder highlighting = new HighlightBuilder();
highlighting.highlighterType("unified");
highlighting.preTags("<font color='red'>");
highlighting.postTags("</font>");
highlighting.field("title");
source.highlighter(highlighting);

SearchRequest searchRequest = new SearchRequest(INDEX_NAME);
searchRequest.source(source);

SearchResponse result = client.search(searchRequest, RequestOptions.DEFAULT);
System.out.println(result.status().getStatus());

result.getHits().forEach(hit -> {
	// Highlight information of this hit, keyed by field name.
	Map<String, HighlightField> highlights = hit.getHighlightFields();
	// Overwrite each highlighted field in the source map with its tagged text.
	highlights.forEach((fieldName, highlight) -> {
		// Join all fragments of the field into one string.
		StringBuilder tagged = new StringBuilder();
		for (Text fragment : highlight.fragments()) {
			tagged.append(fragment);
		}
		hit.getSourceAsMap().put(fieldName, tagged.toString());
	});
	System.out.println(hit.getScore()+":"+hit.getSourceAsMap());
});
/**
输出:匹配关键词 大话西游 添加了font标签
1.1451323:{actor=周星驰,吴孟达,朱茵, score=9.5, title=<font color='red'>大话西游</font>}
1.0077165:{actor=韩庚/唐嫣/吴京/莫文蔚, score=4.1, title=<font color='red'>大话西游</font>3}
*/

还有一些查询,这里就测试到这里,更多参考官方文档

https://www.elastic.co/guide/en/elasticsearch/client/java-rest/7.17/java-rest-high.html