添加pom
<!-- Elasticsearch high-level REST client. The transitive elasticsearch and
     rest-client artifacts are excluded here and re-declared explicitly below
     so that all three dependencies are pinned to the same 7.5.1 version. -->
<dependency>
<groupId>org.elasticsearch.client</groupId>
<artifactId>elasticsearch-rest-high-level-client</artifactId>
<version>7.5.1</version>
<exclusions>
<exclusion>
<groupId>org.elasticsearch</groupId>
<artifactId>elasticsearch</artifactId>
</exclusion>
<exclusion>
<groupId>org.elasticsearch.client</groupId>
<artifactId>elasticsearch-rest-client</artifactId>
</exclusion>
</exclusions>
</dependency>
<!-- Low-level REST client, version-aligned with the high-level client. -->
<dependency>
<groupId>org.elasticsearch.client</groupId>
<artifactId>elasticsearch-rest-client</artifactId>
<version>7.5.1</version>
</dependency>
<!-- Core Elasticsearch library, version-aligned with the clients. -->
<dependency>
<groupId>org.elasticsearch</groupId>
<artifactId>elasticsearch</artifactId>
<version>7.5.1</version>
</dependency>
yml添加配置
# Elasticsearch connection settings, injected into ElasticConfig via @Value.
es:
  host: 192.168.1.107   # ES node address
  port: 9200            # HTTP port
  scheme: http          # http or https
初始化client
package com.zh.search.config;
import org.apache.http.HttpHost;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestClientBuilder;
import org.elasticsearch.client.RestHighLevelClient;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;

/**
 * Elasticsearch client configuration.
 *
 * Exposes a single shared {@link RestClientBuilder} so the low-level
 * {@link RestClient} and the {@link RestHighLevelClient} beans are
 * guaranteed to use the same host/port/scheme. Both clients implement
 * Closeable, so Spring's inferred destroy method closes them on shutdown.
 */
@Configuration
public class ElasticConfig {

    /** ES node address, e.g. 192.168.1.107 (from es.host). */
    @Value("${es.host}")
    public String host;

    /** ES HTTP port, e.g. 9200 (from es.port). */
    @Value("${es.port}")
    public int port;

    /** Connection scheme, http or https (from es.scheme). */
    @Value("${es.scheme}")
    public String scheme;

    /** Shared builder holding the single configured HttpHost. */
    @Bean
    public RestClientBuilder restClientBuilder() {
        return RestClient.builder(makeHttpHost());
    }

    /**
     * Low-level REST client, built from the shared builder instead of
     * duplicating the HttpHost construction (the original rebuilt an
     * identical HttpHost inline).
     */
    @Bean
    public RestClient restClient(@Autowired RestClientBuilder restClientBuilder) {
        return restClientBuilder.build();
    }

    /** Single place where the target HttpHost is assembled. */
    private HttpHost makeHttpHost() {
        return new HttpHost(host, port, scheme);
    }

    /** High-level client wrapping the shared builder's configuration. */
    @Bean
    public RestHighLevelClient restHighLevelClient(@Autowired RestClientBuilder restClientBuilder) {
        return new RestHighLevelClient(restClientBuilder);
    }
}
在 resources/mapper 目录下创建索引配置 json 文件(代码中按 mapper/setting.json 的路径加载,注意文件名与路径需一致)
setting.json
{
"number_of_shards": 5,
"number_of_replicas": 1,
"refresh_interval": "5s",
"analysis": {
"analyzer": {
// ik细粒度
"ikSearchAnalyzer": {
"type": "custom",
"tokenizer": "ik_max_word",
"char_filter": [
"tsconvert"
]
},
// ik粗粒度分词
"ikSmartSearchAnalyzer": {
"type": "custom",
"tokenizer": "ik_smart",
"char_filter": [
"tsconvert"
]
},
// 拼音分词
"pinyinSimpleAnalyzer": {
"tokenizer": "my_pinyin"
},
// 拼音,大小写,短语分词
"pinyinComplexAnalyzer": {
"tokenizer": "ik_smart",
"filter": [
"lowercase",
"pinyin_simple_filter",
"edge_ngram_filter"
]
},
// 大小写转换分词
"lowercaseAnalyzer": {
"type": "custom",
"tokenizer": "keyword",
"filter": "lowercase"
}
},
"tokenizer" : {
"my_pinyin" : {
"type" : "pinyin",
"keep_separate_first_letter" : false,
"keep_full_pinyin" : true,
"keep_original" : true,
"limit_first_letter_length" : 16,
"lowercase" : true,
"remove_duplicated_term" : true
}
},
"filter": {
// 短语过滤
"edge_ngram_filter": {
"type": "edge_ngram",
"min_gram": 1,
"max_gram": 50
},
// 拼音过滤
"pinyin_simple_filter": {
"type": "pinyin",
"first_letter": "prefix",
"padding_char": " ",
"limit_first_letter_length": 50, //设置first_letter结果的最大长度,默认值:16
// "keep_separate_first_letter" : false, //启用该选项时,将保留第一个字母分开,例如:刘德华> l,d,h,默认:false,注意:查询结果也许是太模糊,由于长期过频
// "keep_full_pinyin" : true, //当启用该选项,例如:刘德华> [ liu,de,hua],默认值:true
// "keep_original" : true, //启用此选项时,也将保留原始输入,默认值:false
// "remove_duplicated_term" : true, //启用此选项后,将删除重复的术语以保存索引,例如:de的> de,default:false,注意:位置相关的查询可能会受到影响
"lowercase": true //小写非中文字母,默认值:true
}
},
"char_filter": {
// 简繁体过滤
"tsconvert": {
"type": "stconvert",
"convert_type": "t2s"
}
}
}
}
创建索引映射文件,同样放在 resources/mapper 目录下(代码按 mapper/{index}-mapping.json 加载)
commodity-mapping.json
{
"properties": {
"id": {
"type": "integer"
},
"keyword": {
//text和keyword的区别text:存储数据时候,会自动分词,并生成索引,keyword:存储数据时候,不会分词建立索引
"type": "text",
"analyzer": "ikSearchAnalyzer",
"search_analyzer": "ikSmartSearchAnalyzer",
"fields": {
"pinyin": {
"type": "text",
"analyzer": "pinyinComplexAnalyzer",
"search_analyzer": "pinyinComplexAnalyzer",
"store": false,
"term_vector": "with_offsets"
}
}
},
"ownerNature": {
"type": "keyword"
},
"model": {
"type": "keyword",
//不能通过这个字段搜索
"index": false
},
"weight": {
"type": "integer"
},
"createTime": {
"type": "date",
"format": "yyyy-MM-dd HH:mm:ss"
}
}
}
采用json的方式,我觉得直观一点
创建索引(需要注意的是,7.x后,es删除了type,只允许存在一种type,不需要指定type的值,默认是_doc)
/**
 * Bootstrap hook: ensures the "commodity" index exists
 * (createIndex is a no-op when it already does).
 *
 * @throws Exception propagated from index creation
 */
public void init() throws Exception {
    this.createIndex("commodity");
}
/**
 * Creates an index with settings, mappings and an "{index}_alias" alias,
 * all loaded from classpath JSON files under mapper/. No-op when the
 * index already exists. (ES 7.x has a single implicit _doc type, so no
 * type is specified.)
 *
 * @param index index name; also selects mapper/{index}-mapping.json
 * @throws IOException if a resource cannot be read or the request fails
 */
public void createIndex(String index) throws IOException {
    // Skip creation when the index already exists.
    if (this.existsIndex(index)) {
        System.out.println(index + "索引库已经存在!");
        return;
    }
    CreateIndexRequest request = new CreateIndexRequest(index);
    // Analyzer/shard settings shared by all indices.
    request.settings(readClasspathJson("mapper/setting.json"), XContentType.JSON);
    // Field mapping specific to this index.
    request.mapping(readClasspathJson("mapper/" + index + "-mapping.json"), XContentType.JSON);
    // Register an alias so the physical index can be swapped without
    // touching query code.
    request.alias(new Alias(index + "_alias"));
    CreateIndexResponse createIndexResponse =
            restHighLevelClient.indices().create(request, RequestOptions.DEFAULT);
    boolean flag = createIndexResponse.isAcknowledged();
    if (flag) {
        System.out.println("创建索引库:" + index + "成功!");
    }
}

/**
 * Reads a UTF-8 JSON resource from the classpath.
 * try-with-resources closes the stream even when reading throws
 * (the original leaked the stream on a read failure).
 *
 * @param path classpath-relative resource path
 * @return file content joined with '\n'
 * @throws IOException if the resource is missing or unreadable
 */
private String readClasspathJson(String path) throws IOException {
    ClassPathResource resource = new ClassPathResource(path);
    try (InputStream in = resource.getInputStream()) {
        return String.join("\n", IOUtils.readLines(in, "UTF-8"));
    }
}
判断索引是否存在
/**
 * Checks whether an index exists on the cluster.
 *
 * @param index index name to probe
 * @return true when the index exists
 * @throws IOException on transport failure
 */
public boolean existsIndex(String index) throws IOException {
    GetIndexRequest request = new GetIndexRequest(index);
    // Query the cluster, not just locally cached node metadata.
    request.local(false);
    // Render response values human-readably (cosmetic only).
    request.humanReadable(true);
    return restHighLevelClient.indices().exists(request, RequestOptions.DEFAULT);
}
删除索引
/**
 * Deletes an index.
 *
 * @param index index name to delete
 * @return true when the cluster acknowledged the deletion
 * @throws IOException on transport failure
 */
public boolean delIndex(String index) throws IOException {
    DeleteIndexRequest deleteRequest = new DeleteIndexRequest(index);
    AcknowledgedResponse ack =
            restHighLevelClient.indices().delete(deleteRequest, RequestOptions.DEFAULT);
    return ack.isAcknowledged();
}
添加索引数据
/**
 * Saves (indexes) a document. kv must contain "index" (target index name)
 * and "id" (document id); the remaining entries become the document source,
 * matching the index's JSON mapping.
 *
 * @param kv key/value payload; "index" selects the target index
 * @return true when the document was created or updated
 * @throws IOException on transport failure
 */
public boolean save(Kv kv) throws IOException {
    IndexRequest request = new IndexRequest(kv.getStr("index"))
            .id(kv.getStr("id")).source(kv);
    IndexResponse response = restHighLevelClient.index(request, RequestOptions.DEFAULT);
    // BUG FIX: isFragment() describes XContent serialization shape, not the
    // write outcome. Check the actual operation result instead.
    String result = response.getResult().name();
    return "CREATED".equals(result) || "UPDATED".equals(result);
}
删除索引数据
/**
 * Deletes a document from the commodity index by id.
 *
 * @param id document id
 * @return true when a document was actually deleted (false when not found)
 * @throws IOException on transport failure
 */
public boolean delById(String id) throws IOException {
    DeleteRequest request = new DeleteRequest(ModuleConstants.COMMODITY.toLowerCase(), id);
    DeleteResponse response = restHighLevelClient.delete(request, RequestOptions.DEFAULT);
    // BUG FIX: isFragment() is unrelated to the delete outcome; the result
    // enum distinguishes DELETED from NOT_FOUND.
    return "DELETED".equals(response.getResult().name());
}
IK,拼音,短语分词分页搜索
// High-level ES client used to execute searches.
@Resource
private RestHighLevelClient restHighLevelClient;
// Stream channel used to publish search-keyword collection messages.
@Resource
private OutputChannel outputChannel;
/**
 * Paged keyword search combining IK (Chinese) analysis and pinyin matching.
 * Every search is also published to the keyword-collection channel, even
 * when the query itself fails.
 *
 * QueryBuilder cheat sheet:
 *   termQuery("key", obj)                 exact (non-analyzed) match
 *   termsQuery("key", obj1, obj2..)       match any of several exact values
 *   matchQuery("key", obj)                analyzed single-field match; field name takes no wildcard
 *   multiMatchQuery("text", "f1", "f2"..) match several fields; field names may use wildcards
 *   matchAllQuery()                       match all documents
 * Bool combinators:
 *   must(...)    AND
 *   mustNot(...) NOT
 *   should(...)  OR
 * More-like-this tuning parameters:
 *   percent_terms_to_match  fraction of terms that must match, default 0.3
 *   min_term_freq   minimum occurrences of a term within one document, default 2
 *   max_query_terms maximum number of terms in one query, default 25
 *   stop_words      stop words ignored during matching
 *   min_doc_freq    minimum number of documents a term must appear in, default unlimited
 *   max_doc_freq    maximum number of documents a term may appear in, default unlimited
 *   min_word_len    minimum term length, default 0
 *   max_word_len    maximum term length, default unlimited
 *   boost_terms     term weight, default 1
 *   boost           query weight, default 1
 *   analyzer        analyzer to use; defaults to the field's own analyzer
 */
@Override
public Page<SearchVo> page(SearchVo searchVo){
    // Page number/size come from the caller; total starts at 0.
    Page<SearchVo> page = new Page(searchVo.getCurrent(),searchVo.getSize(),0);
    try {
        // Build the search body.
        SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
        BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery();
        // boost(...) sets the per-clause weight.
        // IK-analyzed match on the keyword field.
        boolQueryBuilder.should(QueryBuilders.matchQuery("keyword", searchVo.getKeyword()).boost(2f));
        // Pinyin phrase match on the keyword.pinyin sub-field.
        boolQueryBuilder.should(QueryBuilders.matchPhraseQuery("keyword.pinyin", searchVo.getKeyword()).boost(2f));
        // Case-insensitive wildcard match (disabled).
        // boolQueryBuilder.should(QueryBuilders.wildcardQuery("keyword", "*"+searchVo.getKeyword().toLowerCase()+"*").boost(2f));
        // Optional exact filter on the merchant/owner nature.
        if(StrKit.notBlank(searchVo.getKeyword1())){
            boolQueryBuilder.must(QueryBuilders.termQuery("ownerNature",searchVo.getKeyword1()));
        }
        // At least one of the should clauses must match.
        boolQueryBuilder.minimumShouldMatch(1);
        // createTime range filter (disabled).
        // boolQueryBuilder.must(QueryBuilders.rangeQuery("createTime")
        // .from(DateKit.format(DateKit.getDayBegin(),"yyyy-MM-dd HH:mm:ss"))
        // .to(DateKit.format(DateKit.getDayBegin(),"yyyy-MM-dd HH:mm:ss")));
        sourceBuilder.query(boolQueryBuilder);
        // Source filtering (disabled): restrict returned fields.
        // String[] includeFields = new String[] {"keyword"};
        // sourceBuilder.fetchSource(includeFields,null);
        // Paging.
        sourceBuilder.from(searchVo.getFrom());
        sourceBuilder.size(searchVo.getSize());
        // sourceBuilder.sort("id", SortOrder.ASC); // sort rule (disabled)
        sourceBuilder.timeout(new TimeValue(60, TimeUnit.SECONDS));
        SearchRequest searchRequest = new SearchRequest(searchVo.getIndex());
        searchRequest.source(sourceBuilder);
        SearchResponse response = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);
        SearchHits searchHits = response.getHits();
        page.setTotal(searchHits.getTotalHits().value);
        List<SearchVo> list = new ArrayList<>();
        // Map each hit's _source back onto a SearchVo.
        for (SearchHit hit : searchHits.getHits()) {
            SearchVo vo = new SearchVo();
            Kv kv = Kv.create().set(hit.getSourceAsMap());
            vo.setId(kv.getStr("id"));
            vo.setKeyword(kv.getStr("keyword"));
            vo.setKeyword1(kv.getStr("ownerNature"));
            vo.setModel(kv.getStr("model"));
            list.add(vo);
        }
        page.setRecords(list);
    } catch (Exception e) {
        // NOTE(review): failures are swallowed and an empty page is returned;
        // consider logging via a proper logger instead of stderr.
        e.printStackTrace();
    }
    // Collect the search keyword: repoint the VO at the keyword index and
    // publish it on the message channel (runs even after a failed search).
    searchVo.setIndex(ModuleConstants.KEYWORD.toLowerCase());
    outputChannel.searchSaveOutput().send(MessageBuilder.withPayload(searchVo).build());
    return page;
}
IK,拼音,短语分词分页并高亮关键词搜索
// High-level ES client used to execute highlighted searches.
@Resource
private RestHighLevelClient restHighLevelClient;
/**
 * Paged keyword search (IK + pinyin) that wraps matched keyword fragments
 * in <high>...</high> tags. Pinyin hits are not highlighted by ES (its
 * highlighting would wrap the whole string), so un-tagged results get a
 * manual, case-insensitive literal wrap instead.
 *
 * @param searchVo carries keyword, target index and paging parameters
 * @return page of results whose keyword field carries highlight tags
 */
@Override
public Page<SearchVo> pageHigh(SearchVo searchVo){
    // Page number/size come from the caller; total starts at 0.
    Page<SearchVo> page = new Page(searchVo.getCurrent(), searchVo.getSize(), 0);
    try {
        SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
        BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery();
        // boost(...) sets the per-clause weight.
        // IK-analyzed match on the keyword field.
        boolQueryBuilder.should(QueryBuilders.matchQuery("keyword", searchVo.getKeyword()).boost(2f));
        // Pinyin phrase match on the keyword.pinyin sub-field.
        boolQueryBuilder.should(QueryBuilders.matchPhraseQuery("keyword.pinyin", searchVo.getKeyword()).boost(2f));
        // At least one of the should clauses must match.
        boolQueryBuilder.minimumShouldMatch(1);
        sourceBuilder.query(boolQueryBuilder);
        // Only fetch the keyword field from _source.
        String[] includeFields = new String[] {"keyword"};
        sourceBuilder.fetchSource(includeFields, null);
        // Highlight configuration with custom pre/post tags.
        List<String> highlightFieldList = new ArrayList<>();
        highlightFieldList.add("keyword");
        HighlightBuilder highlightBuilder = new HighlightBuilder();
        for (int x = 0; x < highlightFieldList.size(); x++) {
            HighlightBuilder.Field field = new HighlightBuilder.Field(highlightFieldList.get(x))
                    .preTags("<high>").postTags("</high>");
            highlightBuilder.field(field);
        }
        sourceBuilder.highlighter(highlightBuilder);
        // Paging.
        sourceBuilder.from(searchVo.getFrom());
        sourceBuilder.size(searchVo.getSize());
        sourceBuilder.timeout(new TimeValue(60, TimeUnit.SECONDS));
        // An empty index name would search all indices.
        SearchRequest searchRequest = new SearchRequest(searchVo.getIndex());
        searchRequest.source(sourceBuilder);
        SearchResponse response = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);
        SearchHits searchHits = response.getHits();
        page.setTotal(searchHits.getTotalHits().value);
        List<SearchVo> list = new ArrayList<>();
        // BUG FIX: the keyword is user input, so regex metacharacters
        // (e.g. "c++") must be quoted or compile() throws
        // PatternSyntaxException. "(?i)" keeps the match case-insensitive.
        Pattern pattern = Pattern.compile("(?i)" + Pattern.quote(searchVo.getKeyword()));
        for (SearchHit hit : searchHits.getHits()) {
            SearchVo vo = new SearchVo();
            Kv kv = Kv.create().set(hit.getSourceAsMap());
            vo.setKeyword(kv.getStr("keyword"));
            // Prefer the ES highlight fragment when present (pinyin is
            // deliberately excluded from highlighting, see javadoc).
            if (!StringUtils.isEmpty(hit.getHighlightFields().get("keyword"))) {
                Text[] text = hit.getHighlightFields().get("keyword").getFragments();
                vo.setKeyword(text[0].toString());
            }
            // ngram/fuzzy hits carry no tags: wrap occurrences manually.
            // BUG FIX: guard against a null keyword (hit without the field)
            // before calling contains().
            String keyword = vo.getKeyword();
            if (keyword != null && !keyword.contains("<high>")) {
                Matcher matcher = pattern.matcher(keyword);
                if (matcher.find()) {
                    String s = matcher.group();
                    vo.setKeyword(keyword.replace(s, "<high>" + s + "</high>"));
                }
            }
            list.add(vo);
        }
        page.setRecords(list);
    } catch (Exception e) {
        // NOTE(review): failures are swallowed and an empty page is returned;
        // consider logging via a proper logger instead of stderr.
        e.printStackTrace();
    }
    return page;
}