elasticsearch自动补全建议功能
数据入库操作
ESmapping要求
PUT music
{
"mappings": {
"_doc" : {
"properties" : {
"suggest" : {
"type" : "completion"
},
"title" : {
"type": "keyword"
}
}
}
}
}
DocType类
from elasticsearch_dsl import DocType, Date, Nested, Boolean, \
analyzer, InnerObjectWrapper, Completion, Keyword, Text, Integer
from elasticsearch_dsl.analysis import CustomAnalyzer as _CustomAnalyzer
from elasticsearch_dsl.connections import connections
connections.create_connection(hosts=["localhost"])
class CustomAnalyzer(_CustomAnalyzer):
"""
避免ik_analyzer参数传递时会报错的问题
"""
def get_analysis_definition(self):
return {}
ik_analyzer = CustomAnalyzer("ik_max_word", filter=["lowercase"])
class ArticleType(DocType):
suggest = Completion(analyzer=ik_analyzer)
...
Items类
from models.es_types import ArticleType
from elasticsearch_dsl.connections import connections
es = connections.create_connection(ArticleType._doc_type.using)
def gen_suggests(index, info_tuple):
# 根据字符串生成搜索建议数组
used_words = set()
suggests = []
for text, weight in info_tuple:
if text:
# 调用es的analyze接口分析字符串
words = es.indices.analyze(index=index, analyzer="ik_max_word", params={'filter':["lowercase"]}, body=text)
anylyzed_words = set([r["token"] for r in words["tokens"] if len(r["token"])>1])
new_words = anylyzed_words - used_words
else:
new_words = set()
if new_words:
suggests.append({"input":list(new_words), "weight":weight})
class JobBoleArticleItem(scrapy.Item):
...
def save_to_es(self):
...
article.suggest = gen_suggests(ArticleType._doc_type.index, ((article.title,10),(article.tags, 7)))
article.save()
redis_cli.incr("jobbole_count")
return
ES搜索语法
POST myindex/_search?pretty
{
"suggest": {
"my-suggest": {
"text": "linux",
"completion": {
"field": "suggest",
"fuzzy": {
"fuzziness": 2
}
}
}
},
"_source": ["title"]
}
自动补全建议核心代码
# django_views中的写法
from search.models import ArticleType
class SearchSuggest(View):
def get(self, request):
key_words = request.GET.get('s','')
re_datas = []
if key_words:
s = ArticleType.search()
s = s.suggest('my_suggest', key_words, completion={
"field":"suggest", "fuzzy":{
"fuzziness":2
},
"size": 10
})
suggestions = s.execute_suggest()
for match in suggestions.my_suggest[0].options:
source = match._source
re_datas.append(source["title"])
return HttpResponse(json.dumps(re_datas), content_type="application/json")
elasticsearch内容搜索功能
数据入库操作
和上面一样
搜索核心代码
# django_views中的写法
from elasticsearch import Elasticsearch
client = Elasticsearch(hosts=["127.0.0.1"])
class SearchView(View):
def get(self, request):
key_words = request.GET.get("q","")
s_type = request.GET.get("s_type", "article")
page = request.GET.get("p", "1")
try:
page = int(page)
except:
page = 1
start_time = datetime.now()
response = client.search(
index= "jobbole",
body={
"query":{
"multi_match":{
"query":key_words,
"fields":["tags", "title", "content"]
}
},
"from":(page-1)*10,
"size":10,
"highlight": {
"pre_tags": ['<span class="keyWord">'],
"post_tags": ['</span>'],
"fields": {
"title": {},
"content": {},
}
}
}
)
end_time = datetime.now()
last_seconds = (end_time-start_time).total_seconds()
total_nums = response["hits"]["total"]
if (page%10) > 0:
page_nums = int(total_nums/10) +1
else:
page_nums = int(total_nums/10)
hit_list = []
for hit in response["hits"]["hits"]:
hit_dict = {}
if "title" in hit["highlight"]:
hit_dict["title"] = "".join(hit["highlight"]["title"])
else:
hit_dict["title"] = hit["_source"]["title"]
if "content" in hit["highlight"]:
hit_dict["content"] = "".join(hit["highlight"]["content"])[:500]
else:
hit_dict["content"] = hit["_source"]["content"][:500]
hit_dict["create_date"] = hit["_source"]["create_date"]
hit_dict["url"] = hit["_source"]["url"]
hit_dict["score"] = hit["_score"]
hit_list.append(hit_dict)
return render(request, "result.html", {"page":page,
"all_hits":hit_list,
"key_words":key_words,
"total_nums":total_nums,
"page_nums":page_nums,
"last_seconds":last_seconds
})
scrapy框架+django框架组合使用
github项目参考