使用Python 的接口库elasticsearch 对ES数据库进行操作
安装
pip install elasticsearch
ES 文档:https://elasticsearch-py.readthedocs.io/en/master/
1、创建新的索引。中文搜索需要指定ik分词器,类似结巴jieba
IK分词器文档: https://github.com/medcl/elasticsearch-analysis-ik
PUT http://localhost:9200/blog
{
"settings" : {
"index" : {
"analysis.analyzer.default.type": "ik_max_word"
}
}
}
2、检查分词效果
如果没有使用中文分词器,默认会按单个字符切分;结果中出现词组,说明分词器设置成功
POST http://localhost:9200/blog/_analyze
{"field":"title", "text":"拼多多确认警方成立专案组 实际资损大概率低于千万"}
{
"tokens": [
{
"token": "拼",
"start_offset": 0,
"end_offset": 1,
"type": "CN_CHAR",
"position": 0
},
{
"token": "多多",
"start_offset": 1,
"end_offset": 3,
"type": "CN_WORD",
"position": 1
},
{
"token": "确认",
"start_offset": 3,
"end_offset": 5,
"type": "CN_WORD",
"position": 2
},
{
"token": "警方",
"start_offset": 5,
"end_offset": 7,
"type": "CN_WORD",
"position": 3
}
...
]
}
3、添加数据
from elasticsearch import Elasticsearch
# Create the client. With no arguments it presumably targets the local node
# used throughout this tutorial (http://localhost:9200) -- confirm for the
# installed client version.
es = Elasticsearch()
# Bulk payload: mind the format -- each action line is followed by exactly
# one line holding the document for that action.
bulk_doc = """
{"index":{ "_index": "blog", "_type": "post", "_id": "001" }}
{"title": "比亚迪:今年将推出多款新车型","post_time": "2019-01-21 14:22:58","source": "36氪"}
{"index":{ "_index": "blog", "_type": "post", "_id": "002" }}
{"title": "亚马逊:2018年近20万第三方卖家年销售额超10万美元,同比增长40%","post_time": "2019-01-21 14:21:01","source": "雨果网"}
{"index":{ "_index": "blog", "_type": "post", "_id": "003" }}
{"title": "拼多多确认警方成立专案组 实际资损大概率低于千万","post_time": "2019-01-21 14:15:52","source": "新浪财经"}
"""
# Pass the payload by keyword: positional API arguments are not accepted by
# every release of the client, while `body=` is understood by name.
result = es.bulk(body=bulk_doc)
print(result)
"""
{
"took":30,
"errors":false,
"items":[
{
"index":{
"_index":"blog",
"_type":"post",
"_id":"001",
"_version":1,
"result":"created",
"_shards":{
"total":2,
"successful":1,
"failed":0
},
"_seq_no":0,
"_primary_term":1,
"status":201
}
}
...
]
}
"""
4、搜索查询
# Term query: matches documents whose analyzed `title` contains the exact
# token "多多" (a token produced by the analyzer in the _analyze output above).
query_body = {
    "query": {
        "term": {
            "title": "多多",
        },
    },
}
# Use keyword arguments: the positional order of index / doc_type / body is
# not the same across client major versions, so positional calls can silently
# send the index name as the request body.
ret = es.search(index="blog", doc_type="post", body=query_body)
print(ret)
"""
{
"took":2,
"timed_out":false,
"_shards":{
"total":5,
"successful":5,
"skipped":0,
"failed":0
},
"hits":{
"total":1,
"max_score":0.2876821,
"hits":[
{
"_index":"blog",
"_type":"post",
"_id":"003",
"_score":0.2876821,
"_source":{
"title":"拼多多确认警方成立专案组 实际资损大概率低于千万",
"post_time":"2019-01-21 14:15:52",
"source":"新浪财经"
}
}
]
}
}
"""
如果不使用分词器,也可以使用短语查询
# Phrase query on `title`: looks for the text "拼多多" as a contiguous phrase.
query_body = {"query": {"match_phrase": {"title": "拼多多"}}}