python操作elasticsearch-全文检索、拼写纠错、补全提示

发布时间 2023-07-17 15:24:45作者: 蕝戀

1、首先安装elasticsearch包

pip install elasticsearch(不指定版本时安装的是最新版客户端;如果想要特定的大版本,比如 5.x,可以在包名后面加上版本数字,即 pip install elasticsearch5)

"""
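1. First install the elasticsearch package:
    pip install elasticsearch
   (Without a version this installs the latest client. For a specific major
   version, e.g. 5.x, append the number to the package name:
   pip install elasticsearch5)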
"""

from elasticsearch import Elasticsearch

# Address(es) of the Elasticsearch node(s) handed to the client.
hosts = ["http://127.0.0.1:9200"]

# Module-level client shared by the example functions in this file.
# NOTE(review): the credentials are hard-coded; consider loading them from
# configuration or the environment before using this outside a demo.
# Author's note: passing basic_auth=("elastic", "swq1996") instead of
# http_auth had no effect in their setup.
es = Elasticsearch(
    hosts=hosts,
    http_auth=("elastic", "swq1996"),
    sniff_on_start=True,  # sniff the ES cluster nodes before starting
    sniff_on_connection_fail=True,  # refresh node info when a node connection fails
    sniffer_timeout=60,  # refresh node info every 60 seconds
)


def test():
    """Fetch document "24" from the ``articles`` index and print its title."""
    document = es.get(index="articles", doc_type="article", id="24")
    source = document["_source"]
    print(source["title"])

    # Uncomment to also print cluster information:
    # print(es.info(human=True))


def test_search(search_text):
    """Full-text search: run a ``match`` query on ``title`` and print the response.

    Args:
        search_text: Text to match against the ``title`` field.
    """
    match_title = {"match": {"title": search_text}}
    request_body = {
        "_source": ["title"],
        "query": match_title,
    }
    response = es.search(index="articles", doc_type="article", body=request_body)
    print(response)


def suggest_test(search_text):
    """Spelling correction: query the phrase suggester and print the response.

    Args:
        search_text: User input that should be spell-corrected.
    """
    phrase_suggester = {
        "phrase": {
            "field": "title",  # field the suggestions are generated from
            "size": 3,
        }
    }
    request_body = {
        "_source": ["title"],
        "suggest": {
            "text": search_text,  # text typed by the user (the text to correct)
            # Arbitrary label chosen here; it shows up in the query result.
            "word-phrase": phrase_suggester,
        },
    }
    response = es.search(index="articles", doc_type="article", body=request_body)
    print(response)


def completion_suggest_test(search_text):
    """Completion suggestions: print the raw response for a typed prefix.

    Args:
        search_text: Prefix typed by the user that should be completed.
    """
    body_dict = {
        "suggest": {  # suggestion request
            # Arbitrary label; used to look the suggestions up in the result.
            "title-suggest": {
                # Use the caller's input instead of a fixed literal so the
                # function completes what was actually asked for.
                "prefix": search_text,
                "completion": {  # completion suggester
                    # Field of the index ("suggest") that supplies completions.
                    "field": "suggest"
                },
            }
        }
    }
    ret = es.search(index="completions", doc_type="words", body=body_dict)
    print(ret)


def dsl_test():
    """Demo of elasticsearch-dsl: spelling correction, then input completion.

    Prints the phrase suggestions for "pyth" (and the text of the first
    option), followed by the completion suggestions for "地区".
    """
    # https://elasticsearch-dsl.readthedocs.io/en/latest/search_dsl.html
    from elasticsearch6_dsl import Search

    # Full-text search (kept for reference).
    # NOTE(review): the calls below discard their return values, while the
    # live code in this function reassigns the result of suggest() and of
    # slicing — presumably these need `search = search....` too; confirm
    # against the elasticsearch-dsl docs.
    # search = Search(using=es, index="articles")
    # search.source("title,user_id")
    # search.query("match", title="中国")
    # search.filter("term", status=2)
    # search.sort("-user_id")
    # resp = search.execute()
    # print(resp.to_dict())

    # Pagination: slice the search and assign the result back.
    # # {"from": 10, "size": 10}
    # search = search[10:20]

    # Spelling correction.
    # The third argument picks the suggester: term, phrase or completion.
    # No doc type has to be given here (no suggest.doc_type("article")),
    # which is a good deal nicer than passing a dict as the body.
    phrase_search = Search(using=es, index="articles").suggest(
        "my_suggest", "pyth", phrase={"field": "title"}
    )
    phrase_response = phrase_search.execute()
    corrections = phrase_response.suggest.my_suggest
    print(corrections)
    print(corrections[0].options[0].text)

    # Input completion.
    completion_search = Search(using=es, index="completions").suggest(
        "my_completion", "地区", completion={"field": "suggest"}
    )
    completion_response = completion_search.execute()
    print(completion_response.suggest.my_completion)


if __name__ == "__main__":
    # Other demos; uncomment the one you want to run.
    # test()
    # test_search("中国")
    # suggest_test("pyth")
    # completion_suggest_test("地区")
    dsl_test()

elasticsearch-dsl

直接用原生客户端操作 elasticsearch 比较麻烦,也容易写错,因为有些查询需要嵌套很多层字典。

所以有一个叫 elasticsearch-dsl 的包可以简化查询的构造;安装时同样要选择与 es 版本对应的包(例如上面代码中导入的 elasticsearch6_dsl 对应 es 6.x)。

参考:https://elasticsearch-dsl.readthedocs.io/en/latest/