医疗知识图谱问答 —— 数据同步

发布时间: 2023-08-02 10:35:19    作者: 北桥苏

前言

        前面的文章已经介绍了 neo4j 服务的本地安装,以及数据的增删改查操作方法。本文进入 Python 项目,目标是完成医疗知识图谱的构建和问答机器人的代码实现;由于篇幅较长,本文只介绍知识图谱的构建(即把医疗数据同步到 neo4j)。

 

环境

Anaconda3

Python3.8

Py2neo (新版)

 

数据来源 (结构)

 

编码

1. 引入依赖

import json
from py2neo import Graph, Node

2. 类的初始化 (连接 neo4j)

def __init__(self, data_path="./data/medical.json", uri='bolt://localhost:7687',
			 auth=('neo4j', 'beiqiaosu123456')):
	"""Remember where the source data lives and open the Neo4j connection.

	Args:
		data_path: Path of the JSON-lines file read by ``read_data``.
		uri: Bolt URI of the Neo4j server.
		auth: ``(user, password)`` pair handed to ``py2neo.Graph``.

	All parameters default to the values that were previously hard-coded,
	so calling the constructor without arguments behaves as before.
	"""
	# NOTE(review): the default password is embedded in source; prefer passing
	# `auth` explicitly (e.g. from configuration) outside of local experiments.
	self.data_path = data_path
	self.neo4j = Graph(uri, auth=auth)

3.  读取数据

def read_data(self):
	"""Parse the medical data file into entity sets and relation lists.

	``self.data_path`` is read line by line; every non-blank line must be one
	JSON object describing a disease.

	Returns:
		A 20-tuple, in this order:

		* ``diseases, symptoms, producers, departments, drugs, foods, checks``
		  -- sets of entity names;
		* ``disease_info`` -- list with one property dict per disease;
		* ``rels_symptom, rels_acompany, rels_commondrug, rels_recommenddrug,
		  rels_noteat, rels_doeat, rels_recommendeat, rels_check,
		  rels_drug_producer, rels_department, rels_category`` -- lists of
		  ``[start_name, end_name]`` pairs;
		* ``rels_drug_producer`` once more. The trailing duplicate is kept
		  because the callers in this file unpack exactly 20 values.

	Raises:
		KeyError: if a record lacks ``name``, ``desc``, ``prevent``,
			``cause``, ``symptom``, ``check`` or ``drug_detail``.
		json.JSONDecodeError: if a non-blank line is not valid JSON.
	"""
	# Entity names (de-duplicated into sets on return).
	diseases = []
	symptoms = []
	departments = []
	drugs = []
	foods = []
	producers = []
	checks = []

	# One property dict per disease.
	disease_info = []

	# Relations, each stored as [start_name, end_name].
	rels_symptom = []        # disease -> symptom
	rels_acompany = []       # disease -> complication
	rels_category = []       # disease -> department
	rels_department = []     # department -> department
	rels_commondrug = []     # disease -> commonly used drug
	rels_recommenddrug = []  # disease -> recommended drug
	rels_noteat = []         # disease -> food to avoid
	rels_doeat = []          # disease -> food that is fine to eat
	rels_recommendeat = []   # disease -> recommended food
	rels_check = []          # disease -> examination item
	rels_drug_producer = []  # text before "(" -> text inside the parentheses

	# Scalar attributes that a record may omit; they then default to ''.
	optional_fields = ('get_prob', 'yibao_status', 'easy_get', 'get_way',
					   'cure_lasttime', 'cured_prob', 'cost_money')
	# Food-related keys and the relation list each one feeds.
	food_relations = (('not_eat', rels_noteat),
					  ('do_eat', rels_doeat),
					  ('recommand_eat', rels_recommendeat))

	# The context manager closes the file even if a record fails to parse.
	with open(self.data_path, encoding="utf8", mode="r") as source:
		for line in source:
			# A blank or trailing empty line is not a record; skip it rather
			# than letting json.loads fail on it.
			if not line.strip():
				continue
			data_json = json.loads(line)

			disease = data_json['name']
			diseases.append(disease)

			disease_dict = {field: data_json.get(field, '') for field in optional_fields}
			disease_dict['name'] = disease
			disease_dict['desc'] = data_json['desc']
			disease_dict['prevent'] = data_json['prevent']
			disease_dict['cause'] = data_json['cause']
			disease_dict['cure_department'] = data_json.get('cure_department', [])
			disease_info.append(disease_dict)

			symptom = data_json['symptom']
			rels_symptom.extend([disease, symptom_i] for symptom_i in symptom)
			symptoms += symptom

			# Departments: a single entry is the disease's department; with two
			# entries the first is linked to the second and the disease is
			# attached to the first. Other lengths produce no relation.
			if "cure_department" in data_json:
				cure_department = data_json['cure_department']
				departments += cure_department
				if len(cure_department) == 1:
					rels_category.append([disease, cure_department[0]])
				if len(cure_department) == 2:
					large, small = cure_department
					rels_department.append([large, small])
					rels_category.append([disease, large])

			# Complications are collected into the symptom name set as well.
			if 'acompany' in data_json:
				acompanys = data_json['acompany']
				rels_acompany.extend([disease, acompany] for acompany in acompanys)
				symptoms += acompanys

			if 'common_drug' in data_json:
				commondrug = data_json['common_drug']
				rels_commondrug.extend([disease, drug_c] for drug_c in commondrug)
				drugs += commondrug

			# Guarded on its own key: it used to be read unconditionally inside
			# the 'common_drug' branch, which raised KeyError when absent and
			# ignored it when 'common_drug' was missing.
			if 'recommand_drug' in data_json:
				recommenddrug = data_json['recommand_drug']
				rels_recommenddrug.extend([disease, drug_recom] for drug_recom in recommenddrug)
				drugs += recommenddrug

			for key, relations in food_relations:
				if key in data_json:
					items = data_json[key]
					relations.extend([disease, item] for item in items)
					foods += items

			checkitem = data_json['check']
			for check_i in checkitem:
				# This one item is excluded from the relations (it stays in the
				# `checks` name set). Presumably its single quotes broke the
				# string-built Cypher in create_rel -- TODO confirm.
				if check_i != "血清5'-核苷酸酶(5'-NT)":
					rels_check.append([disease, check_i])
			checks += checkitem

			# Each drug_detail entry is split on "(": the leading part goes to
			# `producers`, the part after the last "(" (minus ")") is its partner.
			for name in data_json['drug_detail']:
				parts = name.split("(")
				producers.append(parts[0])
				rels_drug_producer.append([parts[0], parts[-1].replace(")", "")])

	return set(diseases), set(symptoms), set(producers), set(departments), set(drugs), set(foods), set(
		checks), disease_info, rels_symptom, rels_acompany, rels_commondrug, rels_recommenddrug, rels_noteat, \
		   rels_doeat, rels_recommendeat, rels_check, rels_drug_producer, rels_department, rels_category, rels_drug_producer

4. 创建节点

def create_medical_nodes(self):
	"""Create the graph nodes from the parsed data file.

	Only the disease nodes (with their full property set) are created; the
	steps for the other entity types are kept below as disabled lines, as in
	the original, and can be enabled individually.

	Returns:
		None.
	"""
	print("start create nodes")
	# Use this instance's own reader instead of a module-level
	# `build_medical_graph` object, so the method works on any instance.
	# Only the first eight results (entity sets + disease_info) matter here.
	diseases, symptoms, producers, departments, drugs, foods, checks, disease_info = \
		self.read_data()[:8]

	# Disease nodes (name only)
	# self.create_node('Diseases', diseases)
	# Symptom nodes
	# self.create_node('Symptoms', symptoms)
	# Department nodes
	# self.create_node('Departments', departments)
	# Drug nodes
	# self.create_node('Drugs', drugs)
	# Food nodes
	# self.create_node('Foods', foods)
	# Drug producer nodes
	# self.create_node('Producers', producers)
	# Examination item nodes
	# self.create_node('Checks', checks)
	self.create_disease_node('Diseases', disease_info)

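# Generic node creation: one node per name. Disease nodes carry extra
# properties and are created separately by create_disease_node below.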
def create_node(self, label, values):
	"""Create one node labelled ``label`` for every name in ``values``.

	Each node gets a single ``name`` property. A progress line is printed
	before each node is written through ``self.neo4j``.

	Returns:
		The number of names processed.
	"""
	created = 0
	for created, node_name in enumerate(values, start=1):
		print("".join(("节点: ", label, ", 名称为: ", node_name)))
		self.neo4j.create(Node(label, name=node_name))
	return created

def create_disease_node(self, label, values):
	"""Create one node labelled ``label`` per disease property dict.

	Args:
		label: Node label to use.
		values: Iterable of dicts; each must contain every key listed in
			``properties`` below (``read_data`` builds them that way).

	Returns:
		The number of nodes created. The counter was previously never
		incremented, so the method always returned 0.
	"""
	# Only these keys become node properties; anything else in the dict is ignored.
	properties = ('name', 'desc', 'prevent', 'cause', 'get_prob', 'yibao_status',
				  'easy_get', 'get_way', 'cure_lasttime', 'cured_prob',
				  'cost_money', 'cure_department')
	count = 0
	for disease in values:
		print("节点" + label + ", 名称:" + disease['name'])
		node = Node(label, **{key: disease[key] for key in properties})
		self.neo4j.create(node)
		count += 1
	return count

5. 创建关联边

def create_medical_rels(self):
	"""Create the relationships between already-existing nodes.

	Only the disease -> examination item step is active; the other steps are
	kept below as disabled lines, as in the original, and can be enabled
	individually.

	Returns:
		None.
	"""
	print("start create rels")
	# Use this instance's own reader instead of a module-level
	# `build_medical_graph` object. Results 8..18 are the relation lists;
	# the entity sets before them and the duplicated last element are unused.
	(rels_symptom, rels_acompany, rels_commondrug, rels_recommenddrug, rels_noteat,
	 rels_doeat, rels_recommendeat, rels_check, rels_drug_producer, rels_department,
	 rels_category) = self.read_data()[8:19]

	# Disease -> symptom
	# self.create_rel("Diseases", "Symptoms", rels_symptom, "has_symptoms", "疾病症状")
	# Disease -> complication
	# self.create_rel("Diseases", "Symptoms", rels_acompany, "acompany_with", "疾病并发症")
	# Disease -> department
	# self.create_rel("Diseases", "Departments", rels_category, "belongs_to", "所属科室")
	# Department -> department
	# self.create_rel("Departments", "Departments", rels_department, "belongs_to", "所属")
	# Disease -> commonly used drug
	# self.create_rel("Diseases", "Drugs", rels_commondrug, "common_drug", "常用备药")
	# Disease -> recommended drug
	# self.create_rel("Diseases", "Drugs", rels_recommenddrug, "recommand_drug", "推荐用药")
	# Disease -> food to avoid
	# self.create_rel("Diseases", "Foods", rels_noteat, "not_eat", "忌吃")
	# Disease -> food that is fine to eat
	# self.create_rel("Diseases", "Foods", rels_doeat, "do_eat", "可以吃")
	# Disease -> recommended food
	# self.create_rel("Diseases", "Foods", rels_recommendeat, "recomment_eat", "推荐吃")
	# Disease -> examination item
	self.create_rel("Diseases", "Checks", rels_check, "need_check", "需要检查")
	# Producer -> drug
	# NOTE(review): the disabled call originally used the label "drugs"; nodes
	# are labelled "Drugs" in create_medical_nodes, so the capitalised label is
	# used here.
	# self.create_rel("Producers", "Drugs", rels_drug_producer, "drug_of", "生产药品")


def create_rel(self, start_node, end_node, list, rel_name, rel_attr):
	"""Create a relationship for every ``[start_name, end_name]`` pair.

	Args:
		start_node: Label of the start nodes.
		end_node: Label of the end nodes.
		list: Iterable of pairs; item[0] is the start node's ``name`` and
			item[1] the end node's ``name``. (The parameter name shadows the
			builtin but is kept so existing keyword callers still work.)
		rel_name: Relationship type.
		rel_attr: Value stored in the relationship's ``name`` property.

	Returns:
		The number of pairs processed.
	"""
	# Labels and relationship types cannot be passed as Cypher parameters, so
	# they are still interpolated; the callers in this file pass fixed literals.
	# Node names and the attribute value are sent as parameters instead of
	# being spliced into the query text, so a name containing a quote can no
	# longer break (or inject into) the statement.
	query = (
		"MATCH (src:%s), (dst:%s) "
		"WHERE src.name = $start_name AND dst.name = $end_name "
		"CREATE (src)-[rel:%s {name: $rel_attr}]->(dst)"
	) % (start_node, end_node, rel_name)

	count = 0
	for item in list:
		count += 1
		s = item[0]
		e = item[1]

		print(f"创建边:{rel_name},({start_node}->{end_node}),点1:{s}点2:{e}")

		self.neo4j.run(query, start_name=s, end_name=e, rel_attr=rel_attr)

	return count

6. 导出节点数据

# 导出实体的节点分词
def export_data(self):
	"""Write the entity names to text files under ``dict/``, one name per line.

	Files written: symptoms, producers, departments, drugs, foods and checks
	(``dict/<kind>.txt``, UTF-8). Disease names are not exported; that step
	was already disabled in the original.

	Returns:
		None.
	"""
	import os  # local import so this method does not depend on the file header

	# Use this instance's own reader instead of a module-level
	# `build_medical_graph` object. The first seven results are the entity
	# name sets; the leading one (diseases) is unused here.
	_, symptoms, producers, departments, drugs, foods, checks = self.read_data()[:7]

	# Create the output directory on first use instead of failing on open().
	os.makedirs("dict", exist_ok=True)

	exports = (
		# ("dict/diseases.txt", diseases),  # disabled in the original
		("dict/symptoms.txt", symptoms),
		("dict/producers.txt", producers),
		("dict/departments.txt", departments),
		("dict/drugs.txt", drugs),
		("dict/foods.txt", foods),
		# checks.txt used to be written twice with identical content; once is enough.
		("dict/checks.txt", checks),
	)
	for path, names in exports:
		# The context manager flushes and closes each file; the handles were
		# previously left open.
		with open(path, encoding="utf-8", mode="w+") as out:
			out.write("\n".join(names))