etcd: 分布式键值存储,用于Milvus集群元数据管理
minio: 对象存储服务,存储向量数据和索引文件
milvus-standalone: 向量数据库主服务
attu: Milvus的Web管理界面
网站URL→网页抓取→HTML解析→文本分割→向量化→存储到Milvus
用户查询→向量化→相似性搜索→知识库检索→LLM生成答案
fmt = "\n=== {:30} ===\n"
search_latency_fmt = "search latency = {:.4f}s"
num_entities, dim = 8, 8
milvus_collection_name = "ai_answer"


class SearchEngine:
    """Question answering over a Milvus collection plus a chat model.

    The class connects to a local Milvus server, looks up stored text by
    embedding similarity and passes the retrieved text, together with the
    user's question, to a chat model.

    NOTE(review): ``add_html_to_vectordb`` and ``insert_embedding`` are
    labelled ``indexer_by_milvus.py`` in the original listing and their class
    header is not shown; they are kept here in source order - confirm the
    intended enclosing class.
    """

    # Chunk size handed to the text splitter. The value comes from the inline
    # "# 8192" note next to ``chunk_size``; the attribute itself is not
    # defined anywhere in the visible listing - NOTE(review): confirm.
    MODEL_CHUNK_SIZE = 8192

    def __init__(self):
        """Connect to Milvus, open the collection and set the OpenAI API key.

        Raises:
            KeyError: if ``OPENAI_API_KEY`` is missing from the environment.
        """
        connections.connect("default", host="localhost", port="19530")
        has = utility.has_collection(milvus_collection_name)
        print(f"Does collection {milvus_collection_name} exist in Milvus: {has}")
        self.milvus_client = Collection(milvus_collection_name)
        self.milvus_collection_name = milvus_collection_name
        openai.api_key = os.environ["OPENAI_API_KEY"]

    def query_milvus(self, embedding):
        """Run a similarity search for ``embedding`` on the ``vector`` field.

        Args:
            embedding: the query vector.

        Returns:
            A dict whose ``list_of_knowledge_base`` entry is the list of
            ``text`` values of the hits returned for the query vector.
        """
        # NOTE(review): "offset": 1 combined with limit=1 presumably skips the
        # closest match and returns the second one - confirm this is intended.
        search_param = {"metric_type": "L2", "offset": 1}
        result = self.milvus_client.search(
            [embedding],
            "vector",
            search_param,
            1,
            expr=None,
            partition_names=None,
            output_fields=["id", "vector", "path", "text"],
        )
        # Only one query vector is sent, so only result[0] is read.
        return {
            "list_of_knowledge_base": [hit.entity.text for hit in result[0]],
        }

    def query_vector_db(self, embedding):
        """Look up ``embedding`` in the vector store (delegates to Milvus)."""
        return self.query_milvus(embedding)

    # indexer_by_milvus.py
    def add_html_to_vectordb(self, content, path):
        """Split ``content`` into chunks, embed each chunk and store it.

        Args:
            content: the text to index.
            path: value stored in the ``path`` field of every inserted row.
        """
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=self.MODEL_CHUNK_SIZE,  # 8192
            chunk_overlap=math.floor(self.MODEL_CHUNK_SIZE / 10),  # 819
        )
        docs = text_splitter.create_documents([content])
        for doc in docs:
            embedding = create_embedding(doc.page_content)
            self.insert_embedding(embedding, doc.page_content, path)

    def insert_embedding(self, embedding, text, path):
        """Insert one row (``vector``, ``text``, ``path``) into the collection.

        Any exception raised while inserting is printed and the process is
        then terminated with exit status 1.
        """
        try:
            print(fmt.format("Start inserting entities"))
            data = [
                {"vector": np.array(embedding), "text": text, "path": path},
            ]
            self.milvus_client.insert(data)
        except Exception as e:
            print("self.milvus_client.insert exception e:", e)
            # NOTE(review): os._exit ends the process immediately, without
            # running cleanup handlers or flushing buffers; re-raising would
            # let callers decide. Kept as-is to preserve existing behaviour.
            os._exit(1)

    # search_engine.py
    def search(self, user_query):
        """Answer ``user_query`` using text retrieved from the vector store.

        Args:
            user_query: the question to answer.

        Returns:
            A dict with a single ``response`` key holding the model's answer.
        """
        print("user_query: ", user_query)
        embedding = create_embedding(user_query)
        result = self.query_vector_db(embedding)
        knowledge_base = "\n".join(result["list_of_knowledge_base"])
        response = self.ask_chatgpt(knowledge_base, user_query)
        return {"response": response}

    def ask_chatgpt(self, knowledge_base, user_query, user_history_answer=""):
        """Ask the chat model to answer from ``knowledge_base`` only.

        Args:
            knowledge_base: retrieved text placed in the prompt.
            user_query: the user's question.
            user_history_answer: text placed after ``Answer:`` in the prompt.
                The original code referenced this name without defining it,
                which raised ``NameError``; it now defaults to an empty
                string.

        Returns:
            The ``content`` of the first choice in the model response.
        """
        system_content = (
            "你是一个专业的智能问答助手,请严格遵循以下规则:\n"
            "1. 只能基于提供的知识库内容回答问题,不得使用知识库以外的信息;\n"
            '2. 如果知识库中没有相关信息或无法找到准确答案,请明确告知用户"我无法在知识库中找到相关信息来回答这个问题";\n'
            "3. 回答时要客观准确,不得编造或推测信息;\n"
            "4. 尽量使用知识库中的原始表述,确保信息的准确性和权威性。"
        )
        user_content = (
            "Knowledge Base!\n"
            "---\n"
            f"{knowledge_base}\n"
            "---\n"
            f"User Query:{user_query}\n"
            f"Answer:{user_history_answer}"
        )
        system_message = {"role": "system", "content": system_content}
        user_message = {"role": "user", "content": user_content}
        chatgpt_response = create_llama2_13b(
            messages=[system_message, user_message]
        )
        return chatgpt_response["choices"][0]["message"]["content"]
| 欢迎光临 链载Ai (https://www.lianzai.com/) | Powered by Discuz! X3.5 |