## Initialize RAG system
# Module-level instance, created when this file is imported.
rag_system = MistralRAGSystem()
## Pre-populate knowledge graph with some initial data
def initialize_knowledge_base():
    """Build the seed entries for the knowledge base.

    Each entry is a dict with an ``id``, the text ``content`` and a
    ``metadata`` dict carrying ``category`` and ``difficulty``.

    NOTE(review): in the code visible here the list is only built; it is
    neither stored nor returned. The step that loads these entries into the
    RAG system appears to be missing from this excerpt -- confirm against
    the full file.
    """
    knowledge_items = [
        {
            "id": "ai_basics",
            "content": "Artificial Intelligence is a broad field of computer science focused on creating intelligent machines that can simulate human-like thinking and learning capabilities.",
            "metadata": {"category": "introduction", "difficulty": "beginner"},
        },
        {
            "id": "ml_fundamentals",
            "content": "Machine Learning is a subset of AI that enables systems to learn and improve from experience without being explicitly programmed, using algorithms that can learn from and make predictions or decisions based on data.",
            "metadata": {"category": "core_concept", "difficulty": "intermediate"},
        },
    ]
## Initialize knowledge base
# Runs once when this file is imported.
initialize_knowledge_base()
@cl.on_chat_start
async def start():
    """Send the greeting message; registered through ``cl.on_chat_start``."""
    await cl.Message(
        content="RAG System with Mistral is ready! How can I help you today?"
    ).send()
@cl.on_message
async def main(message: cl.Message):
    """Handle an incoming chat message; registered through ``cl.on_message``.

    Messages starting with ``/add_knowledge`` are treated as a knowledge
    addition command. When fewer than three whitespace-separated parts are
    present, a usage hint is sent back and the handler returns.

    Args:
        message (cl.Message): The incoming message; only ``message.content``
            is read in the code visible here.
    """
    # Check if the message is a knowledge addition command
    if message.content.startswith("/add_knowledge"):
        # Parse the message to extract node_id and content
        # NOTE(review): maxsplit=3 yields up to four parts, so content that
        # contains spaces is split across parts[2] and parts[3]; maxsplit=2
        # may be what was intended -- confirm against the code using `parts`.
        parts = message.content.split(maxsplit=3)
        if len(parts) < 3:
            await cl.Message(content="Usage: /add_knowledge <node_id> <content>").send()
            return
    # NOTE(review): the remainder of this handler (storing the knowledge and
    # answering ordinary messages) is not visible in this excerpt.
# Get Hugging Face API key from the MISTRAL_API_KEY environment variable
self.api_key = os.getenv('MISTRAL_API_KEY')
if not self.api_key:
    # The message names the variable that is actually read above, so a user
    # who hits this error knows which entry to add.
    raise ValueError("MISTRAL_API_KEY must be set in .env file")

# Default model (corrected name)
self.model = "mistralai/Mistral-7B-v0.1"
# Hugging Face Inference API endpoint for Mistral model
url = f'https://api-inference.huggingface.co/models/{self.model}'

# Make the POST request to generate a response
# (`payload` and `headers` are built in code not visible in this excerpt)
response = requests.post(url, json=payload, headers=headers)

# Check if the request was successful
if response.status_code == 200:
    # The generated text is read from the first element of the JSON body.
    generated_text = response.json()[0]['generated_text']
# NOTE(review): what is done with `generated_text`, and the handling of
# non-200 responses, is not visible in this excerpt.
def add_node(self, node_id: str, content: str, metadata: Dict[str, Any] = None):
    """
    Add a node to the knowledge graph and embed its content

    The node is stored twice: as a node of ``self.graph`` (with ``content``
    and ``metadata`` attributes) and as a document in ``self.collection``
    together with its embedding.

    Args:
        node_id (str): Unique identifier for the node
        content (str): Text content of the node
        metadata (dict, optional): Additional metadata for the node
    """
    # Normalize once so both stores see the same value
    metadata = metadata or {}

    # Add to networkx graph
    self.graph.add_node(node_id, content=content, metadata=metadata)

    # Embed the content the same way retrieve_similar_nodes embeds queries
    embedding = self.embedding_model.encode(content).tolist()

    # Add to ChromaDB. An empty metadata dict is not forwarded: some ChromaDB
    # releases reject it ("Expected metadata to be a non-empty dict"), while
    # omitting metadatas is accepted.
    self.collection.add(
        ids=[node_id],
        embeddings=[embedding],
        documents=[content],
        metadatas=[metadata] if metadata else None,
    )
def add_edge(self, source: str, target: str, relationship: str = None):
    """
    Add a directed edge between two nodes

    The relationship is stored as the ``relationship`` attribute of the edge
    in ``self.graph``.

    Args:
        source (str): Source node ID
        target (str): Target node ID
        relationship (str, optional): Type of relationship
    """
    self.graph.add_edge(source, target, relationship=relationship)
def retrieve_similar_nodes(self, query: str, top_k: int = 3):
    """
    Retrieve most similar nodes to a given query.

    Args:
        query (str): Search query
        top_k (int): Number of top similar nodes to retrieve.

    Returns:
        The 'documents' entry of the ChromaDB query result, or an empty
        list when the collection is empty or ``top_k`` is not positive.
        NOTE(review): ChromaDB presumably returns one inner list of
        documents per query embedding -- confirm against the installed
        version.
    """
    # Never ask for more results than the collection holds
    top_k = min(top_k, self.collection.count())

    # Nothing to retrieve: skip the embedding work and avoid sending a
    # zero/negative n_results to the vector store.
    if top_k <= 0:
        return []

    # Generate query embedding
    query_embedding = self.embedding_model.encode(query).tolist()

    # Retrieve from ChromaDB
    results = self.collection.query(
        query_embeddings=[query_embedding],
        n_results=top_k,
    )

    # Return the documents (already adjusted for n_results)
    return results.get('documents', [])
## Example usage
def create_sample_knowledge_graph():
    """Build a small demo knowledge graph about AI and return it.

    Three nodes (``ai_intro``, ``ml_intro``, ``dl_intro``) are added and
    chained by two directed edges: ai_intro -> ml_intro -> dl_intro.

    Returns:
        The populated ``KnowledgeGraphRAG`` instance.
    """
    # NOTE(review): the original carried a commented-out argument here,
    # persist_directory="./my_knowledge_base_data2"
    kg = KnowledgeGraphRAG()

    # Add some sample nodes about AI
    kg.add_node("ai_intro", "人工智能是计算机科学的一个分支")
    kg.add_node("ml_intro", "机器学习是 AI 的一个子集,专注于从数据中学习")
    kg.add_node("dl_intro", "深度学习使用具有多个层的神经网络")

    # Add some relationships
    kg.add_edge("ai_intro", "ml_intro", "包含")
    kg.add_edge("ml_intro", "dl_intro", "高级技术")

    return kg
## For testing
if __name__ == "__main__":
    kg = create_sample_knowledge_graph()
    # NOTE(review): "visualaze_graph" looks like a misspelling of
    # "visualize_graph"; the method definition is not visible in this
    # excerpt, so the name is left untouched -- confirm against the class.
    kg.visualaze_graph()

    # Example retrieval
    results = kg.retrieve_similar_nodes("神经网络")
    print(results)