def explore_impact_on_product(graph, product_name):
    """Print every node that has an IMPACTS relationship pointing at a product.

    Args:
        graph: Object exposing ``run(query, **params)`` that yields records
            indexable by column name.
        product_name: Value matched against the product node's ``name``
            property.
    """
    # Same query text as before, split across adjacent literals for width.
    query = (
        " MATCH (product {name: $product_name})<-[r:IMPACTS]-(m)"
        " RETURN m.name AS Influencer, r.description AS ImpactDescription "
    )
    for row in graph.run(query, product_name=product_name):
        influencer = row['Influencer']
        impact = row['ImpactDescription']
        print(f"Influencer: {influencer}, Impact: {impact}")
from typing import Optional, Union, List, Dict

import numpy as np
from neo4j import Driver
from neo4j import GraphDatabase
from openai import OpenAI
from pyvis.network import Network
def calculate_similarity(embedding1, embedding2):
    """Return the cosine similarity between two embedding vectors.

    Args:
        embedding1: First vector (any array-like of numbers).
        embedding2: Second vector, same length as ``embedding1``.

    Returns:
        The dot product divided by the product of the two Euclidean norms.
        When either vector has zero norm the similarity is undefined, so
        0.0 is returned instead of dividing by zero.
    """
    # Coerce up front so plain lists/tuples behave the same as ndarrays.
    vec1 = np.asarray(embedding1, dtype=float)
    vec2 = np.asarray(embedding2, dtype=float)

    denominator = np.linalg.norm(vec1) * np.linalg.norm(vec2)
    if denominator == 0:
        # Avoids a nan result (and a RuntimeWarning) for all-zero embeddings.
        return 0.0
    return np.dot(vec1, vec2) / denominator
class NodeSimilaritySearchMan:
    """Look up a node's graph neighbours in Neo4j and draw them with pyvis."""

    def __init__(self, neo4j_driver: "Driver"):
        """
        Initialize the NodeSimilaritySearchMan with a Neo4j driver instance.

        Args:
            neo4j_driver (Driver): An already-created Neo4j driver (the object
                whose ``execute_query`` method is called below), used for all
                database access.
        """
        self.driver = neo4j_driver

    def find_relationship_neighbors(self, node_name: str) -> List[Dict[str, Union[int, str]]]:
        """
        Finds neighbors of a given node based on direct outgoing relationships.

        Args:
            node_name (str): The name of the node for which to find neighbors.

        Returns:
            List[Dict[str, Union[int, str]]]: One dictionary per outgoing
            relationship, with the neighbor's ``name`` and the
            ``relationship_type`` connecting it to the node.
        """
        result = self.driver.execute_query(
            """
            MATCH (n)-[r]->(neighbor)
            WHERE n.name = $node_name
            RETURN neighbor.name AS name,
                   type(r) AS relationship_type
            """,
            {"node_name": node_name},
        )

        # execute_query returns a (records, summary, keys) result object;
        # the rows live on .records, so iterate that rather than the tuple.
        return [
            {
                "name": record["name"],
                "relationship_type": record["relationship_type"],
            }
            for record in result.records
        ]

    def visualize_relationship_graph_interactive(self, neighbors, node_name, graph_name, edge_label='relationship_type') -> "Network":
        """
        Build an interactive pyvis network of a node and its neighbors.

        Args:
            neighbors: Iterable of dicts, each with a ``name`` key and a value
                under ``edge_label``; an optional ``neighbor_chunks_summary``
                is used as the start of the hover title.
            node_name: Name of the central node (drawn in red).
            graph_name: Not used by this method; kept for interface
                compatibility.
            edge_label: Key in each neighbor dict whose value labels the edge.

        Returns:
            Network: The populated network, so the caller can render or save
            it.
        """
        # Initialize the Network with cdn_resources set to 'remote'
        net = Network(notebook=True, cdn_resources='remote')

        # Add the main node
        net.add_node(node_name, label=node_name, color='red')

        # Add neighbors and edges to the network
        for neighbor in neighbors:
            edge_value = neighbor[edge_label]
            title = neighbor.get('neighbor_chunks_summary', '')
            if edge_label == 'similarity':
                # Adjust title for similarity
                title += f" (Similarity: {edge_value})"
            else:
                title += f" ({edge_label}: {edge_value})"
            net.add_node(neighbor['name'], label=neighbor['name'], title=title)
            net.add_edge(node_name, neighbor['name'], title=str(edge_value))

        return net
# Upload the prepared documents into the target namespace.
documents_response = client.graph.add_documents(
    namespace=namespace,
    documents=documents,
)
其次，我们为图定义了所需的模式（schema）：
{ "entities": [ { "name": "Company", "description": "The company discussed in the document, specifically Apple Inc." }, { "name": "Financial_Metric", "description": "Quantitative measures of Apple's financial performance, including revenue, gross margin, operating expenses, net cash position, etc." }, { "name": "Product", "description": "Physical goods produced by Apple, such as iPhone, Mac, iPad, Apple Watch." }, { "name": "Service", "description": "Services offered by Apple, including Apple TV+, Apple Music, iCloud, Apple Pay." }, { "name": "Geographic_Segment", "description": "Market areas where Apple operates, such as Americas, Europe, Greater China, Japan, Rest of Asia Pacific." }, { "name": "Executive", "description": "Senior leaders of Apple who are often quoted or mentioned in earnings calls, like CEO (Tim Cook), CFO (Luca Maestri)." }, { "name": "Market_Condition", "description": "External economic or market factors affecting Apple's business, such as inflation, foreign exchange rates, geopolitical tensions." }, { "name": "Event", "description": "Significant occurrences influencing the company, including product launches, earnings calls, and global or regional economic events." }, { "name": "Time_Period", "description": "Specific time frames discussed in the document, typically fiscal quarters or years." } ], "relations": [ { "name": "Reports", "description": "An executive discusses specific financial metrics, typically during an earnings call." }, { "name": "Impacts", "description": "Describes the influence of events or market conditions on financial metrics, products, services, or geographic segments." }, { "name": "Operates_In", "description": "Denotes the geographic areas where Apple's products and services are available." }, { "name": "Presents", "description": "Associates products or services with their financial performance metrics, as presented in earnings calls or official releases."
}, { "name": "Occurs_During", "description": "Connects an event with the specific time period in which it took place." }, { "name": "Impacted_By", "description": "Shows the effect of one entity on another, such as a financial metric being impacted by a market condition." }, { "name": "Offers", "description": "Indicates that the company provides certain services." }, { "name": "Influences", "description": "Indicates the effect of strategies or innovations on various aspects of the business." } ], "patterns": [ { "head": "Executive", "relation": "Reports", "tail": "Financial_Metric", "description": "An executive reports on a financial metric, such as revenue growth or operating margin." }, { "head": "Event", "relation": "Impacts", "tail": "Financial_Metric", "description": "An event, like a product launch or economic development, impacts a financial metric." }, { "head": "Product", "relation": "Presents", "tail": "Financial_Metric", "description": "A product is associated with specific financial metrics during a presentation, such as sales figures or profit margins." }, { "head": "Product", "relation": "Operates_In", "tail": "Geographic_Segment", "description": "A product is available in a specific geographic segment." }, { "head": "Event", "relation": "Occurs_During", "tail": "Time_Period", "description": "An event such as an earnings call occurs during a specific fiscal quarter or year." }, { "head": "Financial_Metric", "relation": "Impacted_By", "tail": "Market_Condition", "description": "A financial metric is affected by a market condition, such as changes in foreign exchange rates." }, { "head": "Company", "relation": "Offers", "tail": "Service", "description": "Apple offers a service like Apple Music or Apple TV+." }, { "head": "Service", "relation": "Influences", "tail": "Market_Condition", "description": "A service influences market conditions, potentially affecting consumer behavior or competitive dynamics." } ] }
from langchain_community.document_loaders import PyPDFLoader
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain import hub
from langchain_openai import ChatOpenAI
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain.retrievers.document_compressors import FlashrankRerank
from langchain.retrievers import ContextualCompressionRetriever
import os
from langchain import PromptTemplate, LLMChain
from langchain_cohere import CohereRerank
from cohere import Client
from dotenv import load_dotenv

# Populate os.environ from a local .env file before any key is read.
load_dotenv()

# Shared Cohere client; the key is None when COHERE_API_KEY is not set.
cohere_api_key = os.environ.get("COHERE_API_KEY")
co = Client(cohere_api_key)


class CustomCohereRerank(CohereRerank):
    """CohereRerank variant whose model config accepts arbitrary field types."""

    class Config:
        arbitrary_types_allowed = True


# Resolve forward references on the subclass once it is fully defined.
CustomCohereRerank.update_forward_refs()
def format_docs(docs):
    """Join the ``page_content`` of each document, separated by a blank line."""
    contents = [document.page_content for document in docs]
    separator = "\n\n"
    return separator.join(contents)
def query_vector_db(query, faiss_index):
    """Answer ``query`` from a FAISS index using Cohere reranking and GPT-4.

    Args:
        query: The user's question; used both for retrieval and in the prompt.
        faiss_index: Vector store exposing ``as_retriever()``.

    Returns:
        Whatever ``LLMChain.invoke`` returns for the filled-in prompt.
    """
    # Retrieval side: base retriever wrapped with the Cohere reranker.
    base_retriever = faiss_index.as_retriever()
    reranker = CustomCohereRerank(client=co)
    reranking_retriever = ContextualCompressionRetriever(
        base_compressor=reranker,
        base_retriever=base_retriever,
    )

    # Generation side: fixed prompt + GPT-4 chat model.
    prompt_text = """You are a helpful assistant who is able to answer any question using the provided context. Answer the question using just the context provided to you question: {question} context: {context} Provide a concise response with maximum three sentences"""
    qa_prompt = PromptTemplate(
        template=prompt_text,
        input_variables=["context", "question"],
    )
    chat_model = ChatOpenAI(model="gpt-4")
    qa_chain = LLMChain(prompt=qa_prompt, llm=chat_model)

    # Retrieve, flatten the hits into one context string, then ask the model.
    reranked_docs = reranking_retriever.invoke(query)
    chain_inputs = {"question": query, "context": format_docs(reranked_docs)}
    return qa_chain.invoke(chain_inputs)
完整性指的是系统能够在不遗漏重要细节的情况下，提供与查询相关的全部信息。由于图数据库具有关联性，它们可以通过对所有相互连接的数据进行彻底搜索来提供全面的答案。相反，尽管矢量索引在查找相似文本块方面效率很高，但不一定能够完整地捕捉到更广泛的上下文或数据点之间的相互关系。假设我们需要一个完整的视图，涵盖直接影响苹果 Mac 产品线的所有市场条件——这可能包括经济因素、供应链问题、竞争动态等等。为此，我们可以定义一个 GraphQueryManager 类来获取这些信息：
class GraphQueryManager:
    """Small wrapper around a Neo4j driver for market-condition lookups.

    Can be used as a context manager so the driver is closed on exit.
    """

    def __init__(self, uri, user, password):
        """Open a Neo4j driver for ``uri`` authenticated as ``user``."""
        # Local import: the driver package is only needed when a connection
        # is actually opened.
        from neo4j import GraphDatabase

        self.driver = GraphDatabase.driver(uri, auth=(user, password))

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        return False

    def close(self):
        """Close the underlying driver."""
        self.driver.close()

    def get_impacting_market_conditions(self, product_name, namespace="apple-earning-calls"):
        """Return every node with an IMPACTS relationship to a product.

        Args:
            product_name: ``name`` of the target node.
            namespace: ``namespace`` property the impacting node must carry.

        Returns:
            A list of dicts with keys ``Condition``, ``Description`` and
            ``Product``, one per matching relationship.
        """
        with self.driver.session() as session:
            result = session.run(
                """
                MATCH (n)-[r:IMPACTS]->(m)
                WHERE m.name = $product_name AND n.namespace = $namespace
                RETURN n.name AS Condition,
                       r.description AS Description,
                       m.name AS Product
                """,
                product_name=product_name,
                namespace=namespace,
            )
            # Materialise inside the session block, while the result is
            # still readable. Keys must match the RETURN aliases above.
            return [
                {
                    "Condition": record["Condition"],
                    "Description": record["Description"],
                    "Product": record["Product"],
                }
                for record in result
            ]
from langchain.chat_models import ChatOpenAI
from langchain import PromptTemplate, LLMChain

# OpenAI credential taken from the environment; None when the variable is unset.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
def run_chain(question, txt_context):
    """Ask GPT-4 to answer ``question`` using only ``txt_context``.

    Args:
        question: The user's question, substituted into the prompt.
        txt_context: Text the model is told to base its answer on.

    Returns:
        The result of ``LLMChain.invoke`` for the filled-in prompt.
    """
    template = """ You are a helpful assistant who is able to answer any question using the provided context. Answer the question using just the context provided to you Question : {question}, Context:{context} Provide a concise response with maximum three sentences"""
    prompt = PromptTemplate(template=template, input_variables=["context", "question"])

    # load the model
    chat = ChatOpenAI(model_name="gpt-4", openai_api_key=OPENAI_API_KEY, temperature=0.0)
    chain = LLMChain(llm=chat, prompt=prompt)
    answer = chain.invoke({"question": question, "context": txt_context})
    # Hand the result back; previously it was computed and then dropped,
    # so callers always received None.
    return answer
def query_graph_with_chain(question):
    """Query the graph for ``question`` and have the LLM answer from the result.

    The graph's own answer and all returned chunk texts are concatenated
    into one context string, which is passed to ``run_chain``.

    Returns:
        The ``'text'`` entry of the value returned by ``run_chain``.
    """
    graph_result = client.graph.query_graph(
        query=question,
        namespace="apple-earning-calls",
        include_chunks=True,
    )

    # Flatten every chunk's texts into a single string, in order.
    chunk_text = "".join(
        piece
        for chunk in graph_result.chunks
        for piece in chunk.chunk_texts
    )
    combined_context = graph_result.answer + " " + chunk_text

    return run_chain(question, combined_context)['text']
# Run the same question through both pipelines and print the answers together.
question_text = question['question']
gr = query_graph_with_chain(question_text)
vc = query_vector_db(question_text, index)
print("Graph: ", gr)
print("Vector: ", vc['text'])
// Multi-hop pattern joining executives, metrics, market conditions, products,
// regions, events and time periods through the schema's relations.
MATCH (exec:EXECUTIVE)-[r1:REPORTS]->(metric:FINANCIAL_METRIC),
      (metric)-[r2:IMPACTED_BY]->(cond:MARKET_CONDITION),
      (prod:PRODUCT)-[r3:PRESENTS]->(metric),
      (prod)-[r4:OPERATES_IN]->(geo:GEOGRAPHIC_SEGMENT),
      (event:EVENT)-[r5:OCCURS_DURING]->(time:TIME_PERIOD),
      (event)-[r6:IMPACTS]->(metric)
WHERE exec.name IN ['Tim Cook', 'Luca Maestri']
  AND geo.name IN ['Americas', 'Europe', 'Greater China']
  AND time.name IN ['Q1 2023', 'Q2 2023', 'Q3 2023']
RETURN exec, metric, cond, prod, geo, event, time,
       r1, r2, r3, r4, r5, r6