from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import FlashrankRerank
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(temperature=0)
compressor = FlashrankRerank()
compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor, base_retriever=retriever
)

compressed_docs = compression_retriever.invoke(
    "What did the president say about Ketanji Jackson Brown"
)
print([doc.metadata["id"] for doc in compressed_docs])
pretty_print_docs(compressed_docs)
This code uses FlashrankRerank to rerank the documents returned by the base retriever (retriever), ordering them by their relevance to the query "What did the president say about Ketanji Jackson Brown". Finally, it prints the document IDs and the compressed, reranked document contents.
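The snippet above assumes that a base retriever and a pretty_print_docs helper already exist. A minimal sketch of one possible setup follows; the source text file, the FAISS vector store, the embedding model, and the helper are assumptions for illustration, not part of the original example.

from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Load and split a source document (file name assumed for illustration)
documents = TextLoader("state_of_the_union.txt").load()
texts = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100).split_documents(documents)

# Attach an id to each chunk so reranked results can be traced back
for idx, text in enumerate(texts):
    text.metadata["id"] = idx

# Build a vector-store retriever that returns a generous candidate set for reranking
retriever = FAISS.from_documents(texts, OpenAIEmbeddings()).as_retriever(search_kwargs={"k": 20})

def pretty_print_docs(docs):
    # Print each document's content separated by a divider line
    print(
        f"\n{'-' * 100}\n".join(
            f"Document {i + 1}:\n\n" + d.page_content for i, d in enumerate(docs)
        )
    )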
import openai

# Set your OpenAI API key (this example uses the legacy, pre-1.0 SDK interface)
openai.api_key = 'YOUR_API_KEY'

def pointwise_rerank(query, document):
    # Score a single document's relevance to the query on a 1-10 scale
    prompt = f"Rate the relevance of the following document to the query on a scale from 1 to 10:\n\nQuery: {query}\nDocument: {document}\n\nRelevance Score:"
    response = openai.ChatCompletion.create(
        model="gpt-4-turbo",
        messages=[{"role": "user", "content": prompt}]
    )
    return response['choices'][0]['message']['content'].strip()

def listwise_rerank(query, documents):
    # Use a sliding window approach to rerank documents
    window_size = 5
    reranked_docs = []
    for i in range(0, len(documents), window_size):
        window = documents[i:i + window_size]
        prompt = f"Given the query, please rank the following documents:\n\nQuery: {query}\nDocuments: {', '.join(window)}\n\nRanked Document Identifiers:"
        response = openai.ChatCompletion.create(
            model="gpt-4-turbo",
            messages=[{"role": "user", "content": prompt}]
        )
        ranked_ids = response['choices'][0]['message']['content'].strip().split(', ')
        reranked_docs.extend(ranked_ids)
    return reranked_docs

def pairwise_rerank(query, documents):
    # Compare every pair of documents and tally a win for the preferred one
    scores = {}
    for i in range(len(documents)):
        for j in range(i + 1, len(documents)):
            doc1 = documents[i]
            doc2 = documents[j]
            prompt = f"Which document is more relevant to the query?\n\nQuery: {query}\nDocument 1: {doc1}\nDocument 2: {doc2}\n\nAnswer with '1' for Document 1, '2' for Document 2:"
            response = openai.ChatCompletion.create(
                model="gpt-4-turbo",
                messages=[{"role": "user", "content": prompt}]
            )
            winner = response['choices'][0]['message']['content'].strip()
            if winner == '1':
                scores[doc1] = scores.get(doc1, 0) + 1
                scores[doc2] = scores.get(doc2, 0)
            elif winner == '2':
                scores[doc2] = scores.get(doc2, 0) + 1
                scores[doc1] = scores.get(doc1, 0)
    # Sort documents based on scores
    ranked_docs = sorted(scores.items(), key=lambda item: item[1], reverse=True)
    return [doc for doc, score in ranked_docs]

# Example usage
query = "What are the benefits of using LLMs for document reranking?"
documents = [
    "LLMs can process large amounts of text quickly.",
    "They require extensive fine-tuning for specific tasks.",
    "LLMs can generate human-like text responses.",
    "They are limited by their training data and may produce biased results."
]

# Pointwise Reranking
for doc in documents:
    score = pointwise_rerank(query, doc)
    print(f"Document: {doc} - Relevance Score: {score}")

# Listwise Reranking
reranked_listwise = listwise_rerank(query, documents)
print(f"Listwise Reranked Documents: {reranked_listwise}")

# Pairwise Reranking
reranked_pairwise = pairwise_rerank(query, documents)
print(f"Pairwise Reranked Documents: {reranked_pairwise}")
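Note that pairwise_rerank issues one LLM call per document pair, so its cost grows quadratically with the number of candidates; pointwise scoring is linear in the candidate count, and listwise ranking needs only one call per window.

The code above uses the legacy openai.ChatCompletion interface from the pre-1.0 SDK. On the 1.x OpenAI Python SDK, the same pointwise scorer might look like the sketch below; the model name and prompt are carried over from the example, while the client setup is an assumption rather than part of the original code.

from openai import OpenAI

client = OpenAI()  # reads OPENAI_API_KEY from the environment

def pointwise_rerank_v1(query: str, document: str) -> str:
    # Same pointwise prompt as above, expressed with the 1.x client API
    prompt = (
        "Rate the relevance of the following document to the query "
        f"on a scale from 1 to 10:\n\nQuery: {query}\nDocument: {document}\n\nRelevance Score:"
    )
    response = client.chat.completions.create(
        model="gpt-4-turbo",
        messages=[{"role": "user", "content": prompt}],
    )
    return response.choices[0].message.content.strip()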