# Standard library
import os

# Third-party
import openai
import tiktoken

# LangChain
from langchain.chains import MapReduceDocumentsChain, ReduceDocumentsChain
from langchain.chains.combine_documents.stuff import StuffDocumentsChain
from langchain.chains.llm import LLMChain
from langchain.chains.summarize import load_summarize_chain
from langchain.docstore.document import Document
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.prompts import PromptTemplate
from langchain_openai import ChatOpenAI
from langchain_text_splitters import CharacterTextSplitter

# Read the API key from the environment instead of hard-coding it.
openai.api_key = os.getenv("OPENAI_API_KEY")
# Load the text of "Harry Potter and the Goblet of Fire" and keep only the
# part after the "Chapter 31" marker (the final chapters, used for the demo).
# NOTE(review): encoding is stated explicitly so the read does not depend on
# the platform default (e.g. cp1252 on Windows may fail on curly quotes).
with open("Harry Potter and the Goblet of Fire.txt", encoding="utf-8") as f:
    text = f.read()

# Collapse hard line breaks so the text reads as continuous prose.
text = text.replace("\n", " ")

# Keep everything after "Chapter 31"; raises IndexError if the marker is absent.
text = text.split("Chapter 31")[1]
看看待总结的文本前300个字符:
The Third Task
“Dumbledore reckons You-Know-Who’s getting stronger again as well?” Ron whispered. Everything Harry had seen in the Pensieve, nearly everything Dumbledore had told and shown him afterward, he had now shared with Ron and Hermione — and, of course, with Sirius, to whom Harry had sent
我们用tiktoken来计算整个文档的token数量:
# Count the tokens of the whole document to be summarized.
def num_tokens_from_string(string: str, encoding_name: str) -> int:
    """Return the number of tokens in *string* under a given tiktoken encoding.

    Args:
        string: The text to tokenize.
        encoding_name: A tiktoken encoding name, e.g. "cl100k_base".

    Returns:
        The token count.
    """
    encoding = tiktoken.get_encoding(encoding_name)
    return len(encoding.encode(string))
# Approach 1: "stuff" summarization via load_summarize_chain — the whole
# document chunk is stuffed into a single prompt.
prompt_template = """Write a concise summary in chinese of the following:
"{text}"
CONCISE SUMMARY:"""
prompt = PromptTemplate(template=prompt_template, input_variables=["text"])
llm = ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo-1106")
chain = load_summarize_chain(llm, chain_type="stuff", prompt=prompt)
# NOTE(review): `texts` must be the chunk list produced by a text splitter in
# an earlier cell (not shown here) — confirm it is defined before this runs.
# `page_content` is passed by keyword: the pydantic-based Document rejects
# positional arguments in several langchain versions.
result = chain.run([Document(page_content=texts[0].page_content)])
result  # notebook cell display
# Approach 2: build the "stuff" chain manually from LLMChain +
# StuffDocumentsChain instead of using the load_summarize_chain helper.
prompt_template = """Write a concise summary in chinese of the following:
"{text}"
CONCISE CHINESE SUMMARY:"""
prompt = PromptTemplate.from_template(prompt_template)
llm = ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo-1106")
llm_chain = LLMChain(llm=llm, prompt=prompt)
# document_variable_name must match the {text} placeholder in the prompt.
chain = StuffDocumentsChain(llm_chain=llm_chain, document_variable_name="text")
# NOTE(review): `texts` comes from a text-splitter cell not shown here.
# `page_content` is passed by keyword — positional Document args fail on
# pydantic-based langchain versions.
result = chain.run([Document(page_content=texts[0].page_content)])
result  # notebook cell display
# Approach 1 (map-reduce): summarize every chunk independently (map), then
# combine the partial summaries (reduce) — both via load_summarize_chain.
prompt_template = """Write a concise summary in chinese of the following text:
"{text}"
CONCISE CHINESE SUMMARY:"""
prompt = PromptTemplate(template=prompt_template, input_variables=["text"])
llm = ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo-1106")
chain = load_summarize_chain(
    llm,
    chain_type="map_reduce",
    map_prompt=prompt,
    combine_prompt=prompt,
    token_max=10000,  # cap on tokens fed to the combine step at once
)
# NOTE(review): `texts` is the chunk list from a text-splitter cell not shown.
result = chain.run(texts)
我们也可以使用MapReduceDocumentsChain:
# Approach 2: MapReduceDocumentsChain with custom map and reduce prompts.
# Map step: summarize each individual chunk in Chinese.
map_template = """Write a summary in chinese of this chunk of text that includes the main points and any important details.
{texts}
"""
map_prompt = PromptTemplate.from_template(map_template)
# Reuses the `llm` instance created in an earlier cell.
map_chain = LLMChain(llm=llm, prompt=map_prompt)
# Reduce step: merge the per-chunk Chinese summaries into one final summary.
# (Prompt grammar fixed: "is a set of", "distill them".)
reduce_template = """The following is a set of summaries in Chinese:
{texts}
Take these and distill them into a final, consolidated summary in chinese.
CHINESE ANSWER:"""
reduce_prompt = PromptTemplate.from_template(reduce_template)
reduce_chain = LLMChain(llm=llm, prompt=reduce_prompt)
# Initial prompt for the "refine" strategy: summarize the first chunk; later
# chunks are folded in by refine_template.
prompt_template = """Write a concise summary of the following:
{text}
CONCISE SUMMARY:"""
prompt = PromptTemplate.from_template(prompt_template)
# Refine prompt: receives the running summary ({existing_answer}) plus the
# next chunk ({text}) and asks the model to update the summary.
# Fixes: "\\n" escapes produced a literal backslash-n instead of newlines;
# "summary"+"with" concatenated without a space; "refine and original
# summary" corrected to "refine the original summary".
refine_template = (
    "Your job is to produce a final comprehensive summary in Chinese, "
    "considering all the context provided so far, including: {existing_answer}\n"
    "We have the opportunity to further refine and build upon the existing summary "
    "with some more context below.\n"
    "------------\n"
    "{text}\n"
    "------------\n"
    "Given the new context, refine the original summary comprehensively and "
    "concisely in Chinese, making sure to cover important details from the "
    "entire context."
)