链载Ai
标题: DeepEval使用自定义模型评估RAG实例 [打印本页]
作者: 链载Ai 时间: 昨天 21:34
标题: DeepEval使用自定义模型评估RAG实例
# Import dependencies
import asyncio
import json
import time

import requests
from deepeval.metrics import (
    ContextualPrecisionMetric,
    ContextualRecallMetric,
    ContextualRelevancyMetric,
    FaithfulnessMetric,
)
from deepeval.models import DeepEvalBaseLLM
from deepeval.test_case import LLMTestCase

from services.ChatService import ChatService
# Custom evaluation model
class QwenModel(DeepEvalBaseLLM):
    """DeepEval judge model backed by an OpenAI-style ``/chat/completions`` API.

    Args:
        api_key: Bearer token sent in the ``Authorization`` header.
        base_url: API root; ``/chat/completions`` is appended to it.
        model_name: Value sent as ``model`` in the request payload and
            returned by :meth:`get_model_name`.
        timeout: Seconds to wait for the HTTP response before ``requests``
            gives up, so a stalled endpoint cannot hang the evaluation.
    """

    def __init__(
        self,
        api_key="fastgpt-*******",
        base_url="https://jz-fastgpt-stable.djtest.cn/api/v1",
        model_name="qwen-max",
        timeout=120,
    ):
        self.api_key = api_key
        self.base_url = base_url
        self.model_name = model_name
        self.timeout = timeout

    def load_model(self):
        """Return this object; there is no separate client to load."""
        return self

    def generate(self, prompt: str) -> str:
        """Send ``prompt`` as a single user message and return the reply text.

        Raises:
            RuntimeError: If the endpoint answers with a status other than 200.
        """
        headers = {
            # The scheme and the token must be separated by a space.
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
        }
        payload = {
            "model": self.model_name,
            "messages": [{"role": "user", "content": prompt}],
            "temperature": 0,
        }
        response = requests.post(
            f"{self.base_url}/chat/completions",
            headers=headers,
            data=json.dumps(payload),
            timeout=self.timeout,
        )
        if response.status_code != 200:
            raise RuntimeError(f"API调用失败:{response.status_code},{response.text}")
        return response.json()["choices"][0]["message"]["content"]

    async def a_generate(self, prompt: str) -> str:
        """Asynchronous variant of :meth:`generate`.

        The blocking HTTP call runs in the event loop's default executor so
        that other coroutines keep running while the request is in flight.
        """
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, self.generate, prompt)

    def get_model_name(self):
        """Return the configured model name."""
        return self.model_name


# Evaluation code wrapper
class EvalService:
    """Run DeepEval RAG metrics against a chat response using ``QwenModel``.

    Every ``get_*`` method builds a test case from the response, measures one
    metric, prints ``<label>:{'score': ..., 'reason': ...}`` and returns that
    dict. ``expected_output`` is optional; when omitted the answer found in
    the response is used as the expected output.
    """

    def get_faithfulness(self, ques: str, response, expected_output=None):
        """Measure ``FaithfulnessMetric`` and return its score and reason."""
        return self._run_metric(
            FaithfulnessMetric, "faithfulness", ques, response, expected_output
        )

    def get_contextprecision(self, ques: str, response, expected_output=None):
        """Measure ``ContextualPrecisionMetric`` and return its score and reason."""
        return self._run_metric(
            ContextualPrecisionMetric,
            "contextprecision",
            ques,
            response,
            expected_output,
        )

    def get_contextrecall(self, ques: str, response, expected_output=None):
        """Measure ``ContextualRecallMetric`` and return its score and reason."""
        return self._run_metric(
            ContextualRecallMetric, "contextrecall", ques, response, expected_output
        )

    def get_contextrelevant(self, ques: str, response, expected_output=None):
        """Measure ``ContextualRelevancyMetric`` and return its score and reason."""
        return self._run_metric(
            ContextualRelevancyMetric,
            "contextrelevant",
            ques,
            response,
            expected_output,
        )

    def _run_metric(self, metric_cls, label, ques, response, expected_output=None):
        """Measure one metric class on the response and report the outcome."""
        # Create the evaluation model instance
        metric = metric_cls(model=QwenModel())
        metric.measure(self.get_test_case(ques, response, expected_output))
        outcome = {"score": metric.score, "reason": metric.reason}
        print(f"{label}:{outcome}")
        return outcome

    def get_test_case(self, ques: str, result, expected_output=None):
        """Build an ``LLMTestCase`` from a chat response dict.

        Args:
            ques: The question that was asked; used as the test-case input.
            result: Response dict. The code reads
                ``result["responseData"][1]["quoteList"]`` (items with ``q``
                and ``a`` keys), ``result["responseData"][2]["historyPreview"]``
                (items with a ``value`` key) and
                ``result["choices"][0]["message"]["content"]``.
            expected_output: Reference answer. When ``None`` the actual answer
                is reused, so reference-based metrics compare the answer with
                itself.

        NOTE(review): the fixed positions 1 and 2 in ``responseData``
        presumably match one particular application workflow -- confirm
        against the responses your ``ChatService`` actually returns.
        """
        retrieval_context = [
            f"{quote['q']}:{quote['a']}"
            for quote in result["responseData"][1]["quoteList"]
        ]
        context = [
            history["value"]
            for history in result["responseData"][2]["historyPreview"]
        ]
        answer = result["choices"][0]["message"]["content"]
        # Evaluate with the custom model
        return LLMTestCase(
            input=ques,
            actual_output=answer,
            expected_output=answer if expected_output is None else expected_output,
            context=context,
            retrieval_context=retrieval_context,
        )


if __name__ == "__main__":
    url = 'https://XXXXXX/api/v1/chat/completions'
    key = 'fastgpt-XXXXXX'
    cr = ChatService(url, key)
    # Call the AI application to obtain the result
    question = "XXX怎么收费?"
    result = cr.question_response(question)
    es = EvalService()
    es.get_faithfulness(question, result)
    es.get_contextprecision(question, result)
    es.get_contextrecall(question, result)
    es.get_contextrelevant(question, result)
faithfulness:{'score':1.0,'reason':'实际输出与检索上下文完全一致,没有任何矛盾之处,所以得到了满分1.00的忠实度评分。'}
contextprecision:{'score':1.0,'reason':'得分为1.00,因为相关的节点(即第一个节点)被正确地排在了最前面。'}
contextrecall:{'score':0.5,'reason':'分数为0.50,因为虽然节点在检索上下文中提到了'}
contextrelevant:{'score':0.16666666666666666,'reason':"分数为0.17,因为大部分检索内容并未涉及XXX问题,例如……"}
| 欢迎光临 链载Ai (https://www.lianzai.com/) |
Powered by Discuz! X3.5 |