01。
概述
一款利用检索增强生成(RAG)技术和 LLaMA-3.1-8B-Instant 大型语言模型(LLM)的个人助理工具。该工具旨在通过结合机器学习和基于检索的系统,彻底改变 PDF 文档分析任务。
02。
RAG架构的起源
03。
RAG 架构概述
04。
实现细节
05。
安装
# Install the CPU-only PyTorch stack with conda.
# NOTE(review): the original text read "-npa"; it is restored here as "-n pa"
# (target conda environment named "pa") -- confirm the environment name.
!conda install -n pa \
    pytorch \
    torchvision \
    torchaudio \
    cpuonly \
    -c pytorch \
    -c conda-forge \
    --yes

# Install / upgrade the remaining Python packages into the running kernel.
%pip install -U ipywidgets
%pip install -U requests
%pip install -U llama-index
%pip install -U llama-index-embeddings-huggingface
%pip install -U llama-index-llms-groq
%pip install -U groq
%pip install -U gradio
import os
import platform
import subprocess

import requests
def install_tesseract():
    """Install Tesseract OCR using the mechanism that matches the host OS.

    Behaviour per ``platform.system()`` value:

    * ``"Linux"``   -- runs ``sudo apt-get update`` and then
      ``sudo apt-get install -y tesseract-ocr``.
    * ``"Darwin"``  -- runs ``brew install tesseract``.
    * ``"Windows"`` -- downloads the UB-Mannheim installer into the current
      working directory, appends the default install directory to ``PATH``
      for this process, and prints the output of ``tesseract --version``.
    * anything else -- prints an "Unsupported OS" message.

    Raises:
        subprocess.CalledProcessError: if an ``apt-get`` or ``brew`` command
            exits with a non-zero status (``check=True``).
        requests.HTTPError: if the Windows installer download returns an
            HTTP error status.
    """
    os_name = platform.system()

    if os_name == "Linux":
        print("Detected Linux. Installing Tesseract using apt-get...")
        subprocess.run(["sudo", "apt-get", "update"], check=True)
        subprocess.run(
            ["sudo", "apt-get", "install", "-y", "tesseract-ocr"],
            check=True,
        )
    elif os_name == "Darwin":
        print("Detected macOS. Installing Tesseract using Homebrew...")
        subprocess.run(["brew", "install", "tesseract"], check=True)
    elif os_name == "Windows":
        tesseract_installer_url = "https://github.com/UB-Mannheim/tesseract/releases/download/v5.4.0.20240606/tesseract-ocr-w64-setup-5.4.0.20240606.exe"
        installer_path = "tesseract-ocr-w64-setup-5.4.0.20240606.exe"

        # Fail loudly on an HTTP error instead of saving an error page as
        # the .exe; the timeout keeps a stalled connection from hanging.
        response = requests.get(tesseract_installer_url, timeout=60)
        response.raise_for_status()
        with open(installer_path, "wb") as file:
            file.write(response.content)

        # NOTE(review): the installer is only downloaded here, it is never
        # launched by this function -- it presumably has to be run by hand
        # before the version check below can succeed.
        tesseract_path = r"C:\Program Files\Tesseract-OCR"
        os.environ["PATH"] += os.pathsep + tesseract_path

        try:
            result = subprocess.run(
                ["tesseract", "--version"],
                check=True,
                capture_output=True,
                text=True,
            )
            print(result.stdout)
        except (subprocess.CalledProcessError, FileNotFoundError) as e:
            # FileNotFoundError is raised when the tesseract executable is
            # not on PATH at all (e.g. the installer has not been run yet).
            print(f"Error running Tesseract: {e}")
    else:
        print(f"Unsupported OS: {os_name}")
# Run the platform-specific Tesseract setup as soon as this cell executes.
install_tesseract()
Convert PDF to OCR
import webbrowser

# Open the iLovePDF OCR page in a new browser window; presumably the PDF is
# uploaded and converted there by hand before it is indexed.
url = "https://www.ilovepdf.com/ocr-pdf"
webbrowser.open_new(url)
import os

from llama_index.core import (
    Settings,
    VectorStoreIndex,
    SimpleDirectoryReader,
    StorageContext,
    load_index_from_storage,
)
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core.node_parser import SentenceSplitter
from llama_index.llms.groq import Groq
import gradio as gr
| 欢迎光临 链载Ai (https://www.lianzai.com/) | Powered by Discuz! X3.5 |