StructuredOCR类。
pip install mistralai
wget https://raw.githubusercontent.com/mistralai/cookbook/main/mistral/ocr/mistral7b.pdf
# Shell prerequisite (run in a terminal, not in Python) to fetch the sample image:
#   wget https://raw.githubusercontent.com/mistralai/cookbook/main/mistral/ocr/receipt.png
import json
from pathlib import Path

from mistralai import DocumentURLChunk, ImageURLChunk, Mistral, TextChunk

api_key = "API_KEY"  # Replace with your own API key.
client = Mistral(api_key=api_key)
# Make sure the PDF file exists before uploading it.
pdf_file = Path("mistral7b.pdf")
assert pdf_file.is_file()

# Upload the PDF file to the Mistral OCR service.
uploaded_file = client.files.upload(
    file={
        "file_name": pdf_file.stem,
        "content": pdf_file.read_bytes(),
    },
    purpose="ocr",
)

# Get a signed URL for the uploaded file.
signed_url = client.files.get_signed_url(file_id=uploaded_file.id, expiry=1)

# Run OCR on the PDF, asking for embedded images as base64.
pdf_response = client.ocr.process(
    document=DocumentURLChunk(document_url=signed_url.url),
    model="mistral-ocr-latest",
    include_image_base64=True,
)

# Convert the response to a plain dict via its JSON form and print a preview.
response_dict = json.loads(pdf_response.model_dump_json())
print(json.dumps(response_dict, indent=4)[0:1000])  # Print the first 1000 characters.

# Used by the image OCR section that follows.
import base64
# Make sure the image file exists.
image_file = Path("receipt.png")
assert image_file.is_file()

# Base64-encode the image and wrap it in a data URL.
# The media type is image/png so that it matches the .png file being sent.
encoded = base64.b64encode(image_file.read_bytes()).decode()
base64_data_url = f"data:image/png;base64,{encoded}"

# Run OCR on the image.
image_response = client.ocr.process(
    document=ImageURLChunk(image_url=base64_data_url),
    model="mistral-ocr-latest",
)

# Convert the response to a plain dict via its JSON form and print it.
response_dict = json.loads(image_response.model_dump_json())
print(json.dumps(response_dict, indent=4))

# Get the OCR result.
# Markdown text of the first OCR page.
image_ocr_markdown = image_response.pages[0].markdown

# Ask the Pixtral-12B model to turn the image plus its OCR text into structured JSON.
chat_response = client.chat.complete(
    model="pixtral-12b-latest",
    messages=[
        {
            "role": "user",
            "content": [
                ImageURLChunk(image_url=base64_data_url),
                TextChunk(
                    text=(
                        f"This is image's OCR in markdown:\n\n{image_ocr_markdown}\n.\n"
                        "Convert this into a sensible structured json response. "
                        "The output should be strictly be json with no extra commentary"
                    )
                ),
            ],
        }
    ],
    response_format={"type": "json_object"},
    temperature=0,
)

# Parse the model's reply as JSON and print it.
response_dict = json.loads(chat_response.choices[0].message.content)
print(json.dumps(response_dict, indent=4))

from enum import Enum
import base64
from pathlib import Path

from pydantic import BaseModel


class StructuredOCR(BaseModel):
    """Schema for the structured result produced by ``structured_ocr``.

    It is passed as ``response_format`` to the chat parse call, so the
    model's reply is expected to match these fields.
    """

    # NOTE(review): the field meanings below are inferred from their names;
    # confirm against what the model actually returns.
    file_name: str  # presumably the name of the processed file
    topics: list[str]  # presumably topics detected in the document
    languages: str  # presumably the language(s) of the text
    ocr_contents: dict  # the OCR contents arranged as a dictionary
def structured_ocr(image_path: str) -> StructuredOCR:
    """Process an image with OCR and extract structured data from it.

    The image is OCR-ed first; the image and the resulting markdown are then
    sent to a chat model whose reply is parsed into a ``StructuredOCR``.

    Args:
        image_path: Path to the image file.

    Returns:
        The ``StructuredOCR`` object parsed from the chat model's reply.

    Raises:
        AssertionError: If ``image_path`` does not point to an existing file.
    """
    import mimetypes

    # Validate the input file. The error is raised explicitly instead of via
    # ``assert`` so the check is not stripped when Python runs with ``-O``.
    image_file = Path(image_path)
    if not image_file.is_file():
        raise AssertionError("提供的图像路径不存在。")

    # Derive the data-URL media type from the file name; fall back to the
    # previously hard-coded image/jpeg when it cannot be determined.
    mime_type, _ = mimetypes.guess_type(image_file.name)
    if mime_type is None or not mime_type.startswith("image/"):
        mime_type = "image/jpeg"

    # Read and base64-encode the image file.
    encoded_image = base64.b64encode(image_file.read_bytes()).decode()
    base64_data_url = f"data:{mime_type};base64,{encoded_image}"

    # Run OCR on the image and keep the markdown of the first page.
    image_response = client.ocr.process(
        document=ImageURLChunk(image_url=base64_data_url),
        model="mistral-ocr-latest",
    )
    image_ocr_markdown = image_response.pages[0].markdown

    # Have the chat model convert the OCR result into a StructuredOCR reply.
    chat_response = client.chat.parse(
        model="pixtral-12b-latest",
        messages=[
            {
                "role": "user",
                "content": [
                    ImageURLChunk(image_url=base64_data_url),
                    TextChunk(
                        text=(
                            f"This is the image's OCR in markdown:\n{image_ocr_markdown}\n.\n"
                            "Convert this into a structured JSON response "
                            "with the OCR contents in a sensible dictionnary."
                        )
                    ),
                ],
            }
        ],
        response_format=StructuredOCR,
        temperature=0,
    )
    return chat_response.choices[0].message.parsed
# Example usage: run the structured OCR pipeline on the sample receipt image.
image_path = "receipt.png"
structured_response = structured_ocr(image_path)

# Turn the result into a plain dict by round-tripping through its JSON form.
response_dict = json.loads(structured_response.model_dump_json())
print(json.dumps(response_dict, indent=4))

Mistral OCR不仅是一款强大的文本提取工具,更是结合LLM实现结构化数据提取的利器。无论你是开发者、数据分析师,还是企业用户,Mistral OCR都能为你提供高效的数据提取解决方案。赶快点击链接,体验这款工具的强大功能吧!
| 欢迎光临 链载Ai (https://www.lianzai.com/) | Powered by Discuz! X3.5 |