OpenAI API JSON格式指南与json_repair错误修复

显示全部楼层

核心参数是 response_format={"type": "json_object"}，其他支持JSON输出的模型也可以这样使用。下面我们以OpenAI模型为例。

指定OpenAI API返回JSON格式

基本JSON格式响应示例

import openai

# Client shared by the examples below; replace the placeholder with a real key.
client = openai.OpenAI(api_key="your-api-key")

# response_format={"type": "json_object"} is the parameter that requests a
# JSON object as the reply.
response = client.chat.completions.create(
    model="gpt-4-turbo",
    response_format={"type": "json_object"},
    messages=[
        {"role": "system", "content": "你是一个返回JSON格式的助手。"},
        {"role": "user", "content": "返回包含用户名、年龄和爱好的JSON"},
    ],
)

print(response.choices[0].message.content)
# 输出示例:
# {
#  "name": "John Doe",
#  "age": 30,
#  "hobbies": ["reading", "hiking", "photography"]
# }

更复杂的结构化数据请求

response = client.chat.completions.create(
  model="gpt-4-turbo",
  response_format={"type":"json_object"},
  messages=[
    {"role":"system","content":"你是一个返回JSON格式的助手。"},
    {"role":"user","content":"生成5个用户的数据，包括姓名、电子邮件和订阅状态"}
  ]
)

print(response.choices[0].message.content)
# 输出示例:
# {
#  "users": [
#   {"id": 1, "name": "Alice Smith", "email": "alice@example.com", "subscribed": true},
#   {"id": 2, "name": "Bob Johnson", "email": "bob@example.com", "subscribed": false},
#   {"id": 3, "name": "Carol Williams", "email": "carol@example.com", "subscribed": true},
#   {"id": 4, "name": "David Brown", "email": "david@example.com", "subscribed": true},
#   {"id": 5, "name": "Eve Davis", "email": "eve@example.com", "subscribed": false}
#  ]
# }

使用函数调用确保JSON响应

response = client.chat.completions.create(
  model="gpt-4-turbo",
  messages=[
    {"role":"system","content":"你是一个帮助用户的助手。"},
    {"role":"user","content":"分析以下文本的情感：'我今天非常开心，但天气不太好'"}
  ],
  tools=[{
   "type":"function",
   "function": {
     "name":"analyze_sentiment",
     "description":"分析文本的情感",
     "parameters": {
       "type":"object",
       "properties": {
         "text": {"type":"string","description":"要分析的文本"},
         "sentiment": {"type":"string","enum": ["positive","negative","neutral","mixed"]},
         "confidence": {"type":"number","description":"情感分析的置信度"},
         "details": {
           "type":"object",
           "properties": {
             "positive_aspects": {"type":"array","items": {"type":"string"}},
             "negative_aspects": {"type":"array","items": {"type":"string"}}
            }
          }
        },
       "required": ["sentiment","confidence"]
      }
    }
  }],
  tool_choice={"type":"function","function": {"name":"analyze_sentiment"}}
)

print(response.choices[0].message.tool_calls[0].function.arguments)
# 输出示例:
# {
#  "text": "我今天非常开心，但天气不太好",
#  "sentiment": "mixed",
#  "confidence": 0.85,
#  "details": {
#   "positive_aspects": ["今天非常开心"],
#   "negative_aspects": ["天气不太好"]
#  }
# }

处理特定场景的JSON返回格式

示例1: 中文内容的JSON格式


response = client.chat.completions.create(
  model="gpt-4-turbo",
  response_format={"type":"json_object"},
  messages=[
    {"role":"system","content":"你是一个返回JSON格式的助手。"},
    {"role":"user","content":"返回一个包含中文句子及其英文翻译的JSON数组"}
  ]
)

print(response.choices[0].message.content)
# 输出示例:
# {
#  "translations": [
#   {"chinese": "你好世界", "english": "Hello world"},
#   {"chinese": "很高兴认识你", "english": "Nice to meet you"},
#   {"chinese": "我爱学习编程", "english": "I love learning programming"}
#  ]
# }

示例2: 嵌套JSON结构

response = client.chat.completions.create(
  model="gpt-4-turbo",
  response_format={"type":"json_object"},
  messages=[
    {"role":"system","content":"你是一个返回JSON格式的助手。"},
    {"role":"user","content":"返回一个公司结构的JSON，包含部门和员工"}
  ]
)

print(response.choices[0].message.content)
# 输出示例:
# {
#  "company": {
#   "name": "Tech Solutions Inc.",
#   "founded": 2010,
#   "departments": [
#    {
#     "name": "Engineering",
#     "head": "Zhang Wei",
#     "employees": [
#      {"id": 101, "name": "李明", "position": "Senior Developer"},
#      {"id": 102, "name": "王芳", "position": "QA Engineer"}
#     ]
#    },
#    {
#     "name": "Marketing",
#     "head": "Sarah Johnson",
#     "employees": [
#      {"id": 201, "name": "刘青", "position": "Marketing Specialist"},
#      {"id": 202, "name": "陈晓", "position": "Content Writer"}
#     ]
#    }
#   ]
#  }
# }

示例3: 强制模型遵循特定JSON模式


def get_structured_data(query, schema):
    """Ask the model for JSON that follows ``schema`` and return the raw reply.

    Args:
        query: User request, sent as the user message.
        schema: JSON-serializable schema description; it is dumped with
            ``json.dumps`` and embedded in the system prompt.

    Returns:
        The message content of the first choice exactly as returned by the
        API. The reply is not parsed or validated against ``schema`` here.
    """
    # Local import: this snippet appears before the file's own `import json`.
    import json

    system_prompt = f"""
  你必须严格按照以下JSON模式返回数据:
  ```
 {json.dumps(schema, ensure_ascii=False, indent=2)}
  ```
  不要添加任何额外的字段，也不要省略任何必需的字段。
  不要在返回的JSON外包含任何其他文本、解释或注释。
  """

    response = client.chat.completions.create(
        model="gpt-4-turbo",
        response_format={"type": "json_object"},
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": query},
        ],
    )

    return response.choices[0].message.content

# 定义一个特定的数据模式
product_schema = {
 "type":"object",
 "properties": {
   "products": {
     "type":"array",
     "items": {
       "type":"object",
       "properties": {
         "id": {"type":"string"},
         "name": {"type":"string"},
         "price": {"type":"number"},
         "category": {"type":"string"},
         "inStock": {"type":"boolean"}
        },
       "required": ["id","name","price","category","inStock"]
      }
    }
  },
 "required": ["products"]
}

result = get_structured_data("生成3个电子产品的详细信息", product_schema)
print(result)
# 输出示例:
# {
#  "products": [
#   {
#    "id": "EP001",
#    "name": "超薄笔记本电脑",
#    "price": 5999.99,
#    "category": "电脑",
#    "inStock": true
#   },
#   {
#    "id": "EP002",
#    "name": "智能手机",
#    "price": 3999.99,
#    "category": "手机",
#    "inStock": true
#   },
#   {
#    "id": "EP003",
#    "name": "无线耳机",
#    "price": 999.99,
#    "category": "音频设备",
#    "inStock": false
#   }
#  ]
# }

使用`json_repair`修复JSON错误示例

当OpenAI API返回的JSON格式有问题时，可以使用json_repair库修复这些错误。可以看到大部分简单的错误示例是可以直接修复的，有些语义难度大的确实比较难修复。以下是常见的JSON错误及其修复示例：

import json

from json_repair import loads, repair_json

示例1: 修复单引号替代双引号的问题

bad_json1 ="{'name': 'John', 'age': 30, 'city': 'New York'}"
fixed_json1 = repair_json(bad_json1)
print("修复单引号:")
print(f"修复前:{bad_json1}")
print(f"修复后:{fixed_json1}")
print()

示例2: 修复缺少引号的键

bad_json2 ="{name: 'John', age: 30, city: 'New York'}"
fixed_json2 = repair_json(bad_json2)
print("修复缺少引号的键:")
print(f"修复前:{bad_json2}")
print(f"修复后:{fixed_json2}")
print()

示例3: 修复逗号问题

bad_json3 ='{"name": "John", "age": 30, "city": "New York",}'# 结尾多余的逗号
fixed_json3 = repair_json(bad_json3)
print("修复多余的逗号:")
print(f"修复前:{bad_json3}")
print(f"修复后:{fixed_json3}")
print()

示例4: 修复缺少大括号的问题

# Input deliberately lacks the surrounding curly braces.
bad_json4 = '"name": "John", "age": 30, "city": "New York"'
fixed_json4 = repair_json(bad_json4)
print("修复缺少括号:")
print(f"修复前:{bad_json4}")
print(f"修复后:{fixed_json4}")
print()

这个直接失败了，没有还原大括号

示例5: 修复非标准的布尔值或空值

bad_json5 ='{"name": "John", "active": True, "data": None}'
fixed_json5 = repair_json(bad_json5)
print("修复非标准的布尔值或空值:")
print(f"修复前:{bad_json5}")
print(f"修复后:{fixed_json5}")
print()

示例6: 修复嵌套结构中的错误

bad_json6 ='{"user": {"name": "John", "contacts": {"email": "john@example.com", phone: "123-456-7890"}}}'
fixed_json6 = repair_json(bad_json6)
print("修复嵌套结构中的错误:")
print(f"修复前:{bad_json6}")
print(f"修复后:{fixed_json6}")
print()

示例7: 修复数组中的错误

bad_json7 = '{"items": [1, 2, 3,, 4, 5]}'  # stray extra comma inside the array
fixed_json7 = repair_json(bad_json7)
print("修复数组中的错误:")
print(f"修复前:{bad_json7}")
print(f"修复后:{fixed_json7}")
print()

示例8: 修复不匹配的括号

bad_json8 ='{"name": "John", "items": [1, 2, 3}'# 方括号没有闭合
fixed_json8 = repair_json(bad_json8)
print("修复不匹配的括号:")
print(f"修复前:{bad_json8}")
print(f"修复后:{fixed_json8}")
print()

示例9: 修复中文等非ASCII字符的问题

bad_json9 ="{'name': '张三', 'city': '北京'}"
fixed_json9 = repair_json(bad_json9, ensure_ascii=False)
print("修复包含中文的JSON并保留中文字符:")
print(f"修复前:{bad_json9}")
print(f"修复后:{fixed_json9}")
print()

示例10: 直接获取Python对象而不是JSON字符串

bad_json10 = "{'name': 'John', 'age': 30, 'skills': ['Python', 'JavaScript']}"
# loads() is equivalent to repair_json(bad_json10, return_objects=True).
fixed_obj10 = loads(bad_json10)
print("直接获取Python对象:")
print(f"修复前:{bad_json10}")
print(f"修复后(Python对象):{fixed_obj10}")
print(f"对象类型:{type(fixed_obj10)}")
print()

示例11: 处理严重破损的JSON

severely_broken_json = "{这不是有效的JSON，name: 'John', age: missing_value}"
try:
    fixed_severely_broken = repair_json(severely_broken_json)
except Exception as e:  # broad on purpose: the demo just reports any failure
    print(f"修复失败:{e}")
else:
    print("修复严重破损的JSON:")
    print(f"修复前:{severely_broken_json}")
    print(f"修复后:{fixed_severely_broken}")
print()

这个其实修复失败了：开头那句非JSON的文字对解析干扰很大，修复难度比较高。

示例12: 处理包含注释的JSON (JSON标准不支持注释)

json_with_comments ="""
{
 "name": "John", // 这是用户名
 "age": 30, /* 这是年龄 */
 "city": "New York"
}
"""
fixed_json_comments = repair_json(json_with_comments)
print("修复包含注释的JSON:")
print(f"修复前:{json_with_comments}")
print(f"修复后:{fixed_json_comments}")

还有一个常见场景：模型返回的内容经常以 ```json 开头（即JSON被包在Markdown代码块里）。

比如下面：


# Sample model reply: JSON wrapped in a Markdown ```json fence. The payload is
# intentionally malformed (single-quoted strings, unquoted keys).
markdown_json = """```json
{
 "name": "张三",
 "age": 30,
 "skills": ['Python', 'JavaScript', 'React'],
 "contact": {
  email: "zhangsan@example.com",
  phone: "123-456-7890"
 }
}
```"""

或者

broken_json ="""{
 "products": [
  {"id": 1, "name": "笔记本电脑", "price": 5999.99},
  {"id": 2, "name": "智能手机", "price": 3999.99,},
  {"id": 3, name: "无线耳机", "price": 999.99}
 ],
 "total_items": 3,
 "in_stock": True
}"""

我们可以用下面一个函数来去除前缀和后缀，然后再去修复

import json
import logging

logger = logging.getLogger(__name__)


def repair_json_output(content: str) -> str:
    """Repair and normalize JSON output.

    Args:
        content: String content that may contain JSON, optionally wrapped in
            a Markdown code fence opened with ```json or ```ts.

    Returns:
        The repaired JSON re-serialized with ``json.dumps`` (non-ASCII
        characters are kept as-is). If the stripped input neither starts with
        ``{`` / ``[`` nor contains a ```json / ```ts fence, or if repairing
        raises, the stripped (and possibly fence-trimmed) input is returned.
    """
    content = content.strip()
    looks_like_json = (
        content.startswith(("{", "["))
        or "```json" in content
        or "```ts" in content
    )
    if not looks_like_json:
        return content

    # Local import: the file only does `from json_repair import ...`, which
    # does not bind the module name used below.
    import json_repair

    try:
        # Trim a leading ```json / ```ts fence and a trailing ``` fence;
        # removeprefix/removesuffix are no-ops when the fence is absent.
        for opening_fence in ("```json", "```ts"):
            content = content.removeprefix(opening_fence)
        content = content.removesuffix("```")

        # Try to repair and parse JSON, then emit it in normalized form.
        repaired_content = json_repair.loads(content)
        return json.dumps(repaired_content, ensure_ascii=False)
    except Exception as e:
        # Best effort: fall through and hand back the text we have.
        logger.warning("JSON repair failed: %s", e)
    return content