OpenAI API JSON formatting guide and json_repair bug fixes

Written by
Caleb Hayes
Updated on:June-28th-2025
Recommendation

Master the OpenAI API JSON format and easily process complex data.

Core content:
1. OpenAI API returns core parameter configuration in JSON format
2. Basic JSON format response and complex structured data request examples
3. JSON error handling and repair techniques

Yang Fangxian
Founder of 53AI/Most Valuable Expert of Tencent Cloud (TVP)


The core parameters areresponse_format={"type": "json_object"}  , other models that support json calls can also be used in this way. Let's take the Openai model as an example

Specify the OpenAI API to return JSON format

Basic JSON format response example

import  openai

client = openai.OpenAI(api_key= "your-api-key" )


response = client.chat.completions.create(
    model = "gpt-4-turbo" ,
    response_format={ "type""json_object" },
    messages=[
        { "role""system""content""You are a helper that returns JSON." },
        { "role""user""content""Returns JSON containing username, age and hobbies" }
    ]
)

print(response.choices[ 0 ].message.content)
# Sample output:
# {
# "name": "John Doe",
# "age": 30,
# "hobbies": ["reading", "hiking", "photography"]
# }

More complex structured data requests

response = client.chat.completions.create(
    model = "gpt-4-turbo" ,
    response_format={ "type""json_object" },
    messages=[
        { "role""system""content""You are a helper that returns JSON." },
        { "role""user""content""Generate data for 5 users, including name, email, and subscription status" }
    ]
)

print(response.choices[ 0 ].message.content)
# Sample output:
# {
# "users": [
# {"id": 1, "name": "Alice Smith", "email": "alice@example.com", "subscribed": true},
# {"id": 2, "name": "Bob Johnson", "email": "bob@example.com", "subscribed": false},
# {"id": 3, "name": "Carol Williams", "email": "carol@example.com", "subscribed": true},
# {"id": 4, "name": "David Brown", "email": "david@example.com", "subscribed": true},
# {"id": 5, "name": "Eve Davis", "email": "eve@example.com", "subscribed": false}
# ]
# }

Ensuring a JSON response using a function call

response = client.chat.completions.create(
    model = "gpt-4-turbo" ,
    messages=[
        { "role""system""content""You are an assistant who helps users." },
        { "role""user""content""Analyze the sentiment of the following text: 'I am very happy today, but the weather is not very good'" }
    ],
    tools=[{
        "type""function" ,
        "function" : {
            "name""analyze_sentiment" ,
            "description""Analyze the sentiment of text" ,
            "parameters" : {
                "type""object" ,
                "properties" : {
                    "text" : { "type""string""description""The text to be analyzed" },
                    "sentiment" : { "type""string""enum" : [ "positive""negative""neutral""mixed" ]},
                    "confidence" : { "type""number""description""Confidence of sentiment analysis" },
                    "details" : {
                        "type""object" ,
                        "properties" : {
                            "positive_aspects" : { "type""array""items" : { "type""string" }},
                            "negative_aspects" : { "type""array""items" : { "type""string" }}
                        }
                    }
                },
                "required" : [ "sentiment""confidence" ]
            }
        }
    }],
    tool_choice={ "type""function""function" : { "name""analyze_sentiment" }}
)

print(response.choices[ 0 ].message.tool_calls[ 0 ].function.arguments)
# Sample output:
# {
# "text": "I am very happy today, but the weather is not very good",
# "sentiment": "mixed",
# "confidence": 0.85,
# "details": {
# "positive_aspects": ["I am very happy today"],
# "negative_aspects": ["The weather is not good"]
# }
# }

Handle JSON response format for specific scenarios

Example 1: JSON format of Chinese content


response = client.chat.completions.create(
    model = "gpt-4-turbo" ,
    response_format={ "type""json_object" },
    messages=[
        { "role""system""content""You are a helper that returns JSON." },
        { "role""user""content""Return a JSON array containing Chinese sentences and their English translations" }
    ]
)

print(response.choices[ 0 ].message.content)
# Sample output:
# {
# "translations": [
# {"chinese": "Hello World", "english": "Hello world"},
# {"chinese": "Nice to meet you", "english": "Nice to meet you"},
# {"chinese": "I love learning programming", "english": "I love learning programming"}
# ]
# }

Example 2: Nested JSON structure

response = client.chat.completions.create(
    model = "gpt-4-turbo" ,
    response_format={ "type""json_object" },
    messages=[
        { "role""system""content""You are a helper that returns JSON." },
        { "role""user""content""Return a JSON of a company structure, including departments and employees" }
    ]
)

print(response.choices[ 0 ].message.content)
# Sample output:
# {
# "company": {
# "name": "Tech Solutions Inc.",
# "founded": 2010,
# "departments": [
# {
# "name": "Engineering",
# "head": "Zhang Wei",
# "employees": [
# {"id": 101, "name": "Li Ming", "position": "Senior Developer"},
# {"id": 102, "name": "Wang Fang", "position": "QA Engineer"}
# ]
# },
# {
# "name": "Marketing",
# "head": "Sarah Johnson",
# "employees": [
# {"id": 201, "name": "Liu Qing", "position": "Marketing Specialist"},
# {"id": 202, "name": "Chen Xiao", "position": "Content Writer"}
# ]
# }
# ]
# }
# }

Example 3: Enforcing a model to follow a specific JSON schema


def get_structured_data (query, schema) : 
    system_prompt =  f"""
    You must return data in strict accordance with the following JSON schema:
    ```
    {json.dumps(schema, ensure_ascii= False , indent= 2 )}
    ```
    Do not add any extra fields, and do not omit any required fields.
    Do not include any additional text, explanations, or comments outside of the returned JSON.
    """

    
    response = client.chat.completions.create(
        model = "gpt-4-turbo" ,
        response_format={ "type""json_object" },
        messages=[
            { "role""system""content" : system_prompt},
            { "role""user""content" : query}
        ]
    )
    
    return  response.choices[ 0 ].message.content

# Define a specific data mode
product_schema = {
    "type""object" ,
    "properties" : {
        "products" : {
            "type""array" ,
            "items" : {
                "type""object" ,
                "properties" : {
                    "id" : { "type""string" },
                    "name" : { "type""string" },
                    "price" : { "type""number" },
                    "category" : { "type""string" },
                    "inStock" : { "type""boolean" }
                },
                "required" : [ "id""name""price""category""inStock" ]
            }
        }
    },
    "required" : [ "products" ]
}

result = get_structured_data( "Generate detailed information of 3 electronic products" , product_schema)
print(result)
# Sample output:
# {
# "products": [
# {
# "id": "EP001",
# "name": "Ultra-thin laptop",
# "price": 5999.99,
# "category": "Computer",
# "inStock": true
# },
# {
# "id": "EP002",
# "name": "Smartphone",
# "price": 3999.99,
# "category": "Mobile Phone",
# "inStock": true
# },
# {
# "id": "EP003",
# "name": "Wireless Headphones",
# "price": 999.99,
# "category": "Audio Equipment",
# "inStock": false
# }
# ]
# }

usejson_repairFix JSON error example

When the JSON format returned by the OpenAI API is problematic, you can use the json_repair library to fix these errors. You can see that most simple error examples can be fixed directly, but some semantically difficult ones are indeed more difficult to fix. The following are common JSON errors and their repair examples:

from  json_repair  import  repair_json, loads
import  json
  • Example 1: Fix the problem of single quotes replacing double quotes
bad_json1 =  "{'name': 'John', 'age': 30, 'city': 'New York'}"
fixed_json1 = repair_json(bad_json1)
print( "Fix single quotes:" )
print( f"Before repair:  {bad_json1} " )
print( f"After fix:  {fixed_json1} " )
print()
  • Example 2: Fix keys missing quotes
bad_json2 =  "{name: 'John', age: 30, city: 'New York'}"
fixed_json2 = repair_json(bad_json2)
print( "Fix missing quotes on key:" )
print( f"Before repair:  {bad_json2} " )
print( f"After fix:  {fixed_json2} " )
print()
  • Example 3: Fixing the comma problem
bad_json3 =  '{"name": "John", "age": 30, "city": "New York",}' # extra comma at the end  
fixed_json3 = repair_json(bad_json3)
print( "Fix extra comma:" )
print( f"Before repair:  {bad_json3} " )
print( f"After fix:  {fixed_json3} " )
print()
  • Example 4: Fix the missing curly brace problem
bad_json4 = '"name": "John", "age": 30, "city": "New York"' fixed_json4 = repair_json(bad_json4) print("Fixed missing brackets:") print(f"Before repair: {bad_json4}") print(f"After repair: {fixed_json4}") print()

This fails directly, without restoring the braces

  • Example 5: Fixing non-standard boolean or null values
bad_json5 =  '{"name": "John", "active": True, "data": None}'
fixed_json5 = repair_json(bad_json5)
print( "Fix non-standard boolean or null values:" )
print( f"Before repair:  {bad_json5} " )
print( f"After fix:  {fixed_json5} " )
print()
  • Example 6: Fixing errors in nested structures
bad_json6 =  '{"user": {"name": "John", "contacts": {"email": "john@example.com", phone: "123-456-7890"}}}'
fixed_json6 = repair_json(bad_json6)
print( "Fix errors in nested structures:" )
print( f"Before repair:  {bad_json6} " )
print( f"After fix:  {fixed_json6} " )
print()
  • Example 7: Fixing errors in arrays
bad_json7 = '{"items": [1, 2, 3,, 4, 5]}' # There is an extra comma in the array fixed_json7 = repair_json(bad_json7) print("Fixed errors in array:") print(f"Before repair: {bad_json7}") print(f"After repair: {fixed_json7}") print()
  • Example 8: Fixing unmatched brackets
bad_json8 =  '{"name": "John", "items": [1, 2, 3}' # The square brackets are not closed  
fixed_json8 = repair_json(bad_json8)
print( "Fix unmatched brackets:" )
print( f"Before repair:  {bad_json8} " )
print( f"After fix:  {fixed_json8} " )
print()

- Example 9 : Fix the problem of non-ASCII characters such as Chinese
```Python
bad_json9 =  "{'name': '张三', 'city': '北京'}"
fixed_json9 = repair_json(bad_json9, ensure_ascii= False )
print( "Fix JSON containing Chinese and keep Chinese characters:" )
print( f"Before repair:  {bad_json9} " )
print( f"After fix:  {fixed_json9} " )
print()
  • Example 10: Get Python object directly instead of JSON string
bad_json10 =  "{'name': 'John', 'age': 30, 'skills': ['Python', 'JavaScript']}"
fixed_obj10 = loads(bad_json10)   # equivalent to repair_json(bad_json10, return_objects=True)
print( "Get Python object directly:" )
print( f"Before repair:  {bad_json10} " )
print( f"After repair (Python object):  {fixed_obj10} " )
print( f"Object type:  {type(fixed_obj10)} " )
print()
  • Example 11: Handling badly broken JSON
severely_broken_json =  "{This is not valid JSON, name: 'John', age: missing_value}"
try :
    fixed_severely_broken = repair_json(severely_broken_json)
    print( "Fix badly broken JSON:" )
    print( f"Before repair:  {severely_broken_json} " )
    print( f"After repair:  {fixed_severely_broken} " )
except  Exception  as  e:
    print( f"Repair failed:  {e} " )
print()

This repair actually failed, mainly because there was a sentence in the previous field that had a relatively large impact, making the repair quite difficult.

  • Example 12: Processing JSON containing comments (the JSON standard does not support comments)
json_with_comments =  """
{
  "name": "John", // This is the username
  "age": 30, /* This is the age */
  "city": "New York"
}
"""

fixed_json_comments = repair_json(json_with_comments)
print( "Fix JSON containing comments:" )
print( f"Before repair:  {json_with_comments} " )
print( f"After fix:  {fixed_json_comments} " )

There is another scenario where we often encounter files that start with ```json

For example, the following:


markdown_json =  "" "```json
{
  "
name ": " Zhang San ",
  "
age ": 30,
  "
skills ": ['Python', 'JavaScript', 'React'],
  "
contact ": {
    email: "
zhangsan@example.com ",
    phone: "
123-456-7890 "
  }
}
```"
""

or

broken_json =  "" "{
  "
products ": [
    {"
id ": 1, " name ": " Laptop ", " price ": 5999.99},
    {"
id ": 2, " name ": " Smartphone ", " price ": 3999.99,},
    {"
id ": 3, name: " Wireless Headphones ", " price ": 999.99}
  ],
  "
total_items ": 3,
  "
in_stock ": True
}"
""

We can use the following function to remove the prefix and suffix, and then repair it

def repair_json_output(content: str) -> str:
    "" "
    Repair and normalize JSON output.

    Args:
        content (str): String content that may contain JSON

    Returns:
        str: Repaired JSON string, or original content if not JSON
    "
""
    content = content.strip()
    if  content.startswith(( "{""[" )) or  "```json" in  content or  "```ts" in  content:
        try:
            # If content is wrapped in ```json code block, extract the JSON part
            if  content.startswith( "```json" ):
                content = content.removeprefix( "```json" )

            if  content.startswith( "```ts" ):
                content = content.removeprefix( "```ts" )

            if  content.endswith( "```" ):
                content = content.removesuffix( "```" )

            # Try to repair and parse JSON
            repaired_content = json_repair.loads(content)
            return  json.dumps(repaired_content, ensure_ascii=False)
        except Exception as e:
            logger.warning(f "JSON repair failed: {e}" )
    return  content