Python JSON Handling

Python JSON Handling

JSON (JavaScript Object Notation) is a lightweight data format for data exchange. Python’s json module provides tools for encoding and decoding JSON data, essential for APIs and data storage.

JSON Basics

JSON represents data as key-value pairs and arrays.

JSON Data Types

# JSON supports these types:
# - Objects: {"key": "value"}
# - Arrays: [1, 2, 3]
# - Strings: "text"
# - Numbers: 42, 3.14
# - Booleans: true, false
# - null: null

# Python equivalents:
# dict, list, str, int/float, bool, None

import json

# A sample JSON document held as Python text. It contains one value of
# each JSON type: object, array, string, number, boolean and null.
json_data = (
    "\n"
    "{\n"
    '  "name": "Alice",\n'
    '  "age": 30,\n'
    '  "city": "New York",\n'
    '  "hobbies": ["reading", "coding"],\n'
    '  "is_student": false,\n'
    '  "grades": null\n'
    "}\n"
)

print("JSON supports objects, arrays, strings, numbers, booleans, and null")

JSON is language-independent, but its types map closely onto Python's built-in data structures. See the JSON specification (RFC 8259 / ECMA-404) for the full grammar.

Encoding (Serialization)

Convert Python objects to JSON strings.

json.dumps()

import json

# A dict made only of JSON-compatible values (str, int, list, bool, None),
# so the stock encoder can serialize it without help.
person = dict(
    name="Bob",
    age=25,
    city="Chicago",
    hobbies=["gaming", "music"],
    active=True,
    spouse=None,
)

# Compact single-line encoding; the result is an ordinary str
json_string = json.dumps(person)
print(json_string)
print(f"Type: {type(json_string)}")

# indent=2 spreads the output over several lines, two spaces per level
pretty_json = json.dumps(person, indent=2)
print("\nPretty printed:")
print(pretty_json)

json.dumps() converts Python objects to JSON strings. Use indent for readability.

Custom Encoding

import json
from datetime import datetime

# A small domain class that json cannot serialize without help
class Person:
    """A person described by a name and a date of birth."""

    def __init__(self, name, birth_date):
        # Both values are stored exactly as passed in
        self.name, self.birth_date = name, birth_date

# JSONEncoder subclass that knows how to represent a Person
class PersonEncoder(json.JSONEncoder):
    """Encoder that serializes Person instances as tagged JSON objects."""

    def default(self, obj):
        """Return a JSON-serializable stand-in for *obj*.

        json calls this hook only for objects it cannot encode natively.
        A Person becomes a dict holding its name, its birth year and a
        "type" tag; any other object is handed to the base class, which
        raises TypeError.
        """
        if not isinstance(obj, Person):
            return super().default(obj)
        return dict(
            name=obj.name,
            birth_year=obj.birth_date.year,
            type="Person",
        )

person = Person(name="Alice", birth_date=datetime(1990, 5, 15))

# cls= selects the encoder class that dumps() instantiates for this call
json_str = json.dumps(person, indent=2, cls=PersonEncoder)
print(json_str)

Extend JSONEncoder for custom object serialization.

Decoding (Deserialization)

Convert JSON strings to Python objects.

json.loads()

import json

# Two JSON documents held as text: an object and a mixed-type array
json_str = '{"name": "Charlie", "age": 35, "city": "Boston"}'
json_array = '[1, 2, 3, "four", true, null]'

# loads() parses text: the object becomes a dict, the array a list
person = json.loads(json_str)
python_list = json.loads(json_array)

print(f"Name: {person['name']}")
print(f"Type: {type(person)}")

# Each array element arrives as the matching Python type
print(f"List: {python_list}")
print(f"Types: {[type(item) for item in python_list]}")

json.loads() converts JSON strings to Python objects. JSON null becomes Python None.

Handling Parse Errors

import json

# Two malformed documents: the first lacks a value after "age", the second
# never closes its object
invalid_json = '{"name": "Alice", "age": }'
incomplete_json = '{"name": "Bob"'

# Parse each in turn. Both attempts fail; the character position of the
# error is reported only for the second document.
for text, show_position in ((invalid_json, False), (incomplete_json, True)):
    try:
        result = json.loads(text)
    except json.JSONDecodeError as e:
        print(f"JSON decode error: {e}")
        if show_position:
            print(f"Position: {e.pos}")

JSONDecodeError provides detailed error information for debugging.

File Operations

Read and write JSON files directly.

json.dump() and json.load()

import json

# Sample data. The addresses are placeholders on the reserved example.com
# domain (the previous values were e-mail-obfuscation residue, not addresses).
users = [
    {"id": 1, "name": "Alice", "email": "alice@example.com"},
    {"id": 2, "name": "Bob", "email": "bob@example.com"},
    {"id": 3, "name": "Charlie", "email": "charlie@example.com"},
]

# Write to file. The encoding is spelled out so the result does not depend
# on the platform's locale default.
with open("users.json", "w", encoding="utf-8") as file:
    json.dump(users, file, indent=2)

print("Data written to users.json")

# Read the file back using the same encoding it was written with
with open("users.json", "r", encoding="utf-8") as file:
    loaded_users = json.load(file)

print(f"Loaded {len(loaded_users)} users")
print(f"First user: {loaded_users[0]}")

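json.dump() writes JSON straight to an open file object and json.load() parses straight from one, so no intermediate string is needed. Opening and closing the file is still your job, which is why the example uses with statements.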

Advanced Features

Handle complex data types and formatting.

Sorting Keys

import json

# Keys are deliberately inserted out of alphabetical order
data = dict(zebra=1, apple=2, banana=3)

# Encode twice: once as-is (dicts keep insertion order on Python 3.7+)
# and once with sort_keys=True
normal = json.dumps(data)
sorted_json = json.dumps(data, sort_keys=True)

print(f"Normal: {normal}")
print(f"Sorted: {sorted_json}")

sort_keys=True sorts object keys alphabetically for consistent output.

Custom Separators

import json

data = dict(name="Alice", age=30)

# separators is an (item_separator, key_separator) pair; dropping the
# spaces gives the most compact output. With no argument dumps() uses
# ", " and ": ".
compact = json.dumps(data, separators=(",", ":"))
default = json.dumps(data)

print(f"Compact: {compact}")
print(f"Default: {default}")

separators parameter controls spacing between items and keys.

Handling Special Types

import json
from datetime import datetime

# A record whose "date" value is a datetime, which json cannot encode
# by default
data = dict(
    event="Conference",
    date=datetime(2023, 12, 25, 10, 30),
)

# Fallback serializer meant to be passed to json.dumps() as default=
def datetime_encoder(obj):
    """Serialize datetime objects as ISO 8601 strings.

    Returns:
        obj.isoformat() when obj is a datetime.

    Raises:
        TypeError: for any other type, which is how a default= hook
            reports that a value cannot be serialized.
    """
    if not isinstance(obj, datetime):
        raise TypeError(f"Object of type {type(obj)} is not JSON serializable")
    return obj.isoformat()

# default= is consulted only for values json cannot encode on its own
json_str = json.dumps(data, indent=2, default=datetime_encoder)
print(json_str)

# Parsing the text again yields the date as a plain ISO-format string;
# json.loads() does not turn it back into a datetime
decoded = json.loads(json_str)
print(f"Decoded date: {decoded['date']}")

Use default parameter for types not natively supported by JSON.

Practical Examples

Configuration Files

import json
import os

def _merge_config(base, override):
    """Return a new dict holding *base* recursively updated with *override*."""
    merged = dict(base)
    for key, value in override.items():
        if isinstance(value, dict) and isinstance(merged.get(key), dict):
            # Both sides are sections: merge them instead of replacing
            merged[key] = _merge_config(merged[key], value)
        else:
            merged[key] = value
    return merged


def load_config(filename="config.json"):
    """Load configuration from a JSON file, layered over built-in defaults.

    Args:
        filename: Path of the JSON configuration file.

    Returns:
        The defaults with the file's values merged on top. Nested sections
        are merged key by key, so a file that overrides only
        "database.host" still gets the default "port" and "name". If the
        file does not exist it is created with the defaults, and the
        defaults are returned.

    Raises:
        json.JSONDecodeError: if the file is not valid JSON.
        TypeError: if the file's top-level value is not a JSON object.
    """
    default_config = {
        "database": {
            "host": "localhost",
            "port": 5432,
            "name": "mydb"
        },
        "api": {
            "base_url": "https://api.example.com",
            "timeout": 30
        }
    }

    try:
        # Just try to open the file: checking for existence first would
        # leave a window in which the file could disappear.
        with open(filename, "r", encoding="utf-8") as file:
            user_config = json.load(file)
    except FileNotFoundError:
        # First run: persist the defaults so the user has a file to edit
        with open(filename, "w", encoding="utf-8") as file:
            json.dump(default_config, file, indent=2)
        return default_config

    if not isinstance(user_config, dict):
        raise TypeError(
            f"{filename} must contain a JSON object, "
            f"not {type(user_config).__name__}"
        )

    return _merge_config(default_config, user_config)

# load_config() is called with its default filename, "config.json"; the
# lookup below reads the "host" entry of the "database" section.
config = load_config()
print(f"Database host: {config['database']['host']}")

Store application configuration in JSON format.

API Data Handling

import json
import requests

def fetch_user_data(user_id, timeout=10):
    """Fetch one user record from the JSONPlaceholder demo API.

    Args:
        user_id: Identifier interpolated into the request URL.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON body, or None when the request fails, the server
        answers with an error status, or the body is not valid JSON. A
        message describing the failure is printed in those cases.
    """
    url = f"https://jsonplaceholder.typicode.com/users/{user_id}"

    try:
        # Without an explicit timeout requests can wait indefinitely
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # Raise exception for bad status codes
    except requests.RequestException as e:
        print(f"Request failed: {e}")
        return None

    try:
        # Parse JSON response
        return response.json()
    except ValueError as e:
        # Handled separately from the request itself: json.JSONDecodeError
        # and requests' own JSON error are both ValueError subclasses, and
        # the latter is also a RequestException, so a shared try block
        # would report it as a request failure.
        print(f"JSON parsing failed: {e}")
        return None

# Usage: fetch_user_data() returns None on failure, so the falsy check
# skips both the printing and the file write in that case.
user = fetch_user_data(1)
if user:
    print(f"User: {user['name']}")
    print(f"Email: {user['email']}")

    # Save to file with an explicit encoding, so the output does not
    # depend on the platform's locale default
    with open("user_data.json", "w", encoding="utf-8") as file:
        json.dump(user, file, indent=2)

Handle JSON data from REST APIs.

Data Validation

import json
from jsonschema import validate, ValidationError

# JSON Schema describing a user record: an object with string "name" and
# "email", an optional non-negative integer "age", and "name" and "email"
# listed as required.
user_schema = dict(
    type="object",
    properties=dict(
        name=dict(type="string"),
        age=dict(type="integer", minimum=0),
        email=dict(type="string", format="email"),
    ),
    required=["name", "email"],
)

def validate_user_data(json_str):
    """Parse *json_str* and validate the result against user_schema.

    Args:
        json_str: JSON text expected to describe a user.

    Returns:
        (True, data) with the parsed value when it is valid, otherwise
        (False, message) where message says whether parsing or schema
        validation failed.
    """
    # Local import: the file-level import brings in only validate and
    # ValidationError.
    from jsonschema import FormatChecker

    try:
        data = json.loads(json_str)
    except json.JSONDecodeError as e:
        return False, f"Invalid JSON: {e}"

    try:
        # "format" keywords such as "email" are not enforced unless a
        # format checker is supplied.
        validate(data, user_schema, format_checker=FormatChecker())
    except ValidationError as e:
        return False, f"Validation error: {e.message}"

    return True, data

# Test validation with one well-formed record and one whose "email" value
# is not an address. The valid record uses a placeholder address on the
# reserved example.com domain (the previous value was e-mail-obfuscation
# residue, not an address).
valid_json = '{"name": "Alice", "age": 30, "email": "alice@example.com"}'
invalid_json = '{"name": "Bob", "email": "invalid-email"}'

is_valid, result = validate_user_data(valid_json)
print(f"Valid: {is_valid}, Data: {result}")

is_valid, error = validate_user_data(invalid_json)
print(f"Valid: {is_valid}, Error: {error}")

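Use JSON Schema for data validation (requires the third-party jsonschema package). Note that "format" keywords such as "email" are only enforced when a format checker is passed to validate(); without one they are treated as annotations.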

Pretty Printing Large JSON

import json

def pretty_print_json(data, max_width=80):
    """Return *data* as indented JSON with over-long lines truncated.

    Despite the name, nothing is printed: the formatted text is returned.

    Args:
        data: Any JSON-serializable value.
        max_width: Maximum length of each output line. Longer lines are
            cut and end in "...". Values below 3 leave no room for the
            full marker, so the marker itself is shortened; zero or
            negative values yield empty lines for anything that does
            not fit.

    Returns:
        The formatted JSON text, two spaces per indent level, in which no
        line is longer than max(max_width, 0) characters.
    """
    marker = "..."
    limit = max(max_width, 0)
    # Characters of the original line kept in front of the marker. Clamped
    # so that a small max_width cannot turn into a negative slice index.
    keep = max(limit - len(marker), 0)

    def clip(line):
        if len(line) <= limit:
            return line
        # Simple truncation (could be improved)
        return (line[:keep] + marker)[:limit]

    formatted = json.dumps(data, indent=2)
    return "\n".join(clip(line) for line in formatted.split("\n"))

# Ten generated user records nested under a single "users" key
large_data = dict(
    users=[
        dict(
            id=i,
            name=f"User{i}",
            details=dict(email=f"user{i}@example.com", active=True),
        )
        for i in range(10)
    ]
)

print(pretty_print_json(large_data))

Format large JSON data for better readability.

Performance Considerations

Benchmarking

import json
import time

# Large data for testing: 10,000 small records with a 100-character payload
large_data = [{"id": i, "data": "x" * 100} for i in range(10000)]

# Intervals are measured with time.perf_counter(), the clock intended for
# timing short durations; time.time() is wall-clock time and can jump if
# the system clock is adjusted.

# Test encoding performance
start = time.perf_counter()
json_str = json.dumps(large_data)
encode_time = time.perf_counter() - start

# Test decoding performance
start = time.perf_counter()
decoded = json.loads(json_str)
decode_time = time.perf_counter() - start

print(f"Encoding 10k items: {encode_time:.4f} seconds")
print(f"Decoding 10k items: {decode_time:.4f} seconds")
print(f"JSON size: {len(json_str)} characters")

JSON is generally fast, but profile for large datasets.

Memory Usage

import json
import sys

def _deep_sizeof(obj, seen=None):
    """Return the size in bytes of *obj* plus everything it contains.

    sys.getsizeof() is shallow: for a container it reports the container
    alone. This helper walks dicts, lists, tuples and sets and adds up
    their members, counting each distinct object once.
    """
    seen = set() if seen is None else seen
    if id(obj) in seen:
        return 0
    seen.add(id(obj))

    total = sys.getsizeof(obj)
    if isinstance(obj, dict):
        total += sum(
            _deep_sizeof(key, seen) + _deep_sizeof(value, seen)
            for key, value in obj.items()
        )
    elif isinstance(obj, (list, tuple, set, frozenset)):
        total += sum(_deep_sizeof(item, seen) for item in obj)
    return total


data = {"large_list": list(range(10000))}

# JSON string: one flat object, so the shallow size is its full size
json_str = json.dumps(data)
json_size = sys.getsizeof(json_str)

# Python object: measured recursively, because the shallow size of the
# outer dict ignores the list and the 10,000 ints it holds
python_size = _deep_sizeof(data)

print(f"JSON string size: {json_size} bytes")
print(f"Python object size: {python_size} bytes")
print(f"JSON is {json_size/python_size:.1f}x the size of the Python object")

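Compare sizes with care: sys.getsizeof() is shallow, so for a container it reports only the container itself and not the objects it holds. Once the whole object graph is counted, a JSON string is often more compact than the equivalent Python objects, because every Python int, str and list carries per-object overhead.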

Best Practices

  1. Use with statements for file operations
  2. Handle json.JSONDecodeError when decoding, and TypeError/ValueError when encoding (the json module has no JSONEncodeError)
  3. Use indent for development, remove for production
  4. Validate JSON data when receiving from external sources
  5. Consider using ujson or orjson for better performance
  6. Use ISO format for dates (via custom encoder)
  7. Set appropriate encoding when working with files

External Resources:

Related Tutorials:

Last updated on