Python JSON Handling
JSON (JavaScript Object Notation) is a lightweight data format for data exchange. Python’s json module provides tools for encoding and decoding JSON data, essential for APIs and data storage.
JSON Basics
JSON represents data as key-value pairs and arrays.
JSON Data Types
# JSON supports these types:
# - Objects: {"key": "value"}
# - Arrays: [1, 2, 3]
# - Strings: "text"
# - Numbers: 42, 3.14
# - Booleans: true, false
# - null: null
# Python equivalents:
# dict, list, str, int/float, bool, None
import json
# Example JSON string
json_data = '''
{
"name": "Alice",
"age": 30,
"city": "New York",
"hobbies": ["reading", "coding"],
"is_student": false,
"grades": null
}
'''
print("JSON supports objects, arrays, strings, numbers, booleans, and null")
JSON is language-independent, but its types map closely onto Python's built-in data structures. See the JSON specification.
Encoding (Serialization)
Convert Python objects to JSON strings.
json.dumps()
import json
# Python data
person = {
"name": "Bob",
"age": 25,
"city": "Chicago",
"hobbies": ["gaming", "music"],
"active": True,
"spouse": None
}
# Convert to JSON string
json_string = json.dumps(person)
print(json_string)
print(f"Type: {type(json_string)}")
# Pretty print
pretty_json = json.dumps(person, indent=2)
print("\nPretty printed:")
print(pretty_json)
json.dumps() converts Python objects to JSON strings. Use the indent parameter for readability.
Custom Encoding
import json
from datetime import datetime
# Custom object
class Person:
    """Simple value object used to demonstrate custom JSON encoding.

    Attributes:
        name: The person's name.
        birth_date: Object exposing a ``.year`` attribute (the example
            passes a ``datetime``).
    """

    def __init__(self, name, birth_date):
        self.name = name
        self.birth_date = birth_date


# Custom encoder
class PersonEncoder(json.JSONEncoder):
    """``JSONEncoder`` subclass that can serialize ``Person`` instances."""

    def default(self, obj):
        """Return a JSON-serializable stand-in for ``obj``.

        ``Person`` instances become a dict with ``name``, ``birth_year`` and
        a ``type`` tag; anything else is delegated to the base class, which
        raises ``TypeError`` for unsupported types.
        """
        if isinstance(obj, Person):
            return {
                "name": obj.name,
                "birth_year": obj.birth_date.year,
                "type": "Person",
            }
        return super().default(obj)
person = Person("Alice", datetime(1990, 5, 15))
# Encode with custom encoder
json_str = json.dumps(person, cls=PersonEncoder, indent=2)
print(json_str)
Extend JSONEncoder and override default() to serialize custom objects.
Decoding (Deserialization)
Convert JSON strings to Python objects.
json.loads()
import json
# JSON string
json_str = '{"name": "Charlie", "age": 35, "city": "Boston"}'
# Convert to Python dict
person = json.loads(json_str)
print(f"Name: {person['name']}")
print(f"Type: {type(person)}")
# JSON array
json_array = '[1, 2, 3, "four", true, null]'
python_list = json.loads(json_array)
print(f"List: {python_list}")
print(f"Types: {[type(item) for item in python_list]}")
json.loads() converts JSON strings to Python objects. JSON null becomes Python None.
Handling Parse Errors
import json
# Invalid JSON strings
invalid_json = '{"name": "Alice", "age": }'  # Missing value
incomplete_json = '{"name": "Bob"'  # Missing closing brace

try:
    result = json.loads(invalid_json)
except json.JSONDecodeError as e:
    print(f"JSON decode error: {e}")

try:
    result = json.loads(incomplete_json)
except json.JSONDecodeError as e:
    print(f"JSON decode error: {e}")
    # e.pos is the index into the input string where parsing failed.
    print(f"Position: {e.pos}")

# JSONDecodeError provides detailed error information for debugging.
File Operations
Read and write JSON files directly.
json.dump() and json.load()
import json
# Sample data
users = [
    {"id": 1, "name": "Alice", "email": "alice@example.com"},
    {"id": 2, "name": "Bob", "email": "bob@example.com"},
    {"id": 3, "name": "Charlie", "email": "charlie@example.com"},
]

# Write to file (explicit encoding so the result does not depend on the
# platform's default locale encoding)
with open("users.json", "w", encoding="utf-8") as file:
    json.dump(users, file, indent=2)
print("Data written to users.json")

# Read from file
with open("users.json", "r", encoding="utf-8") as file:
    loaded_users = json.load(file)
print(f"Loaded {len(loaded_users)} users")
print(f"First user: {loaded_users[0]}")
json.dump() writes JSON directly to an open file object, and json.load() reads it back from one.
Advanced Features
Handle complex data types and formatting.
Sorting Keys
import json
data = {"zebra": 1, "apple": 2, "banana": 3}
# Default (insertion order preserved in Python 3.7+)
normal = json.dumps(data)
print(f"Normal: {normal}")
# Sorted keys
sorted_json = json.dumps(data, sort_keys=True)
print(f"Sorted: {sorted_json}")
sort_keys=True sorts object keys alphabetically for consistent output.
Custom Separators
import json
data = {"name": "Alice", "age": 30}
# Custom separators (compact)
compact = json.dumps(data, separators=(',', ':'))
print(f"Compact: {compact}")
# Default separators
default = json.dumps(data)
print(f"Default: {default}")
The separators parameter controls the text placed between items and between keys and values.
Handling Special Types
import json
from datetime import datetime
# Data with datetime
data = {
"event": "Conference",
"date": datetime(2023, 12, 25, 10, 30)
}
# Custom encoder for datetime
def datetime_encoder(obj):
    """Fallback serializer for ``json.dumps(..., default=datetime_encoder)``.

    Args:
        obj: A value the JSON encoder could not serialize on its own.

    Returns:
        The ISO 8601 string from ``obj.isoformat()`` when ``obj`` is a
        ``datetime``.

    Raises:
        TypeError: If ``obj`` is not a ``datetime``.
    """
    if isinstance(obj, datetime):
        return obj.isoformat()
    raise TypeError(f"Object of type {type(obj)} is not JSON serializable")
json_str = json.dumps(data, default=datetime_encoder, indent=2)
print(json_str)
# Decode back
decoded = json.loads(json_str)
print(f"Decoded date: {decoded['date']}")
Use the default parameter for types not natively supported by JSON. Note that the decoded date is still an ISO-format string, not a datetime object.
Practical Examples
Configuration Files
import json
import os
def _deep_merge(base, override):
    """Return a new dict: ``base`` recursively updated with ``override``."""
    merged = dict(base)
    for key, value in override.items():
        if isinstance(value, dict) and isinstance(merged.get(key), dict):
            merged[key] = _deep_merge(merged[key], value)
        else:
            merged[key] = value
    return merged


def load_config(filename="config.json"):
    """Load configuration from JSON file.

    If ``filename`` exists, its contents are merged over the built-in
    defaults. The merge is recursive, so a user file that overrides only
    ``database.host`` still inherits the default ``port`` and ``name``.
    If the file does not exist, the defaults are written to it and returned.

    Args:
        filename: Path of the JSON configuration file.

    Returns:
        The effective configuration dict.

    Raises:
        json.JSONDecodeError: If the file exists but is not valid JSON.
        TypeError: If the file's top-level JSON value is not an object.
    """
    default_config = {
        "database": {
            "host": "localhost",
            "port": 5432,
            "name": "mydb",
        },
        "api": {
            "base_url": "https://api.example.com",
            "timeout": 30,
        },
    }

    if os.path.exists(filename):
        with open(filename, "r", encoding="utf-8") as file:
            user_config = json.load(file)
        if not isinstance(user_config, dict):
            raise TypeError(
                f"Configuration in {filename!r} must be a JSON object, "
                f"got {type(user_config).__name__}"
            )
        # Merge configs
        return _deep_merge(default_config, user_config)

    # Save default config
    with open(filename, "w", encoding="utf-8") as file:
        json.dump(default_config, file, indent=2)
    return default_config
config = load_config()
print(f"Database host: {config['database']['host']}")
Store application configuration in JSON format.
API Data Handling
import json
import requests
def fetch_user_data(user_id, timeout=10):
    """Fetch user data from API.

    Args:
        user_id: Identifier interpolated into the request URL.
        timeout: Seconds to wait for the server, passed to ``requests.get``.
            Without a timeout the call can block indefinitely on an
            unresponsive host.

    Returns:
        The decoded JSON body, or ``None`` if the request failed, returned
        an error status, or the body could not be parsed as JSON.
    """
    url = f"https://jsonplaceholder.typicode.com/users/{user_id}"
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # Raise exception for bad status codes
        # Parse JSON response
        return response.json()
    except requests.RequestException as e:
        print(f"Request failed: {e}")
        return None
    except json.JSONDecodeError as e:
        print(f"JSON parsing failed: {e}")
        return None
# Usage
user = fetch_user_data(1)
if user:
    print(f"User: {user['name']}")
    print(f"Email: {user['email']}")
    # Save to file (only when a user was actually fetched)
    with open("user_data.json", "w", encoding="utf-8") as file:
        json.dump(user, file, indent=2)

# Handle JSON data from REST APIs.
Data Validation
import json
from jsonschema import validate, ValidationError
# JSON Schema for validation
user_schema = {
"type": "object",
"properties": {
"name": {"type": "string"},
"age": {"type": "integer", "minimum": 0},
"email": {"type": "string", "format": "email"}
},
"required": ["name", "email"]
}
def validate_user_data(json_str):
    """Validate JSON data against schema.

    Args:
        json_str: JSON text to parse and check against ``user_schema``.

    Returns:
        ``(True, data)`` with the parsed object when the text is valid JSON
        and satisfies the schema; otherwise ``(False, message)`` describing
        the parse or validation failure.
    """
    # Imported here so this function does not depend on the file-level
    # import line being changed.
    from jsonschema import FormatChecker

    try:
        data = json.loads(json_str)
        # jsonschema ignores "format" keywords (such as "email") unless a
        # format checker is supplied.
        validate(data, user_schema, format_checker=FormatChecker())
    except json.JSONDecodeError as e:
        return False, f"Invalid JSON: {e}"
    except ValidationError as e:
        return False, f"Validation error: {e.message}"
    return True, data
# Test validation
# The "valid" sample needs a real address containing "@"; the scraped
# placeholder text was not an email address.
valid_json = '{"name": "Alice", "age": 30, "email": "alice@example.com"}'
invalid_json = '{"name": "Bob", "email": "invalid-email"}'
is_valid, result = validate_user_data(valid_json)
print(f"Valid: {is_valid}, Data: {result}")
is_valid, error = validate_user_data(invalid_json)
print(f"Valid: {is_valid}, Error: {error}")
Use JSON Schema for data validation (requires the jsonschema package).
Pretty Printing Large JSON
import json
def pretty_print_json(data, max_width=80):
    """Pretty print JSON with reasonable line lengths.

    Despite the name, nothing is printed: the formatted text is returned.

    Args:
        data: JSON-serializable object.
        max_width: Lines longer than this are cut and suffixed with "...".
            The marker is three characters long, so for ``max_width`` below
            3 an over-long line collapses to just "...".

    Returns:
        The ``indent=2`` JSON text with over-long lines truncated.
    """
    json_str = json.dumps(data, indent=2)
    # Clamp at 0: a negative slice bound would keep almost the whole line
    # instead of shortening it.
    keep = max(max_width - 3, 0)
    # Simple truncation rather than true wrapping (could be improved)
    return "\n".join(
        line if len(line) <= max_width else line[:keep] + "..."
        for line in json_str.split("\n")
    )
# Large nested data
large_data = {
"users": [
{"id": i, "name": f"User{i}", "details": {"email": f"user{i}@example.com", "active": True}}
for i in range(10)
]
}
print(pretty_print_json(large_data))
Format large JSON data for better readability.
Performance Considerations
Benchmarking
import json
import time
# Large data for testing
large_data = [{"id": i, "data": "x" * 100} for i in range(10000)]

# time.perf_counter() is the clock intended for measuring short intervals:
# it is monotonic and high-resolution, whereas time.time() follows the
# wall clock and can jump if the system time is adjusted.

# Test encoding performance
start = time.perf_counter()
json_str = json.dumps(large_data)
encode_time = time.perf_counter() - start

# Test decoding performance
start = time.perf_counter()
decoded = json.loads(json_str)
decode_time = time.perf_counter() - start

print(f"Encoding 10k items: {encode_time:.4f} seconds")
print(f"Decoding 10k items: {decode_time:.4f} seconds")
print(f"JSON size: {len(json_str)} characters")
JSON is generally fast, but profile when working with large datasets.
Memory Usage
import json
import sys
def _deep_getsizeof(obj, _seen=None):
    """Approximate total size in bytes of ``obj`` plus everything it contains.

    ``sys.getsizeof`` is shallow: for a container it counts only the
    container itself, not the objects it references. This walks dicts,
    lists, tuples and sets, counting each distinct object once.
    """
    seen = set() if _seen is None else _seen
    if id(obj) in seen:
        return 0
    seen.add(id(obj))
    size = sys.getsizeof(obj)
    if isinstance(obj, dict):
        size += sum(
            _deep_getsizeof(k, seen) + _deep_getsizeof(v, seen)
            for k, v in obj.items()
        )
    elif isinstance(obj, (list, tuple, set, frozenset)):
        size += sum(_deep_getsizeof(item, seen) for item in obj)
    return size


data = {"large_list": list(range(10000))}

# JSON string
json_str = json.dumps(data)
json_size = sys.getsizeof(json_str)

# Python object (deep size; a shallow getsizeof(data) would measure only
# the one-key outer dict and ignore the 10,000 integers inside it)
python_size = _deep_getsizeof(data)

print(f"JSON string size: {json_size} bytes")
print(f"Python object size: {python_size} bytes")
print(f"Python object is {python_size/json_size:.1f}x larger")

# For data like this, the in-memory Python objects are larger than the JSON
# text, because every list element is a separate object with its own overhead.
Best Practices
- Use with statements for file operations
- Handle JSONDecodeError when decoding, and TypeError/ValueError when encoding (the json module has no JSONEncodeError)
- Use indent for development, remove for production
- Validate JSON data when receiving from external sources
- Consider using ujson or orjson for better performance
- Use ISO format for dates (via custom encoder)
- Set appropriate encoding when working with files
External Resources:
Related Tutorials: