Skip to content

Instantly share code, notes, and snippets.

@hughdbrown
Created August 10, 2025 19:18
Show Gist options
  • Save hughdbrown/3b952006ae434964e007c1a30ff0031e to your computer and use it in GitHub Desktop.
Save hughdbrown/3b952006ae434964e007c1a30ff0031e to your computer and use it in GitHub Desktop.
Slide deck in Markdown: JSON in Python
---
theme: default
title: JSON Mastery for Python Developers
class: text-center
highlighter: shiki
drawings:
  enabled: true
transition: slide-left
mdc: true
---

JSON Mastery for Python Developers

Advanced Techniques, Performance, and Best Practices

Press Space for next page

layout: center

What is JSON?

JavaScript Object Notation - A lightweight, text-based data interchange format

# JSON is everywhere in Python development
import json

data = {
    "name": "Alice",
    "age": 30,
    "skills": ["Python", "JavaScript", "SQL"],
    "active": True,
    "profile": None
}

json_string = json.dumps(data)
print(json_string)
# {"name": "Alice", "age": 30, "skills": ["Python", "JavaScript", "SQL"], "active": true, "profile": null}
JSON bridges the gap between Python objects and web APIs, configuration files, and data storage

JSON ↔ Python Data Types

Understanding the type mapping is crucial for intermediate developers

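| JSON Type  | Python Type    | Notes               |
|------------|----------------|---------------------|
| object     | dict           | Key-value pairs     |
| array      | list           | Ordered sequences   |
| string     | str            | UTF-8 encoded       |
| number     | int or float   | Automatic detection |
| true/false | bool           | Boolean values      |
| null       | None           | Null/empty value    |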
# JSON to Python
json_data = '{"count": 42, "items": ["a", "b"], "valid": true, "meta": null}'
python_obj = json.loads(json_data)
# {'count': 42, 'items': ['a', 'b'], 'valid': True, 'meta': None}

# Python to JSON
python_data = {"temperature": 23.5, "readings": [1, 2, 3]}
json_string = json.dumps(python_data)
# {"temperature": 23.5, "readings": [1, 2, 3]}

# Gotcha: Tuples become arrays!
json.dumps({"coords": (10, 20)})  # {"coords": [10, 20]}

Core JSON Operations

The four essential functions every Python developer should master

import json

# loads() - Parse JSON string to Python object
json_str = '{"name": "Bob", "score": 95}'
data = json.loads(json_str)
print(data["name"])  # Bob

# dumps() - Convert Python object to JSON string
python_obj = {"users": ["Alice", "Bob"], "count": 2}
json_output = json.dumps(python_obj, indent=2)
print(json_output)
# Pretty-printed JSON with 2-space indentation

# load() - Read JSON from file
with open("config.json", "r") as file:
    config = json.load(file)
    
# Use the loaded configuration
database_url = config.get("database_url")

# dump() - Write JSON to file
data = {"timestamp": "2024-01-15", "processed": True}
with open("result.json", "w") as file:
    json.dump(data, file, indent=2)

Custom JSON Encoding

Making non-serializable objects JSON-friendly

import json
from datetime import datetime
from decimal import Decimal

# Problem: These objects aren't JSON serializable by default
data = {
    "timestamp": datetime.now(),
    "amount": Decimal("19.99")
}

# Solution 1: Custom JSONEncoder class
class CustomEncoder(json.JSONEncoder):
    """``json.JSONEncoder`` subclass that can also encode datetimes and Decimals."""

    def default(self, obj):
        """Return a JSON-friendly replacement for *obj*.

        ``datetime`` values become ISO 8601 strings and ``Decimal`` values
        become floats. Anything else is handed to the base class, which
        raises ``TypeError``.
        """
        # Guard clause: defer to the stock behaviour for unsupported types.
        if not isinstance(obj, (datetime, Decimal)):
            return super().default(obj)
        return obj.isoformat() if isinstance(obj, datetime) else float(obj)

# Solution 2: Using default parameter
def json_serializer(obj):
    """Fallback serializer for ``json.dumps(..., default=json_serializer)``.

    Args:
        obj: A value the standard encoder could not serialize.

    Returns:
        An ISO 8601 string for ``datetime`` values, or a ``float`` for
        ``Decimal`` values.

    Raises:
        TypeError: If *obj* is of any other type. The message uses the bare
            type name, matching the wording of the standard library encoder.
    """
    if isinstance(obj, datetime):
        return obj.isoformat()
    if isinstance(obj, Decimal):
        # NOTE: float() is lossy for Decimals that need more precision than
        # a binary double offers.
        return float(obj)
    raise TypeError(f"Object of type {type(obj).__name__} is not JSON serializable")

# Usage
json.dumps(data, cls=CustomEncoder)
json.dumps(data, default=json_serializer)

Custom JSON Decoding

Converting JSON back to complex Python objects

import json
from datetime import datetime
from dataclasses import dataclass

@dataclass
class User:
    """User record holding a name, an email address and a creation timestamp."""

    name: str
    email: str
    created_at: datetime

    def __repr__(self):
        """Compact representation showing only the name and email."""
        return "User({}, {})".format(self.name, self.email)

# Custom object hook for decoding
def user_decoder(dct):
    """``object_hook`` that converts user-shaped dicts into ``User`` objects.

    Args:
        dct: A dict produced by the JSON decoder for one JSON object.

    Returns:
        A ``User`` when *dct* has 'name', 'email' and 'created_at' keys;
        otherwise *dct* unchanged.

    Raises:
        ValueError: If 'created_at' is not a valid ISO 8601 string
            (raised by ``datetime.fromisoformat``).
    """
    # Require every key the constructor reads. Checking only 'email' and
    # 'created_at' would raise KeyError for objects that lack 'name'.
    required_keys = ('name', 'email', 'created_at')
    if not all(key in dct for key in required_keys):
        return dct
    return User(
        name=dct['name'],
        email=dct['email'],
        created_at=datetime.fromisoformat(dct['created_at']),
    )

# JSON string representing a user
user_json = '''
{
    "name": "Alice Johnson",
    "email": "alice.johnson@example.com",
    "created_at": "2024-01-15T10:30:00"
}
'''

# object_hook runs user_decoder on the decoded object, so the result is a
# User instance rather than a plain dict.
user_obj = json.loads(user_json, object_hook=user_decoder)
print(user_obj)  # User(Alice Johnson, alice.johnson@example.com)

Working with Complex Objects

Dataclasses, UUIDs, and datetime objects

import json
from dataclasses import dataclass, asdict
from datetime import datetime
from uuid import UUID, uuid4

@dataclass
class Product:
    """Product record that can be converted to and from JSON."""

    id: UUID
    name: str
    price: float
    created_at: datetime

    def to_json(self):
        """Return a dict of this product's fields with JSON-safe values.

        The result is a ``dict`` (not a string): ``id`` is rendered as a
        string and ``created_at`` as an ISO 8601 string. Pass it to
        ``json.dumps`` to obtain JSON text.
        """
        payload = asdict(self)
        # Swap the two fields the json module cannot encode natively.
        payload.update(
            id=str(payload['id']),
            created_at=payload['created_at'].isoformat(),
        )
        return payload

    @classmethod
    def from_json(cls, json_str):
        """Build a ``Product`` from JSON text shaped like ``to_json()`` output."""
        fields = json.loads(json_str)
        product_id = UUID(fields['id'])
        name, price = fields['name'], fields['price']
        created = datetime.fromisoformat(fields['created_at'])
        return cls(product_id, name, price, created)

# Usage
product = Product(
    id=uuid4(),
    name="Python Book",
    price=29.99,
    created_at=datetime.now()
)

json_data = json.dumps(product.to_json(), indent=2)
restored_product = Product.from_json(json_data)

layout: center
class: text-center

Performance Matters

Standard json vs High-Performance Alternatives

When processing large datasets or building high-throughput APIs,
JSON performance can become a bottleneck.

Let's explore faster alternatives!


Performance Comparison

Benchmarking popular JSON libraries for Python

# Performance results from comprehensive benchmarks
# Processing a 2.2MB JSON file (Canada GeoJSON coordinates)

libraries = {
    "json":       {"time": "26.06ms", "speed": "1x",    "notes": "Python standard library"},
    "ujson":      {"time": "16.55ms", "speed": "1.6x",  "notes": "Ultra fast, C-based"},
    "rapidjson":  {"time": "29.26ms", "speed": "0.9x",  "notes": "Slower than standard!"},
    "orjson":     {"time": "9.69ms",  "speed": "2.7x",  "notes": "Rust-powered champion"}
}
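| Library   | Load Time | Speed vs json   | Installation                 |
|-----------|-----------|-----------------|------------------------------|
| json      | 26.06ms   | 1.0x (baseline) | Built-in                     |
| ujson     | 16.55ms   | 1.6x faster     | pip install ujson            |
| rapidjson | 29.26ms   | 0.9x (slower!)  | pip install python-rapidjson |
| orjson    | 9.69ms    | 2.7x faster     | pip install orjson           |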
Benchmarks based on real-world datasets. Results may vary by data structure and size.

Meet orjson

The fastest JSON library for Python, written in Rust

import orjson
from datetime import datetime
import uuid

# orjson advantages:
# ✅ 2-6x faster than standard json
# ✅ Built-in support for datetime, UUID, dataclasses
# ✅ Always returns bytes (explicit encoding)

# Basic usage
data = {"name": "Alice", "timestamp": datetime.now()}

# Encoding (note: returns bytes!)
json_bytes = orjson.dumps(data)
json_string = json_bytes.decode('utf-8')

# Decoding
parsed_data = orjson.loads(json_bytes)
# or
parsed_data = orjson.loads(json_string)

# Advanced features - automatic handling of complex types
complex_data = {
    "id": uuid.uuid4(),
    "created": datetime.now(),
    "coordinates": (40.7128, -74.0060),  # Tuple preserved as array
    "active": True
}

result = orjson.dumps(complex_data, option=orjson.OPT_INDENT_2)
print(result.decode())

Error Handling & Validation

Robust JSON processing for production code

import json
from typing import Dict, Any, Optional

def safe_json_loads(json_str: str) -> Optional[Dict[Any, Any]]:
    """Parse *json_str*, returning ``None`` instead of raising on failure.

    A malformed document is reported with its line and column; any other
    exception is reported generically. Both reports go to stdout.
    """
    parsed = None
    try:
        parsed = json.loads(json_str)
    except json.JSONDecodeError as err:
        print(f"JSON parsing error at line {err.lineno}, column {err.colno}: {err.msg}")
    except Exception as err:
        print(f"Unexpected error: {err}")
    return parsed

# Common validation patterns
def validate_user_data(data: dict) -> bool:
    """Validate required fields in user data.

    Args:
        data: Decoded JSON value expected to be an object (``dict``).

    Returns:
        ``True`` when *data* is a dict with non-empty 'name' and 'email'
        entries and the email is a string containing '@'; ``False``
        otherwise. The reason for a rejection is printed to stdout.
    """
    # A JSON document may decode to a list, string or number; reject those
    # here instead of failing later on key lookups.
    if not isinstance(data, dict):
        print("Invalid user data: expected a JSON object")
        return False

    required_fields = ['name', 'email']

    for field in required_fields:
        if field not in data or not data[field]:
            print(f"Missing required field: {field}")
            return False

    # Email validation (basic). The type check keeps a numeric or list value
    # from raising TypeError or slipping past the substring test.
    email = data['email']
    if not isinstance(email, str) or '@' not in email:
        print("Invalid email format")
        return False

    return True

# Usage example
user_json = '{"name": "Bob", "email": "bob@example.com", "age": 25}'
user_data = safe_json_loads(user_json)

# safe_json_loads returns None on a parse failure, so test it before validating.
if user_data and validate_user_data(user_data):
    print("Valid user data received")
    # Process the user data
else:
    print("Invalid or malformed user data")
    # Handle error appropriately

Best Practices & Security

Production-ready JSON handling guidelines

🔒 Security

import json

# ❌ Never do this - security risk!
# eval(json_string)

# ✅ Validate input size BEFORE parsing, so an oversized payload is rejected
#    without spending time and memory decoding it.
#    Note: len() counts characters for str input and bytes for bytes input.
MAX_JSON_SIZE = 1024 * 1024  # 1MB
if len(json_string) > MAX_JSON_SIZE:
    raise ValueError("JSON too large")

# ✅ Always use json.loads()
data = json.loads(json_string)

# ✅ Set strict parsing (strict=True is already the default; it rejects raw
#    control characters inside strings, so avoid passing strict=False)
json.loads(json_string, strict=True)

🎯 Performance Tips

# ✅ Reuse encoder instances
encoder = json.JSONEncoder(separators=(',', ':'))
result = encoder.encode(data)

# ✅ Use appropriate alternatives
import orjson  # For speed
import ujson   # For compatibility + speed

# ✅ Minimize indent in production
json.dumps(data)  # No indent = smaller size

📏 Data Handling

# ✅ Handle large datasets efficiently
def process_large_json(file_path):
    """Lazily yield one parsed record per line of a line-delimited JSON file.

    Blank lines and lines that are not valid JSON are skipped silently, so
    one bad record does not abort the whole run.

    Args:
        file_path: Path of the file to read, one JSON document per line.

    Yields:
        The decoded Python value for each well-formed line, in file order.
    """
    # JSON text is exchanged as UTF-8, so decode it explicitly rather than
    # relying on the platform's default encoding.
    with open(file_path, 'r', encoding='utf-8') as f:
        for line in f:
            if not line.strip():  # Skip empty lines
                continue
            try:
                record = json.loads(line)
            except json.JSONDecodeError:
                continue  # Skip malformed lines
            # Yield outside the try so only the parse itself is guarded.
            yield record

# ✅ Use appropriate data types
from decimal import Decimal
json.dumps({"price": float(Decimal("19.99"))})

# ✅ Handle encoding properly
json.dumps(data, ensure_ascii=False)  # For Unicode

🛠 Migration Strategy

# Gradual migration to orjson
# Expose module-level dumps()/loads() backed by orjson when it is installed,
# falling back to the standard library otherwise.
try:
    import orjson
except ImportError:
    import json
    dumps = json.dumps
    loads = json.loads
else:
    loads = orjson.loads

    def dumps(x):
        """Serialize *x* with orjson and return ``str``, as ``json.dumps`` does."""
        # orjson.dumps returns bytes; decode so callers always get text.
        return orjson.dumps(x).decode()

layout: center
class: text-center

Practical Example

Building a Configuration Manager with JSON

import json
import orjson
from pathlib import Path
from datetime import datetime
from typing import Dict, Any, Optional

class ConfigManager:
    """Key/value configuration store persisted to a JSON file.

    The file is read when the manager is constructed and rewritten on every
    ``set()``. Load and save failures are reported on stdout rather than
    raised, so a bad or unwritable file does not crash the caller.
    """

    def __init__(self, config_file: str = "config.json", use_orjson: bool = True):
        """Create the manager and immediately load (or create) the file.

        Args:
            config_file: Path of the JSON file backing this configuration.
            use_orjson: Use ``orjson`` for (de)serialization when true,
                the standard ``json`` module otherwise.
        """
        self.config_file = Path(config_file)
        self.use_orjson = use_orjson
        self._config: Dict[str, Any] = {}
        self.load_config()

    def _get_default_config(self) -> Dict[str, Any]:
        """Return a fresh configuration used when no usable file exists.

        Defaults are empty here; override or extend this method to seed
        application-specific settings.
        """
        return {}

    def load_config(self) -> None:
        """Load configuration from JSON file.

        If the file does not exist, the defaults are installed and written
        to disk. If it exists but cannot be read or parsed, the error is
        printed and the defaults are used in memory only.
        """
        if not self.config_file.exists():
            self._config = self._get_default_config()
            self.save_config()
            return

        try:
            if self.use_orjson:
                self._config = orjson.loads(self.config_file.read_bytes())
            else:
                # Explicit UTF-8 so files written by either backend are read
                # the same way regardless of the platform default encoding.
                self._config = json.loads(self.config_file.read_text(encoding='utf-8'))
        except Exception as e:
            # Deliberately broad: loading is best-effort.
            print(f"Error loading config: {e}")
            self._config = self._get_default_config()

    def save_config(self) -> None:
        """Save current configuration to file.

        The configuration is serialized before the file is opened, so a
        serialization error leaves the existing file untouched. Errors are
        printed, not raised.
        """
        try:
            if self.use_orjson:
                payload = orjson.dumps(self._config, option=orjson.OPT_INDENT_2)
                self.config_file.write_bytes(payload)
            else:
                text = json.dumps(self._config, indent=2)
                self.config_file.write_text(text, encoding='utf-8')
        except Exception as e:
            # Deliberately broad: saving is best-effort.
            print(f"Error saving config: {e}")

    def get(self, key: str, default: Any = None) -> Any:
        """Return the value stored under *key*, or *default* if absent."""
        return self._config.get(key, default)

    def set(self, key: str, value: Any) -> None:
        """Store *value* under *key*, stamp 'last_modified', and save."""
        self._config[key] = value
        self._config['last_modified'] = datetime.now().isoformat()
        self.save_config()

# Usage
config = ConfigManager()
config.set('database_url', 'postgresql://localhost:5432/mydb')
db_url = config.get('database_url')

layout: end
class: text-center

Thank You!

JSON Mastery Achieved 🎉

Key Takeaways

  • Use orjson for performance-critical applications
  • Always handle JSON errors gracefully
  • Validate input data consistently
  • Consider memory usage with large datasets

Quick Reference

  • json.loads() / json.dumps()
  • Custom encoders for complex objects
  • orjson for 2-6x speed improvement
  • Security: never use eval()

Next Steps

  • Profile your JSON usage
  • Migrate high-traffic endpoints to orjson
  • Implement robust error handling
  • Consider schema validation libraries
Questions? Let's discuss JSON optimization strategies!
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment