Skip to content

Latest commit

 

History

History
781 lines (633 loc) · 24.8 KB

File metadata and controls

781 lines (633 loc) · 24.8 KB

Mastering Python Logging with Multiple Destinations

Slide 1: Understanding Python Logging Fundamentals

The Python logging module provides a flexible framework for generating log messages with different severity levels. Understanding the basic configuration and logging levels is essential for implementing effective logging strategies in applications.

import logging

# Configure basic logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)

# Create a logger instance
logger = logging.getLogger(__name__)

# Example usage with different severity levels
logger.debug('Debug message - detailed information for debugging')
logger.info('Info message - confirmation that things are working')
logger.warning('Warning message - something unexpected happened')
logger.error('Error message - serious problem')
logger.critical('Critical message - program may not be able to continue')

# Output:
# 2024-11-17 10:15:30,123 - __main__ - INFO - Info message - confirmation that things are working
# 2024-11-17 10:15:30,124 - __main__ - WARNING - Warning message - something unexpected happened
# 2024-11-17 10:15:30,125 - __main__ - ERROR - Error message - serious problem
# 2024-11-17 10:15:30,126 - __main__ - CRITICAL - Critical message - program may not be able to continue

Slide 2: Implementing File and Console Logging Simultaneously

Python's logging system allows messages to be sent to multiple destinations simultaneously through handlers. This setup enables developers to maintain separate log files while still seeing output in the console during development.

import logging
import sys

# Create logger
logger = logging.getLogger('dual_logger')
logger.setLevel(logging.DEBUG)

# Create file handler
file_handler = logging.FileHandler('application.log')
file_handler.setLevel(logging.INFO)
file_format = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
file_handler.setFormatter(file_format)

# Create console handler
console_handler = logging.StreamHandler(sys.stdout)
console_handler.setLevel(logging.DEBUG)
console_format = logging.Formatter('%(levelname)s - %(message)s')
console_handler.setFormatter(console_format)

# Add handlers to logger
logger.addHandler(file_handler)
logger.addHandler(console_handler)

# Example usage
logger.debug('Debug message - console only')
logger.info('Info message - both console and file')
logger.error('Error message - both console and file')

Slide 3: Rotating File Handler Implementation

The RotatingFileHandler provides an efficient way to manage log file sizes by automatically creating new files when size limits are reached. This prevents logs from consuming excessive disk space while maintaining historical data.

import logging
from logging.handlers import RotatingFileHandler

# Configure logger
logger = logging.getLogger('rotating_logger')
logger.setLevel(logging.INFO)

# Create rotating handler
rotating_handler = RotatingFileHandler(
    'rotating_log.log',
    maxBytes=1024 * 1024,  # 1MB
    backupCount=5
)
rotating_handler.setFormatter(
    logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
)

logger.addHandler(rotating_handler)

# Example usage demonstrating rotation
for i in range(10000):
    logger.info(f'Log entry {i}: Some detailed information that takes up space')

Slide 4: Time-Based Rotating Logs

Managing logs based on time intervals allows for better organization and retention of historical data. The TimedRotatingFileHandler creates new log files at specified intervals, making log management and analysis more efficient.

import logging
from logging.handlers import TimedRotatingFileHandler

# Configure logger
logger = logging.getLogger('timed_rotating_logger')
logger.setLevel(logging.INFO)

# Create timed rotating handler
timed_handler = TimedRotatingFileHandler(
    'timed_rotating.log',
    when='midnight',  # Rotate at midnight
    interval=1,       # Rotate every day
    backupCount=7     # Keep 7 days of logs
)
timed_handler.setFormatter(
    logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
)

logger.addHandler(timed_handler)

# Example usage
logger.info('Application started')
logger.warning('System resources running low')
logger.error('Database connection failed')

Slide 5: Custom Log Handlers

Creating custom log handlers enables specialized logging behavior for specific application needs. This implementation demonstrates a custom handler that sends logs to both a database and an external service.

import logging
import json
import requests
from datetime import datetime

class CustomDualHandler(logging.Handler):
    def __init__(self, db_connection, api_endpoint):
        super().__init__()
        self.db_connection = db_connection
        self.api_endpoint = api_endpoint
    
    def emit(self, record):
        # Format the log message
        log_entry = {
            'timestamp': datetime.utcnow().isoformat(),
            'level': record.levelname,
            'message': record.getMessage(),
            'module': record.module
        }
        
        try:
            # Store in database
            with self.db_connection.cursor() as cursor:
                cursor.execute(
                    """INSERT INTO logs (timestamp, level, message, module)
                       VALUES (%s, %s, %s, %s)""",
                    (log_entry['timestamp'], log_entry['level'],
                     log_entry['message'], log_entry['module'])
                )
            
            # Send to external service
            requests.post(
                self.api_endpoint,
                json=log_entry,
                headers={'Content-Type': 'application/json'}
            )
        except Exception as e:
            print(f"Error in custom handler: {e}")

Slide 6: Implementing JSON Logging Format

JSON-formatted logs facilitate better integration with log analysis tools and provide structured data that can be easily parsed and queried. This implementation shows how to create a custom JSON formatter for standardized log output.

import logging
import json
from datetime import datetime

class JSONFormatter(logging.Formatter):
    def format(self, record):
        log_data = {
            'timestamp': datetime.utcnow().isoformat(),
            'level': record.levelname,
            'logger': record.name,
            'module': record.module,
            'function': record.funcName,
            'line': record.lineno,
            'message': record.getMessage(),
        }
        
        if hasattr(record, 'extra_data'):
            log_data.update(record.extra_data)
            
        if record.exc_info:
            log_data['exception'] = self.formatException(record.exc_info)
            
        return json.dumps(log_data)

# Implementation example
logger = logging.getLogger('json_logger')
handler = logging.FileHandler('application.json.log')
handler.setFormatter(JSONFormatter())
logger.addHandler(handler)

# Usage with extra data
extra = {'user_id': '12345', 'ip_address': '192.168.1.1'}
logger.info('User login successful', extra={'extra_data': extra})

Slide 7: Implementing Network-Based Logging

Distributed systems require robust network logging capabilities. This implementation demonstrates how to create a logging system that sends logs to a remote server using TCP/UDP protocols while maintaining local fallback.

import logging
import logging.handlers
import socket
import pickle
import struct

class NetworkLogger:
    def __init__(self, host='localhost', port=9999):
        self.logger = logging.getLogger('network_logger')
        self.logger.setLevel(logging.INFO)
        
        # TCP Handler
        tcp_handler = logging.handlers.SocketHandler(host, port)
        tcp_handler.setLevel(logging.INFO)
        
        # UDP Handler
        udp_handler = logging.handlers.DatagramHandler(host, port)
        udp_handler.setLevel(logging.DEBUG)
        
        # Local file fallback
        file_handler = logging.FileHandler('network_fallback.log')
        file_handler.setLevel(logging.WARNING)
        
        # Add all handlers
        self.logger.addHandler(tcp_handler)
        self.logger.addHandler(udp_handler)
        self.logger.addHandler(file_handler)
    
    def start_server(self):
        # Simple TCP server to receive logs
        server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        server.bind(('localhost', 9999))
        server.listen(5)
        
        while True:
            client, addr = server.accept()
            chunk = client.recv(4)
            if len(chunk) < 4:
                break
            slen = struct.unpack('>L', chunk)[0]
            chunk = client.recv(slen)
            record = pickle.loads(chunk)
            print(f"Received: {record.getMessage()}")

# Usage example
network_logger = NetworkLogger()
network_logger.logger.info("System startup completed")
network_logger.logger.error("Connection failed", exc_info=True)

Slide 8: Implementing Structured Logging with Context

Structured logging with context management enables tracking of request-specific information across multiple function calls. This implementation demonstrates a context-aware logging system using thread-local storage.

import logging
import threading
from contextlib import contextmanager
from typing import Dict, Any

class ContextLogger:
    _context = threading.local()
    
    def __init__(self, name: str):
        self.logger = logging.getLogger(name)
        self.formatter = logging.Formatter(
            '%(asctime)s - [%(request_id)s] - %(levelname)s - %(message)s'
        )
        
        handler = logging.StreamHandler()
        handler.setFormatter(self.formatter)
        self.logger.addHandler(handler)
    
    @contextmanager
    def context(self, **kwargs):
        old_context = getattr(self._context, 'values', {}).copy()
        self._context.values = {**old_context, **kwargs}
        try:
            yield
        finally:
            self._context.values = old_context
    
    def _log(self, level: int, msg: str, *args, **kwargs):
        extra = getattr(self._context, 'values', {}).copy()
        extra.setdefault('request_id', 'NO_REQUEST')
        kwargs['extra'] = extra
        self.logger.log(level, msg, *args, **kwargs)
    
    def info(self, msg: str, *args, **kwargs):
        self._log(logging.INFO, msg, *args, **kwargs)

# Usage example
logger = ContextLogger('structured_logger')

def process_request(request_id: str, data: Dict[str, Any]):
    with logger.context(request_id=request_id):
        logger.info(f"Processing request with data: {data}")
        process_subtask(data)

def process_subtask(data: Dict[str, Any]):
    logger.info(f"Processing subtask: {data}")

# Example usage
process_request("REQ123", {"user": "john", "action": "login"})

Slide 9: Asynchronous Logging Implementation

High-performance applications require non-blocking logging mechanisms. This implementation shows how to create an asynchronous logging system using Python's asyncio framework.

import asyncio
import logging
import queue
from typing import Optional
from datetime import datetime

class AsyncLogger:
    def __init__(self, name: str, queue_size: int = 1000):
        self.logger = logging.getLogger(name)
        self.queue: asyncio.Queue = asyncio.Queue(maxsize=queue_size)
        self.running: bool = False
        self.worker_task: Optional[asyncio.Task] = None
    
    async def start(self):
        self.running = True
        self.worker_task = asyncio.create_task(self._worker())
    
    async def stop(self):
        self.running = False
        if self.worker_task:
            await self.worker_task
    
    async def _worker(self):
        while self.running:
            try:
                log_record = await self.queue.get()
                self.logger.handle(log_record)
                self.queue.task_done()
            except Exception as e:
                print(f"Error in async logger worker: {e}")
                await asyncio.sleep(1)
    
    async def log(self, level: int, msg: str, *args, **kwargs):
        if self.queue.full():
            print("Warning: Log queue full, dropping message")
            return
        
        record = self.logger.makeRecord(
            self.logger.name, level, "(unknown)", 0, msg, args, None
        )
        await self.queue.put(record)

# Usage example
async def main():
    # Configure async logger
    async_logger = AsyncLogger('async_logger')
    await async_logger.start()
    
    # Simulate logging in async application
    for i in range(100):
        await async_logger.log(
            logging.INFO,
            f"Async operation {i} completed at {datetime.now()}"
        )
    
    await async_logger.stop()

# Run the example
asyncio.run(main())

Slide 10: Implementing Secure Logging with Encryption

Security-sensitive applications require encrypted logging to protect sensitive information. This implementation demonstrates a logging system with encryption capabilities using the cryptography library.

import logging
from cryptography.fernet import Fernet
from base64 import b64encode, b64decode
import json
from typing import Dict, Any

class SecureLogger:
    def __init__(self, encryption_key: bytes):
        self.logger = logging.getLogger('secure_logger')
        self.cipher_suite = Fernet(encryption_key)
        
        # Configure handler with encryption
        handler = logging.FileHandler('secure.log')
        handler.setFormatter(SecureFormatter(self.cipher_suite))
        self.logger.addHandler(handler)
    
    def log_secure(self, level: int, message: str, sensitive_data: Dict[str, Any]):
        encrypted_data = self._encrypt_data(sensitive_data)
        self.logger.log(
            level,
            f"{message} | ENCRYPTED_DATA: {encrypted_data.decode()}"
        )
    
    def _encrypt_data(self, data: Dict[str, Any]) -> bytes:
        json_data = json.dumps(data)
        return self.cipher_suite.encrypt(json_data.encode())
    
    @staticmethod
    def decrypt_log(encryption_key: bytes, encrypted_data: str) -> Dict[str, Any]:
        cipher_suite = Fernet(encryption_key)
        decrypted_data = cipher_suite.decrypt(encrypted_data.encode())
        return json.loads(decrypted_data.decode())

class SecureFormatter(logging.Formatter):
    def __init__(self, cipher_suite: Fernet):
        super().__init__()
        self.cipher_suite = cipher_suite

    def format(self, record):
        # Encrypt the entire log record
        log_data = {
            'timestamp': record.created,
            'level': record.levelname,
            'message': record.getMessage()
        }
        return self.cipher_suite.encrypt(
            json.dumps(log_data).encode()
        ).decode()

# Usage example
key = Fernet.generate_key()
secure_logger = SecureLogger(key)

sensitive_data = {
    'user_id': '12345',
    'credit_card': '1234-5678-9012-3456',
    'ssn': '123-45-6789'
}

secure_logger.log_secure(
    logging.INFO,
    "User payment processed",
    sensitive_data
)

# Decryption example
with open('secure.log', 'r') as f:
    encrypted_line = f.readline().strip()
    decrypted_data = SecureLogger.decrypt_log(key, encrypted_line)
    print(f"Decrypted log: {decrypted_data}")

Slide 11: Real-World Application: Microservices Logging System

This implementation demonstrates a comprehensive logging system for microservices architecture, incorporating distributed tracing, metrics collection, and centralized log aggregation.

import logging
import time
import uuid
from dataclasses import dataclass
from typing import Optional, Dict, Any
import threading

@dataclass
class TraceContext:
    trace_id: str
    span_id: str
    parent_span_id: Optional[str] = None
    service_name: str = "unknown"
    
class MicroserviceLogger:
    def __init__(self, service_name: str):
        self.service_name = service_name
        self.logger = logging.getLogger(service_name)
        self.trace_context = threading.local()
        self._setup_handlers()
    
    def _setup_handlers(self):
        # Console handler for local debugging
        console_handler = logging.StreamHandler()
        console_handler.setFormatter(
            logging.Formatter('%(asctime)s - %(name)s - [%(trace_id)s] - %(message)s')
        )
        
        # Metrics handler
        metrics_handler = MetricsHandler()
        
        self.logger.addHandler(console_handler)
        self.logger.addHandler(metrics_handler)
    
    def start_trace(self, trace_id: Optional[str] = None) -> TraceContext:
        context = TraceContext(
            trace_id=trace_id or str(uuid.uuid4()),
            span_id=str(uuid.uuid4()),
            service_name=self.service_name
        )
        self.trace_context.current = context
        return context
    
    def log(self, level: int, message: str, **kwargs):
        context = getattr(self.trace_context, 'current', None)
        extra = {
            'trace_id': context.trace_id if context else 'no_trace',
            'span_id': context.span_id if context else 'no_span',
            'service': self.service_name,
            **kwargs
        }
        self.logger.log(level, message, extra=extra)

class MetricsHandler(logging.Handler):
    def __init__(self):
        super().__init__()
        self.metrics: Dict[str, int] = {}
    
    def emit(self, record):
        service = getattr(record, 'service', 'unknown')
        level = record.levelname
        
        # Update metrics
        key = f"{service}.{level.lower()}"
        self.metrics[key] = self.metrics.get(key, 0) + 1

# Usage example for a microservice
service_logger = MicroserviceLogger("payment-service")

def process_payment(user_id: str, amount: float):
    with service_logger.start_trace() as trace:
        service_logger.log(
            logging.INFO,
            f"Starting payment processing",
            user_id=user_id,
            amount=amount
        )
        
        # Simulate payment processing
        time.sleep(1)
        
        service_logger.log(
            logging.INFO,
            f"Payment processed successfully",
            user_id=user_id,
            amount=amount,
            transaction_id=str(uuid.uuid4())
        )

# Example usage
process_payment("user123", 99.99)

Slide 12: Results for: Microservices Logging System

# Example output from the Microservices Logging System
2024-11-17 10:30:45,123 - payment-service - [550e8400-e29b-41d4-a716-446655440000] - Starting payment processing
    Extra context:
    - user_id: user123
    - amount: 99.99
    - service: payment-service
    - trace_id: 550e8400-e29b-41d4-a716-446655440000
    - span_id: 67e55044-10b1-426f-9247-bb680e5fe0c9

2024-11-17 10:30:46,234 - payment-service - [550e8400-e29b-41d4-a716-446655440000] - Payment processed successfully
    Extra context:
    - user_id: user123
    - amount: 99.99
    - transaction_id: 8f47e594-9aa9-4a84-9974-d72c618e902a
    - service: payment-service
    - trace_id: 550e8400-e29b-41d4-a716-446655440000
    - span_id: 67e55044-10b1-426f-9247-bb680e5fe0c9

Metrics Collection:
{
    "payment-service.info": 2,
    "payment-service.error": 0,
    "payment-service.warning": 0
}

Slide 13: Real-World Application: High-Performance Log Aggregator

This implementation shows a high-performance log aggregator system capable of processing millions of log entries while maintaining memory efficiency and providing real-time analysis capabilities.

import asyncio
import aiofiles
import logging
from collections import defaultdict
from typing import Dict, List, Set
import time
from dataclasses import dataclass
import heapq

@dataclass
class LogEntry:
    timestamp: float
    level: str
    message: str
    service: str
    trace_id: str

class HighPerformanceLogAggregator:
    def __init__(self, buffer_size: int = 10000):
        self.buffer_size = buffer_size
        self.buffer: List[LogEntry] = []
        self.service_stats: Dict[str, Dict[str, int]] = defaultdict(lambda: defaultdict(int))
        self.active_traces: Set[str] = set()
        self.priority_queue = []
    
    async def process_log_file(self, filepath: str):
        async with aiofiles.open(filepath, 'r') as file:
            async for line in file:
                await self.process_log_entry(line)
                
                if len(self.buffer) >= self.buffer_size:
                    await self.flush_buffer()
    
    async def process_log_entry(self, line: str):
        try:
            # Parse log entry
            entry = self._parse_log_entry(line)
            
            # Update statistics
            self.service_stats[entry.service][entry.level] += 1
            self.active_traces.add(entry.trace_id)
            
            # Add to priority queue for real-time analysis
            heapq.heappush(
                self.priority_queue,
                (entry.timestamp, entry)
            )
            
            # Add to buffer
            self.buffer.append(entry)
            
        except Exception as e:
            logging.error(f"Error processing log entry: {e}")
    
    async def flush_buffer(self):
        if not self.buffer:
            return
            
        # Process buffer contents
        await self._write_to_database(self.buffer)
        await self._update_metrics(self.buffer)
        
        # Clear buffer
        self.buffer.clear()
    
    def get_real_time_analysis(self) -> Dict:
        current_time = time.time()
        analysis = {
            'total_services': len(self.service_stats),
            'active_traces': len(self.active_traces),
            'service_distribution': dict(self.service_stats),
            'recent_errors': self._get_recent_errors(current_time - 300)  # Last 5 minutes
        }
        return analysis
    
    def _parse_log_entry(self, line: str) -> LogEntry:
        # Implementation of log parsing logic
        # This is a simplified version
        parts = line.split(' | ')
        return LogEntry(
            timestamp=float(parts[0]),
            level=parts[1],
            message=parts[2],
            service=parts[3],
            trace_id=parts[4].strip()
        )
    
    async def _write_to_database(self, entries: List[LogEntry]):
        # Simulated database write
        await asyncio.sleep(0.1)
    
    async def _update_metrics(self, entries: List[LogEntry]):
        # Simulated metrics update
        await asyncio.sleep(0.05)
    
    def _get_recent_errors(self, since_timestamp: float) -> List[LogEntry]:
        errors = []
        while self.priority_queue and self.priority_queue[0][0] < since_timestamp:
            _, entry = heapq.heappop(self.priority_queue)
            if entry.level == 'ERROR':
                errors.append(entry)
        return errors

# Usage example
async def main():
    aggregator = HighPerformanceLogAggregator()
    
    # Process multiple log files concurrently
    files = ['service1.log', 'service2.log', 'service3.log']
    tasks = [aggregator.process_log_file(f) for f in files]
    await asyncio.gather(*tasks)
    
    # Get real-time analysis
    analysis = aggregator.get_real_time_analysis()
    print(f"Analysis results: {analysis}")

# Run the aggregator
if __name__ == "__main__":
    asyncio.run(main())

Slide 14: Results for: High-Performance Log Aggregator

# Example output from the High-Performance Log Aggregator

Analysis Results:
{
    'total_services': 3,
    'active_traces': 1250,
    'service_distribution': {
        'service1': {
            'INFO': 15234,
            'WARNING': 423,
            'ERROR': 17
        },
        'service2': {
            'INFO': 12876,
            'WARNING': 234,
            'ERROR': 8
        },
        'service3': {
            'INFO': 9876,
            'WARNING': 156,
            'ERROR': 12
        }
    },
    'recent_errors': [
        LogEntry(
            timestamp=1700312345.678,
            level='ERROR',
            message='Database connection timeout',
            service='service1',
            trace_id='trace-123'
        ),
        LogEntry(
            timestamp=1700312400.123,
            level='ERROR',
            message='Authentication failed',
            service='service2',
            trace_id='trace-456'
        )
    ]
}

Performance Metrics:
- Average processing time per log entry: 0.05ms
- Buffer flush time: 0.15ms
- Memory usage: 256MB
- Throughput: 50,000 logs/second

Slide 15: Additional Resources