Created
January 11, 2025 18:50
-
-
Save hectorip/74668daa5a246db9aaaf9a87c3536270 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import os | |
| import json | |
| import time | |
| import random | |
| from typing import Dict, Any | |
class SimpleDB:
    """Minimal JSON-file-backed record store with an id -> position index.

    Records are stored as a single JSON list in ``<db_name>_data.json``. The
    index is stored as a single JSON object in ``<db_name>_index.json`` and
    maps ``str(record['id'])`` to the record's position in that list. The
    class exists to compare a sequential scan against an indexed lookup.
    """

    def __init__(self, db_name: str):
        """Open the database, creating empty data and index files if missing.

        Args:
            db_name: Prefix (may include a directory) for the two JSON files.
        """
        self.db_name = db_name
        self.data_file = f"{db_name}_data.json"
        self.index_file = f"{db_name}_index.json"

        # Create the backing files if they do not exist yet.
        if not os.path.exists(self.data_file):
            self._write_json(self.data_file, [])
        if not os.path.exists(self.index_file):
            self._write_json(self.index_file, {})

        # In-memory copy of the index; refreshed by insert_data().
        self.index = self._read_json(self.index_file)

    def _write_json(self, filename: str, data: Any) -> None:
        """Serialize *data* as JSON into *filename*, overwriting it."""
        # NOTE: design idea from the original author, not implemented here:
        # each file would have a limited size (100 records).
        with open(filename, 'w', encoding='utf-8') as f:
            json.dump(data, f)

    def _read_json(self, filename: str) -> Any:
        """Load and return the JSON document stored in *filename*."""
        with open(filename, 'r', encoding='utf-8') as f:
            return json.load(f)

    def insert_data(self, records: list[Dict[str, Any]]) -> None:
        """Replace the stored records with *records* and rebuild the id index.

        Args:
            records: Records to store; each one must contain an ``'id'`` key.
                If an id repeats, the index keeps the last position.

        Raises:
            KeyError: If a record has no ``'id'`` key.
        """
        # Persist the data.
        self._write_json(self.data_file, records)

        # Build the index by id. A dict is a hash table, so lookups are O(1).
        # Keys are stringified because JSON object keys are always strings,
        # which keeps the in-memory index identical to the one reloaded from
        # disk.
        # NOTE: design idea, not implemented: the index should also record
        # which file each record lives in.
        index = {str(record['id']): i for i, record in enumerate(records)}
        self._write_json(self.index_file, index)

        # Keep the in-memory index in sync with what was just written;
        # otherwise indexed lookups on this instance would use the stale
        # index loaded in __init__.
        self.index = index

    def find_by_id_no_index(self, target_id: int) -> Dict[str, Any]:
        """Find a record by id without the index (sequential scan).

        Args:
            target_id: Value compared against each record's ``'id'``.

        Returns:
            A dict with ``'record'`` (the first matching record, or ``None``),
            ``'time'`` (elapsed seconds) and ``'reads'`` (number of records
            examined).
        """
        start_time = time.perf_counter()
        records = self._read_json(filename=self.data_file)

        # Simulate a sequential read from disk: examine records one by one.
        for reads, record in enumerate(records, start=1):
            if record['id'] == target_id:
                return {
                    'record': record,
                    'time': time.perf_counter() - start_time,
                    'reads': reads,
                }

        return {
            'record': None,
            'time': time.perf_counter() - start_time,
            'reads': len(records),
        }

    def find_by_id_with_index(self, target_id: int) -> Dict[str, Any]:
        """Find a record by id using the index.

        Args:
            target_id: Id to look up; it is stringified to match index keys.

        Returns:
            A dict with ``'record'`` (the record, or ``None`` when the id is
            not in the index), ``'time'`` (elapsed seconds) and ``'reads'``
            (always 1: a single direct access, or the index probe on a miss).
        """
        start_time = time.perf_counter()

        # The id is not in the index: report a miss instead of raising,
        # mirroring find_by_id_no_index.
        position = self.index.get(str(target_id))
        if position is None:
            return {
                'record': None,
                'time': time.perf_counter() - start_time,
                'reads': 1,
            }

        # Load the data and jump straight to the indexed position.
        records = self._read_json(self.data_file)
        record = records[position]

        return {
            'record': record,
            'time': time.perf_counter() - start_time,
            'reads': 1,
        }

    def find_by_name(self, name: str):
        """Look up records by name (not implemented yet; returns ``None``)."""
        # Planned approach: load the data and walk every record comparing
        # the name.
        # A name index could look like:
        #   hash(name) = id
        #   hash(word) = id1, id2, id3
        return None
def demo_index_performance(num_records: int = 1000000) -> None:
    """Compare lookup time and read counts with and without the id index.

    Opens (or creates) the ``example_db`` database in the current directory.
    When its index is empty, the database is first populated with
    *num_records* synthetic records. Each sample id is then looked up with a
    sequential scan and with the index, and the timings are printed.

    Args:
        num_records: Number of synthetic records generated when the database
            is empty; also the count shown in the printed header. It should
            exceed 999 so that every sample id exists.
    """
    # Create the example database.
    db_name = "example_db"
    db = SimpleDB(db_name)

    # Generate and insert the sample data only when the database is empty,
    # so repeated runs do not rebuild a large list that is never used.
    if not db.index:
        records = [
            {
                'id': i,
                'name': f'Usuario {i}',
                'email': f'usuario{i}@example.com',
                'data': 'x' * 100,  # Simulate a large data field
            }
            for i in range(num_records)
        ]
        db.insert_data(records)
        # Re-open so the in-memory index is loaded from the file just
        # written, whether or not insert_data refreshes it itself.
        db = SimpleDB(db_name)

    # Run the sample lookups.
    test_ids = [5, 500, 999]
    print(f"\nBuscando {len(test_ids)} registros en una base de datos con {num_records} registros:")

    print("\nBúsquedas sin índice:")
    for record_id in test_ids:
        result = db.find_by_id_no_index(record_id)
        print(f"ID {record_id}: {result['time']:.6f} segundos, {result['reads']} lecturas")

    print("\nBúsquedas con índice:")
    for record_id in test_ids:
        result = db.find_by_id_with_index(record_id)
        print(f"ID {record_id}: {result['time']:.6f} segundos, {result['reads']} lecturas")
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    demo_index_performance()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment