hectorip · January 11, 2025 18:50
diff --git a/SimpleDB.py b/SimpleDB.py
 import os
 import json
 import time
 import random
 from typing import Dict, Any

 class SimpleDB:
    """Toy JSON-file database for comparing indexed and sequential lookups.

    Records are stored as a JSON list in ``<db_name>_data.json``. An index
    mapping ``str(record['id'])`` to the record's position in that list is
    stored in ``<db_name>_index.json`` and mirrored in ``self.index``.
    """

    def __init__(self, db_name: str):
        """Open the database *db_name*, creating empty files when missing."""
        self.db_name = db_name
        self.data_file = f"{db_name}_data.json"
        self.index_file = f"{db_name}_index.json"

        # Create the backing files if they do not exist yet.
        if not os.path.exists(self.data_file):
            self._write_json(self.data_file, [])
        if not os.path.exists(self.index_file):
            self._write_json(self.index_file, {})

        self.index = self._read_json(self.index_file)

    def _write_json(self, filename: str, data: Any) -> None:
        """Serialize *data* as JSON into *filename*, replacing its contents."""
        # Author's design note: each file should have a limited size
        # (100 records). That limit is not implemented here.
        with open(filename, 'w', encoding='utf-8') as f:
            json.dump(data, f)

    def _read_json(self, filename: str) -> Any:
        """Return the parsed JSON content of *filename*."""
        with open(filename, 'r', encoding='utf-8') as f:
            return json.load(f)

    def insert_data(self, records: list[Dict[str, Any]]) -> None:
        """Store *records* as the database contents and rebuild the id index.

        The data file is overwritten, so previously stored records are
        replaced. Every record must have an ``'id'`` key.

        Raises:
            KeyError: if a record has no ``'id'`` key (nothing is written).
        """
        # Build the index first so a malformed record fails before any file
        # is touched. Keys are strings because JSON object keys are strings.
        # Author's notes: the index should eventually also name the file that
        # holds the record; a dictionary is a hash table with O(1) lookup.
        index = {str(record['id']): i for i, record in enumerate(records)}

        self._write_json(self.data_file, records)
        self._write_json(self.index_file, index)

        # Keep the in-memory index in sync with what is now on disk;
        # otherwise indexed lookups would keep using the stale index.
        self.index = index

    def find_by_id_no_index(self, target_id: int) -> Dict[str, Any]:
        """Find a record by id without the index (sequential scan).

        Returns:
            A dict with ``'record'`` (the match, or ``None``), ``'time'``
            (elapsed seconds) and ``'reads'`` (number of records examined).
        """
        start_time = time.perf_counter()
        records = self._read_json(filename=self.data_file)

        # Simulate a sequential read from disk: examine records in order.
        for reads, record in enumerate(records, start=1):
            if record['id'] == target_id:
                return {
                    'record': record,
                    'time': time.perf_counter() - start_time,
                    'reads': reads
                }

        return {
            'record': None,
            'time': time.perf_counter() - start_time,
            'reads': len(records)
        }

    def find_by_id_with_index(self, target_id: int) -> Dict[str, Any]:
        """Find a record by id using the in-memory index.

        Returns:
            A dict with ``'record'`` (the match, or ``None`` when the id is
            not indexed), ``'time'`` (elapsed seconds) and ``'reads'``
            (always 1).
        """
        start_time = time.perf_counter()

        # An id missing from the index is reported as "not found" instead of
        # raising KeyError; the data file is not read in that case.
        position = self.index.get(str(target_id))
        if position is None:
            return {
                'record': None,
                'time': time.perf_counter() - start_time,
                'reads': 1
            }

        # Load the data and jump straight to the indexed position.
        records = self._read_json(self.data_file)
        record = records[position]

        return {
            'record': record,
            'time': time.perf_counter() - start_time,
            'reads': 1
        }

    def find_by_name(self, name: str):
        """Look up records by name (not implemented yet; returns ``None``).

        Approaches noted by the author:
        - load the data and scan every record, comparing the name;
        - an index over names: hash(name) = id, or
          hash(word) = id1, id2, id3.
        """
        return None

        
 def demo_index_performance():
    """Print lookup timings with and without the id index for sample ids.

    Uses the database named ``example_db`` in the current directory. When
    that database has no index entries yet, it is populated with
    ``num_records`` generated records first; an already populated database
    is reused as-is so repeated runs skip the expensive generation step.
    """
    # Create (or open) the example database.
    db = SimpleDB("example_db")

    num_records = 1000000

    if not db.index:
        # Generate the example data only when it is actually needed.
        records = [
            {
                'id': i,
                'name': f'Usuario {i}',
                'email': f'usuario{i}@example.com',
                'data': 'x' * 100  # Simulate a large data field
            }
            for i in range(num_records)
        ]
        db.insert_data(records)
        # Re-open so the in-memory index reflects what was just written.
        db = SimpleDB("example_db")

    # Sample ids to look up; all of them sit near the start of the data set.
    test_ids = [5, 500, 999]

    print(f"\nBuscando {len(test_ids)} registros en una base de datos con {num_records} registros:")
    print("\nBúsquedas sin índice:")
    for record_id in test_ids:
        result = db.find_by_id_no_index(record_id)
        print(f"ID {record_id}: {result['time']:.6f} segundos, {result['reads']} lecturas")

    print("\nBúsquedas con índice:")
    for record_id in test_ids:
        result = db.find_by_id_with_index(record_id)
        print(f"ID {record_id}: {result['time']:.6f} segundos, {result['reads']} lecturas")

 # Run the demo only when this file is executed as a script, not on import.
 if __name__ == "__main__":
    demo_index_performance()
	import os
	import json
	import time
	import random
	from typing import Dict, Any

	class SimpleDB:
	    """Toy JSON-file database for comparing indexed and sequential lookups.

	    Records are stored as a JSON list in ``<db_name>_data.json``. An index
	    mapping ``str(record['id'])`` to the record's position in that list is
	    stored in ``<db_name>_index.json`` and mirrored in ``self.index``.
	    """

	    def __init__(self, db_name: str):
	        """Open the database *db_name*, creating empty files when missing."""
	        self.db_name = db_name
	        self.data_file = f"{db_name}_data.json"
	        self.index_file = f"{db_name}_index.json"

	        # Create the backing files if they do not exist yet.
	        if not os.path.exists(self.data_file):
	            self._write_json(self.data_file, [])
	        if not os.path.exists(self.index_file):
	            self._write_json(self.index_file, {})

	        self.index = self._read_json(self.index_file)

	    def _write_json(self, filename: str, data: Any) -> None:
	        """Serialize *data* as JSON into *filename*, replacing its contents."""
	        # Author's design note: each file should have a limited size
	        # (100 records). That limit is not implemented here.
	        with open(filename, 'w', encoding='utf-8') as f:
	            json.dump(data, f)

	    def _read_json(self, filename: str) -> Any:
	        """Return the parsed JSON content of *filename*."""
	        with open(filename, 'r', encoding='utf-8') as f:
	            return json.load(f)

	    def insert_data(self, records: list[Dict[str, Any]]) -> None:
	        """Store *records* as the database contents and rebuild the id index.

	        The data file is overwritten, so previously stored records are
	        replaced. Every record must have an ``'id'`` key.

	        Raises:
	            KeyError: if a record has no ``'id'`` key (nothing is written).
	        """
	        # Build the index first so a malformed record fails before any file
	        # is touched. Keys are strings because JSON object keys are strings.
	        # Author's notes: the index should eventually also name the file that
	        # holds the record; a dictionary is a hash table with O(1) lookup.
	        index = {str(record['id']): i for i, record in enumerate(records)}

	        self._write_json(self.data_file, records)
	        self._write_json(self.index_file, index)

	        # Keep the in-memory index in sync with what is now on disk;
	        # otherwise indexed lookups would keep using the stale index.
	        self.index = index

	    def find_by_id_no_index(self, target_id: int) -> Dict[str, Any]:
	        """Find a record by id without the index (sequential scan).

	        Returns:
	            A dict with ``'record'`` (the match, or ``None``), ``'time'``
	            (elapsed seconds) and ``'reads'`` (number of records examined).
	        """
	        start_time = time.perf_counter()
	        records = self._read_json(filename=self.data_file)

	        # Simulate a sequential read from disk: examine records in order.
	        for reads, record in enumerate(records, start=1):
	            if record['id'] == target_id:
	                return {
	                    'record': record,
	                    'time': time.perf_counter() - start_time,
	                    'reads': reads
	                }

	        return {
	            'record': None,
	            'time': time.perf_counter() - start_time,
	            'reads': len(records)
	        }

	    def find_by_id_with_index(self, target_id: int) -> Dict[str, Any]:
	        """Find a record by id using the in-memory index.

	        Returns:
	            A dict with ``'record'`` (the match, or ``None`` when the id is
	            not indexed), ``'time'`` (elapsed seconds) and ``'reads'``
	            (always 1).
	        """
	        start_time = time.perf_counter()

	        # An id missing from the index is reported as "not found" instead of
	        # raising KeyError; the data file is not read in that case.
	        position = self.index.get(str(target_id))
	        if position is None:
	            return {
	                'record': None,
	                'time': time.perf_counter() - start_time,
	                'reads': 1
	            }

	        # Load the data and jump straight to the indexed position.
	        records = self._read_json(self.data_file)
	        record = records[position]

	        return {
	            'record': record,
	            'time': time.perf_counter() - start_time,
	            'reads': 1
	        }

	    def find_by_name(self, name: str):
	        """Look up records by name (not implemented yet; returns ``None``).

	        Approaches noted by the author:
	        - load the data and scan every record, comparing the name;
	        - an index over names: hash(name) = id, or
	          hash(word) = id1, id2, id3.
	        """
	        return None

	def demo_index_performance():
	    """Print lookup timings with and without the id index for sample ids.

	    Uses the database named ``example_db`` in the current directory. When
	    that database has no index entries yet, it is populated with
	    ``num_records`` generated records first; an already populated database
	    is reused as-is so repeated runs skip the expensive generation step.
	    """
	    # Create (or open) the example database.
	    db = SimpleDB("example_db")

	    num_records = 1000000

	    if not db.index:
	        # Generate the example data only when it is actually needed.
	        records = [
	            {
	                'id': i,
	                'name': f'Usuario {i}',
	                'email': f'usuario{i}@example.com',
	                'data': 'x' * 100  # Simulate a large data field
	            }
	            for i in range(num_records)
	        ]
	        db.insert_data(records)
	        # Re-open so the in-memory index reflects what was just written.
	        db = SimpleDB("example_db")

	    # Sample ids to look up; all of them sit near the start of the data set.
	    test_ids = [5, 500, 999]

	    print(f"\nBuscando {len(test_ids)} registros en una base de datos con {num_records} registros:")
	    print("\nBúsquedas sin índice:")
	    for record_id in test_ids:
	        result = db.find_by_id_no_index(record_id)
	        print(f"ID {record_id}: {result['time']:.6f} segundos, {result['reads']} lecturas")

	    print("\nBúsquedas con índice:")
	    for record_id in test_ids:
	        result = db.find_by_id_with_index(record_id)
	        print(f"ID {record_id}: {result['time']:.6f} segundos, {result['reads']} lecturas")

	# Run the demo only when this file is executed as a script, not on import.
	if __name__ == "__main__":
	    demo_index_performance()
No results found