Created
February 4, 2022 17:14
-
-
Save qb20nh/06ad21054205b62c01d2887ccf3377f3 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from time import sleep, time | |
import os | |
def binary_search(condition, low, high):
    """Find an integer in ``[low, high]`` for which ``condition`` returns 0.

    Args:
        condition: Callable that takes a candidate integer and returns 0 on
            a match, a negative number when the search should continue
            below the candidate, or a positive number when it should
            continue above it.
        low: Smallest candidate to consider (inclusive).
        high: Largest candidate to consider (inclusive).

    Returns:
        The matching integer, or ``None`` when ``condition`` is not callable
        or no candidate in the range produces 0.
    """
    if not callable(condition):
        return None

    # `<=` (not `<`) so that a range narrowed down to a single candidate is
    # still probed; otherwise matches at the edge of a range are missed.
    while low <= high:
        # Floor division stays exact for arbitrarily large integers, whereas
        # int((high + low) / 2) goes through a float and can lose precision.
        mid = (low + high) // 2
        res = condition(mid)
        if res == 0:
            return mid
        if res < 0:
            high = mid - 1
        else:
            low = mid + 1
    return None
def find_json_property(idx):
    """Probe the data file at byte offset ``idx`` and report a search direction.

    Reads up to ``preview_bytes`` bytes from the module-level binary file
    handle ``file``, starting at ``idx``, and prints the decoded preview. If
    the bytes do not decode as UTF-8, the start position is advanced one byte
    at a time until they do. A one-second sleep then stands in for a database
    query, and ``idx`` is compared against a hard-coded target offset.

    Args:
        idx: Byte offset into the file to probe.

    Returns:
        0 when ``idx`` is within ``preview_bytes`` of the target, -1 when
        ``idx`` is beyond the target, and 1 otherwise.
    """
    print(f'seeking to {idx}')
    preview_bytes = 1024
    offset = 0
    while True:
        file.seek(idx + offset)
        raw = file.read(preview_bytes)
        try:
            read_content = raw.decode("utf-8")
        except UnicodeDecodeError:
            # The window is not aligned on UTF-8 character boundaries; shift
            # it by one byte and retry. Only decode errors are retried, so
            # I/O failures propagate instead of looping forever.
            offset += 1
        else:
            break
    print(f'content: {read_content}')

    target = 2500000000
    print('querying DB for property songID')  # query db here
    sleep(1)  # assume every query takes about 1 second
    if abs(idx - target) <= preview_bytes:  # change this to json property match result
        return 0
    if idx > target:
        return -1
    return 1
# Script body: binary-search the byte offsets of the data file using
# find_json_property as the probe, and report the result and elapsed time.
file_path = "C:/json/song_data_file.json"  # replace this with your file location
start_time = time()
size_in_bytes = os.path.getsize(file_path)
print(f'file size: {size_in_bytes}')
# `file` must remain a module-level name because find_json_property reads it
# as a global; the with-block guarantees the handle is closed afterwards.
with open(file_path, 'rb') as file:
    result = binary_search(find_json_property, 0, size_in_bytes)
    end_time = time()
print(f'found at {result}')
print(f'elapsed {end_time-start_time}')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment