-
-
Save sumanchapai/b3144a191a29b3552a26f107b343ae97 to your computer and use it in GitHub Desktop.
open data gov.in
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json
import math

import pandas as pd
from requests.api import get
def get_url(offset: int, limit: int = 10) -> str:
    """Build the data.gov.in catalog query URL for one page of records.

    The URL targets a fixed catalog resource, requests JSON output, and
    filters on month "06" and year "2021".

    Args:
        offset: Value placed in the ``offset`` query parameter.
        limit: Value placed in the ``limit`` query parameter. Defaults to
            10, which reproduces the original hard-coded URL exactly.

    Returns:
        The fully formatted request URL.
    """
    # NOTE(review): the API key is embedded in the URL; consider loading it
    # from configuration if this key is not meant to be public.
    return (
        "https://api.data.gov.in/catalog/2c1fd4a5-67c7-4672-a2c6-a0a76c2f00da"
        "?api-key=579b464db66ec23bdd000001cdd3946e44ce4aad7209ff7b23ac571b"
        "&format=json"
        f"&offset={offset}&limit={limit}"
        "&filters[month]=06&filters[year]=2021"
    )
def records_count() -> int:
    """Return the ``total`` field reported by the API for the first page.

    Requests the URL built by ``get_url(0)`` and reads the ``total`` key
    from the JSON response.

    Returns:
        The reported total as an integer.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        KeyError: If the response JSON has no ``total`` key.
    """
    # A timeout keeps the script from hanging forever on a stalled connection.
    response = get(get_url(0), timeout=30)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page as JSON.
    response.raise_for_status()
    # int() honours the declared return type even if the API serialises the
    # count as a string (harmless when it is already an int).
    return int(response.json()['total'])
# Download every record for the query defined in get_url and write it out in
# chunks: each chunk becomes one JSON file and one CSV file in the current
# working directory.
total_records = records_count()
max_records_in_a_file = 500
no_of_files = math.ceil(total_records / max_records_in_a_file)

for file_index in range(no_of_files):
    first_record_index = file_index * max_records_in_a_file
    # The last file may hold fewer records: never page past the reported total.
    last_record_index = min(
        first_record_index + max_records_in_a_file,
        total_records,
    )
    file_name = f"records_from_{first_record_index}"

    records_for_file = []
    # Step by 10 because get_url asks for 10 records per request.
    for offset in range(first_record_index, last_record_index, 10):
        response = get(get_url(offset), timeout=30)
        # Stop on HTTP errors rather than silently writing incomplete files.
        response.raise_for_status()
        data = response.json()
        records = data['records']
        # Extend in place; rebuilding the list on every page is quadratic.
        records_for_file.extend(records)

    json_file_name = f"{file_name}.json"
    csv_file_name = f"{file_name}.csv"
    with open(json_file_name, 'w') as fd:
        json.dump(records_for_file, fd)
    # Re-read the JSON through pandas to produce the CSV counterpart.
    df = pd.read_json(json_file_name, orient='columns')
    df.to_csv(csv_file_name, index=False)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment