Skip to content

Instantly share code, notes, and snippets.

@sumanchapai
Last active March 5, 2024 09:51
Show Gist options
  • Save sumanchapai/b3144a191a29b3552a26f107b343ae97 to your computer and use it in GitHub Desktop.
Save sumanchapai/b3144a191a29b3552a26f107b343ae97 to your computer and use it in GitHub Desktop.
Open data from data.gov.in
import math
import pandas as pd
import json
from requests.api import get
def get_url(offset: int, limit: int = 10) -> str:
    """Build the data.gov.in catalog request URL for one page of records.

    Args:
        offset: Index of the first record the API should return.
        limit: Maximum number of records to request per page. Defaults to
            10, the value that used to be hard-coded in the URL.

    Returns:
        The request URL, asking for JSON output and filtered to month
        ``06`` of year ``2021``.
    """
    # NOTE(review): the API key is embedded in source. If it is not a public
    # sample key, move it to configuration or an environment variable.
    return (
        "https://api.data.gov.in/catalog/2c1fd4a5-67c7-4672-a2c6-a0a76c2f00da"
        "?api-key=579b464db66ec23bdd000001cdd3946e44ce4aad7209ff7b23ac571b"
        f"&format=json&offset={offset}&limit={limit}"
        "&filters[month]=06&filters[year]=2021"
    )
def records_count() -> int:
    """Return the total number of records the API reports for the query.

    Requests the first page (offset 0) and reads the ``total`` field of the
    JSON response.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.Timeout: If the API does not answer within the timeout.
    """
    # A timeout keeps the script from hanging forever on a stalled
    # connection; requests has no default timeout.
    response = get(get_url(0), timeout=30)
    # Fail with a clear HTTP error instead of a KeyError/JSON error later.
    response.raise_for_status()
    return int(response.json()['total'])
# Download every record for the query and split the result into files of at
# most `max_records_in_a_file` records, each written as both JSON and CSV.
total_records = records_count()
max_records_in_a_file = 500
# Step between successive request offsets; must match the page size that
# get_url() asks the API for (limit=10).
page_size = 10
no_of_files = math.ceil(total_records / max_records_in_a_file)

for file_index in range(no_of_files):
    first_record_index = file_index * max_records_in_a_file
    # The last file may be short: never request past the reported total.
    last_record_index = min(
        first_record_index + max_records_in_a_file, total_records
    )
    file_name = f"records_from_{first_record_index}"

    # Collect all pages belonging to this file.
    records_for_file = []
    for offset in range(first_record_index, last_record_index, page_size):
        response = get(get_url(offset), timeout=30)
        response.raise_for_status()
        # extend() appends in place instead of rebuilding the list per page.
        records_for_file.extend(response.json()['records'])

    json_file_name = f"{file_name}.json"
    csv_file_name = f"{file_name}.csv"
    with open(json_file_name, 'w') as fd:
        json.dump(records_for_file, fd)
    # Read the JSON back only after the `with` block has closed (and thus
    # flushed) the file; pandas then converts the same records to CSV.
    df = pd.read_json(json_file_name, orient='columns')
    df.to_csv(csv_file_name, index=False)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment