sumanchapai · March 5, 2024 09:51
diff --git a/x.py b/x.py
 import math
 import pandas as pd
 import json
 from requests.api import get

 def get_url(offset: int, limit: int = 10) -> str:
     """Build the data.gov.in catalog request URL for one page of records.

     The query always asks for JSON and always applies the filters
     ``month=06`` and ``year=2021``.

     Args:
         offset: Value placed in the ``offset`` query parameter.
         limit: Value placed in the ``limit`` query parameter. Defaults to
             10, the page size that used to be hard-coded in the URL.

     Returns:
         The complete request URL.
     """
     # NOTE(review): the API key is embedded in the URL literal; consider
     # loading it from configuration instead of source code.
     return (
         "https://api.data.gov.in/catalog/2c1fd4a5-67c7-4672-a2c6-a0a76c2f00da"
         "?api-key=579b464db66ec23bdd000001cdd3946e44ce4aad7209ff7b23ac571b"
         f"&format=json&offset={offset}&limit={limit}"
         "&filters[month]=06&filters[year]=2021"
     )

 def records_count() -> int:
     """Return the ``total`` value the API reports for the filtered query.

     Requests the page at offset 0 and reads the ``total`` key from the
     JSON body of the response.

     Raises:
         requests.exceptions.HTTPError: If the API answers with a 4xx/5xx
             status.
         requests.exceptions.Timeout: If the server does not respond within
             the timeout.
     """
     # Without a timeout a stalled connection would block forever.
     response = get(get_url(0), timeout=30)
     # Fail on HTTP errors here rather than with a KeyError on 'total' below.
     response.raise_for_status()
     return response.json()['total']
    
 # Number of records the API reports for the filtered query.
 total_records = records_count()
 # Upper bound on how many records go into one output file.
 max_records_in_a_file = 500
 # Step between request offsets; must equal the ``limit`` value in the URL
 # produced by get_url so consecutive pages neither overlap nor leave gaps.
 records_per_request = 10

 no_of_files = math.ceil(total_records / max_records_in_a_file)

 for file_index in range(no_of_files):
     first_record_index = file_index * max_records_in_a_file
     # The last file may be short: never page past the reported total.
     last_record_index = min(
         first_record_index + max_records_in_a_file, total_records
     )
     file_name = f"records_from_{first_record_index}"

     # Collect every page belonging to this file into one flat list.
     records_for_file = []
     for offset in range(first_record_index, last_record_index, records_per_request):
         # A timeout keeps a stalled connection from blocking the whole run.
         response = get(get_url(offset), timeout=30)
         # Surface HTTP errors here instead of a KeyError on 'records'.
         response.raise_for_status()
         records_for_file.extend(response.json()['records'])

     json_file_name = f"{file_name}.json"
     csv_file_name = f"{file_name}.csv"

     with open(json_file_name, 'w', encoding='utf-8') as fd:
         json.dump(records_for_file, fd)

     # Load the saved JSON back through pandas and write the same records as CSV.
     df = pd.read_json(json_file_name, orient='columns')
     df.to_csv(csv_file_name, index=False)
	import math
	import pandas as pd
	import json
	from requests.api import get

	def get_url(offset: int, limit: int = 10) -> str:
	    """Build the data.gov.in catalog request URL for one page of records.

	    The query always asks for JSON and always applies the filters
	    ``month=06`` and ``year=2021``.

	    Args:
	        offset: Value placed in the ``offset`` query parameter.
	        limit: Value placed in the ``limit`` query parameter. Defaults to
	            10, the page size that used to be hard-coded in the URL.

	    Returns:
	        The complete request URL.
	    """
	    # NOTE(review): the API key is embedded in the URL literal; consider
	    # loading it from configuration instead of source code.
	    return (
	        "https://api.data.gov.in/catalog/2c1fd4a5-67c7-4672-a2c6-a0a76c2f00da"
	        "?api-key=579b464db66ec23bdd000001cdd3946e44ce4aad7209ff7b23ac571b"
	        f"&format=json&offset={offset}&limit={limit}"
	        "&filters[month]=06&filters[year]=2021"
	    )

	def records_count() -> int:
	    """Return the ``total`` value the API reports for the filtered query.

	    Requests the page at offset 0 and reads the ``total`` key from the
	    JSON body of the response.

	    Raises:
	        requests.exceptions.HTTPError: If the API answers with a 4xx/5xx
	            status.
	        requests.exceptions.Timeout: If the server does not respond within
	            the timeout.
	    """
	    # Without a timeout a stalled connection would block forever.
	    response = get(get_url(0), timeout=30)
	    # Fail on HTTP errors here rather than with a KeyError on 'total' below.
	    response.raise_for_status()
	    return response.json()['total']

	# Number of records the API reports for the filtered query.
	total_records = records_count()
	# Upper bound on how many records go into one output file.
	max_records_in_a_file = 500
	# Step between request offsets; must equal the ``limit`` value in the URL
	# produced by get_url so consecutive pages neither overlap nor leave gaps.
	records_per_request = 10

	no_of_files = math.ceil(total_records / max_records_in_a_file)

	for file_index in range(no_of_files):
	    first_record_index = file_index * max_records_in_a_file
	    # The last file may be short: never page past the reported total.
	    last_record_index = min(
	        first_record_index + max_records_in_a_file, total_records
	    )
	    file_name = f"records_from_{first_record_index}"

	    # Collect every page belonging to this file into one flat list
	    # (extend, not nesting, so the JSON file holds a flat array of records).
	    records_for_file = []
	    for offset in range(first_record_index, last_record_index, records_per_request):
	        # A timeout keeps a stalled connection from blocking the whole run.
	        response = get(get_url(offset), timeout=30)
	        # Surface HTTP errors here instead of a KeyError on 'records'.
	        response.raise_for_status()
	        records_for_file.extend(response.json()['records'])

	    json_file_name = f"{file_name}.json"
	    csv_file_name = f"{file_name}.csv"

	    with open(json_file_name, 'w', encoding='utf-8') as fd:
	        json.dump(records_for_file, fd)

	    # Load the saved JSON back through pandas and write the same records as CSV.
	    df = pd.read_json(json_file_name, orient='columns')
	    df.to_csv(csv_file_name, index=False)