mikkohei13 · February 6, 2023 14:11
diff --git a/inat-sound.py b/inat-sound.py
 '''

 Script to download sound files from iNaturalist.
 Mikko Heikkinen / Luomus, 2023-02-06

 Requires "requests" library.

 input: iNat API URL with parameters
 output: files divided into folders by taxon

 '''

 import requests
 import json
 import os
 import time


 # PARAMETERS
 
 # Number of observations requested per API page (sent as the per_page URL parameter).
 per_page = 5
 # First page to fetch; incremented after each page has been processed.
 page = 1
 # Last page to fetch (inclusive): the main loop runs while page <= maxpages.
 maxpages = 2

 # Pause after each observation's sound file download; passed to time.sleep() (seconds).
 sleep_between_soundfile_requests = 1


 def getPageFromAPI(url):
     """Get a single pageful of observations from iNat.

     Args:
         url (string): API URL to get data from.

     Raises:
         Exception: The request itself failed (the API did not respond at
             all). The underlying requests error is chained as the cause.

     Returns:
         dict: Observations and associated API metadata (paging etc.),
             i.e. the decoded JSON body of the response.
         False: if the iNat API responds with a status code other than 200,
             or with a body that is not valid JSON.
     """
     print("Getting " + url)

     # Catch request failures only: a bare except would also trap
     # KeyboardInterrupt/SystemExit and hide what actually went wrong.
     try:
         inatResponse = requests.get(url)
     except requests.exceptions.RequestException as err:
         raise Exception("Error getting data from iNaturalist API") from err

     # TODO: Find out why slightly too large idAbove returns 200 with zero results, but with much too large returns 400
     if 200 != inatResponse.status_code:
         errorCode = str(inatResponse.status_code)
         print("iNaturalist responded with error " + errorCode)
         return False

     print("iNaturalist API responded " + str(inatResponse.status_code))

     # Tries to convert JSON to dict. If iNat API gave invalid JSON, returns
     # False instead. json.JSONDecodeError is a subclass of ValueError.
     try:
         return json.loads(inatResponse.text)
     except ValueError:
         print("iNaturalist responded with invalid JSON")
         return False


 def mkdir_ifnot_exist(dirname):
     """Make sure the path dirname exists, creating the directory if needed.

     Missing parent directories are created as well. A message is printed
     only when a new directory was actually made.

     Args:
         dirname (string): Path of the directory.

     Returns:
         True: always.
     """
     # Nothing to do when the path is already there.
     if os.path.exists(dirname):
         return True

     os.makedirs(dirname)
     print(f"Made new dir {dirname}")
     return True

 def get_file_extension(mime):
     """Map an audio MIME type to a file name extension.

     The value may be a raw Content-Type header, so optional parameters
     (everything after ";"), surrounding whitespace and letter case are
     ignored before the lookup.

     Args:
         mime (string or None): MIME type, e.g. "audio/mpeg" or
             "audio/mpeg; charset=binary".

     Returns:
         string: ".mp3", ".wav" or ".m4a" for recognised types, otherwise
             ".unknown" (also for None or any non-string value).
     """
     if not isinstance(mime, str):
         return ".unknown"

     # Media types are case-insensitive and may carry parameters.
     media_type = mime.split(";", 1)[0].strip().lower()

     extensions = {
         "audio/mpeg": ".mp3",
         "audio/x-wav": ".wav",
         "audio/wav": ".wav",
         "audio/mp4": ".m4a",
         "audio/x-m4a": ".m4a",
     }
     return extensions.get(media_type, ".unknown")


 print("\n\n-------------------------------------------------------------------------\n\n")

 # Sound files are stored under the folder that contains this script. It is
 # resolved once, as an absolute path, so that the folder that gets created
 # and the file that gets written always refer to the same location,
 # whatever the current working directory is.
 script_dir = os.path.dirname(os.path.abspath(__file__))

 while page <= maxpages:

     # This URL defines what kinds of observations to fetch

     # bird sounds, bounding box of Finland & surrounding areas, research grade
     # See https://inaturalist.laji.fi/observations?hrank=species&lrank=species&nelat=71.36832173629628&nelng=42.3522172216326&place_id=any&quality_grade=research&sounds&subview=map&swlat=54.89686448376497&swlng=4.1197953466326&taxon_id=3&verifiable=any
     url = f"https://api.inaturalist.org/v1/observations?verifiable=any&order_by=observations.id&order=desc&page={ page }&spam=false&hrank=species&lrank=species&nelat=71.36832173629628&nelng=42.3522172216326&quality_grade=research&sounds=true&swlat=54.89686448376497&swlng=4.1197953466326&taxon_id=3&locale=fi&preferred_place_id=7020&per_page={ per_page }"

     print(url)

     # getPageFromAPI returns False on an error status code or invalid JSON.
     data = getPageFromAPI(url)
     if data is False:
         print("Stopping due to error")
         # Same effect as exit(), without relying on the interactive-shell helper.
         raise SystemExit

     for obs in data['results']:
         print("=====")

         # The "sounds" key may be missing or hold an empty list; in both
         # cases there is nothing to download.
         sounds = obs.get("sounds")
         if not sounds:
             print("No sounds with these parameters")
             continue

         print(sounds)
         obs_id = obs["id"]
         taxon_name = obs["taxon"]["name"]
         media_response = requests.get(sounds[0]["file_url"])  # Get only the first sound file of each observation

         # A status code such as 404 is a truthy integer, so it has to be
         # compared explicitly; otherwise error pages are saved as sound files.
         if 200 == media_response.status_code:
             mime = media_response.headers.get("Content-Type")
             file_extension = get_file_extension(mime)

             # One folder per taxon, created where the file will be written.
             abs_dir_path = os.path.join(script_dir, "sounds", taxon_name.replace(" ", "_"))
             mkdir_ifnot_exist(abs_dir_path)

             abs_file_path = os.path.join(abs_dir_path, str(obs_id) + file_extension)
             with open(abs_file_path, 'wb') as fp:
                 fp.write(media_response.content)
             print(f"Wrote soundfile to {abs_file_path}")
         else:
             print(f"Skipping observation {obs_id}, sound file request responded {media_response.status_code}")

         # Be polite to the file server between downloads.
         time.sleep(sleep_between_soundfile_requests)

     page = page + 1
	'''

	Script to download sound files from iNaturalist.
	Mikko Heikkinen / Luomus, 2023-02-06

	Requires "requests" library.

	input: iNat API URL with parameters
	output: files divided into folders by taxon

	'''

	import requests
	import json
	import os
	import time


	# PARAMETERS

	# Number of observations requested per API page (sent as the per_page URL parameter).
	per_page = 5
	# First page to fetch; incremented after each page has been processed.
	page = 1
	# Last page to fetch (inclusive): the main loop runs while page <= maxpages.
	maxpages = 2

	# Pause after each observation's sound file download; passed to time.sleep() (seconds).
	sleep_between_soundfile_requests = 1


	def getPageFromAPI(url):
	    """Get a single pageful of observations from iNat.

	    Args:
	        url (string): API URL to get data from.

	    Raises:
	        Exception: The request itself failed (the API did not respond at
	            all). The underlying requests error is chained as the cause.

	    Returns:
	        dict: Observations and associated API metadata (paging etc.),
	            i.e. the decoded JSON body of the response.
	        False: if the iNat API responds with a status code other than 200,
	            or with a body that is not valid JSON.
	    """
	    print("Getting " + url)

	    # Catch request failures only: a bare except would also trap
	    # KeyboardInterrupt/SystemExit and hide what actually went wrong.
	    try:
	        inatResponse = requests.get(url)
	    except requests.exceptions.RequestException as err:
	        raise Exception("Error getting data from iNaturalist API") from err

	    # TODO: Find out why slightly too large idAbove returns 200 with zero results, but with much too large returns 400
	    if 200 != inatResponse.status_code:
	        errorCode = str(inatResponse.status_code)
	        print("iNaturalist responded with error " + errorCode)
	        return False

	    print("iNaturalist API responded " + str(inatResponse.status_code))

	    # Tries to convert JSON to dict. If iNat API gave invalid JSON, returns
	    # False instead. json.JSONDecodeError is a subclass of ValueError.
	    try:
	        return json.loads(inatResponse.text)
	    except ValueError:
	        print("iNaturalist responded with invalid JSON")
	        return False


	def mkdir_ifnot_exist(dirname):
	    """Make sure the path dirname exists, creating the directory if needed.

	    Missing parent directories are created as well. A message is printed
	    only when a new directory was actually made.

	    Args:
	        dirname (string): Path of the directory.

	    Returns:
	        True: always.
	    """
	    # Nothing to do when the path is already there.
	    if os.path.exists(dirname):
	        return True

	    os.makedirs(dirname)
	    print(f"Made new dir {dirname}")
	    return True

	def get_file_extension(mime):
	    """Map an audio MIME type to a file name extension.

	    The value may be a raw Content-Type header, so optional parameters
	    (everything after ";"), surrounding whitespace and letter case are
	    ignored before the lookup.

	    Args:
	        mime (string or None): MIME type, e.g. "audio/mpeg" or
	            "audio/mpeg; charset=binary".

	    Returns:
	        string: ".mp3", ".wav" or ".m4a" for recognised types, otherwise
	            ".unknown" (also for None or any non-string value).
	    """
	    if not isinstance(mime, str):
	        return ".unknown"

	    # Media types are case-insensitive and may carry parameters.
	    media_type = mime.split(";", 1)[0].strip().lower()

	    extensions = {
	        "audio/mpeg": ".mp3",
	        "audio/x-wav": ".wav",
	        "audio/wav": ".wav",
	        "audio/mp4": ".m4a",
	        "audio/x-m4a": ".m4a",
	    }
	    return extensions.get(media_type, ".unknown")


	print("\n\n-------------------------------------------------------------------------\n\n")

	# Sound files are stored under the folder that contains this script. It is
	# resolved once, as an absolute path, so that the folder that gets created
	# and the file that gets written always refer to the same location,
	# whatever the current working directory is.
	script_dir = os.path.dirname(os.path.abspath(__file__))

	while page <= maxpages:

	    # This URL defines what kinds of observations to fetch

	    # bird sounds, bounding box of Finland & surrounding areas, research grade
	    # See https://inaturalist.laji.fi/observations?hrank=species&lrank=species&nelat=71.36832173629628&nelng=42.3522172216326&place_id=any&quality_grade=research&sounds&subview=map&swlat=54.89686448376497&swlng=4.1197953466326&taxon_id=3&verifiable=any
	    url = f"https://api.inaturalist.org/v1/observations?verifiable=any&order_by=observations.id&order=desc&page={ page }&spam=false&hrank=species&lrank=species&nelat=71.36832173629628&nelng=42.3522172216326&quality_grade=research&sounds=true&swlat=54.89686448376497&swlng=4.1197953466326&taxon_id=3&locale=fi&preferred_place_id=7020&per_page={ per_page }"

	    print(url)

	    # getPageFromAPI returns False on an error status code or invalid JSON.
	    data = getPageFromAPI(url)
	    if data is False:
	        print("Stopping due to error")
	        # Same effect as exit(), without relying on the interactive-shell helper.
	        raise SystemExit

	    for obs in data['results']:
	        print("=====")

	        # The "sounds" key may be missing or hold an empty list; in both
	        # cases there is nothing to download.
	        sounds = obs.get("sounds")
	        if not sounds:
	            print("No sounds with these parameters")
	            continue

	        print(sounds)
	        obs_id = obs["id"]
	        taxon_name = obs["taxon"]["name"]
	        media_response = requests.get(sounds[0]["file_url"])  # Get only the first sound file of each observation

	        # A status code such as 404 is a truthy integer, so it has to be
	        # compared explicitly; otherwise error pages are saved as sound files.
	        if 200 == media_response.status_code:
	            mime = media_response.headers.get("Content-Type")
	            file_extension = get_file_extension(mime)

	            # One folder per taxon, created where the file will be written.
	            abs_dir_path = os.path.join(script_dir, "sounds", taxon_name.replace(" ", "_"))
	            mkdir_ifnot_exist(abs_dir_path)

	            abs_file_path = os.path.join(abs_dir_path, str(obs_id) + file_extension)
	            with open(abs_file_path, 'wb') as fp:
	                fp.write(media_response.content)
	            print(f"Wrote soundfile to {abs_file_path}")
	        else:
	            print(f"Skipping observation {obs_id}, sound file request responded {media_response.status_code}")

	        # Be polite to the file server between downloads.
	        time.sleep(sleep_between_soundfile_requests)

	    page = page + 1