Skip to content

Instantly share code, notes, and snippets.

@mikkohei13
Created February 6, 2023 14:11
Show Gist options
  • Save mikkohei13/694f2d9dfc32868fbe2366ee1856fbc0 to your computer and use it in GitHub Desktop.
Save mikkohei13/694f2d9dfc32868fbe2366ee1856fbc0 to your computer and use it in GitHub Desktop.
Python script to fetch sound files from iNaturalist API
'''
Script to download sound files from iNaturalist.
Mikko Heikkinen / Luomus, 2023-02-06
Requires "requests" library.
input: iNat API URL with parameters
output: files divided into folders by taxon
'''
import requests
import json
import os
import time
# PARAMETERS
# Number of observations requested per API page (sent as per_page in the request URL).
per_page = 5
# Page number of the first request; the main loop increments it after each page.
page = 1
# Last page to request (inclusive): the main loop runs while page <= maxpages.
maxpages = 2
# Pause, in seconds, applied with time.sleep() in the sound file download loop.
sleep_between_soundfile_requests = 1
def getPageFromAPI(url):
    """Get a single pageful of observations from iNat.

    Args:
        url (string): API URL to get data from.

    Raises:
        Exception: The request itself could not be completed (connection
            error, timeout, malformed URL, ...). The underlying requests
            error is chained as the cause.

    Returns:
        dict: Observations and associated API metadata (paging etc.)
        False: if iNat API responds with a status code other than 200, or
            with a body that is not valid JSON.
    """
    print("Getting " + url)

    try:
        # Without a timeout a stalled connection would block the script forever.
        inatResponse = requests.get(url, timeout=30)
    except requests.exceptions.RequestException as err:
        # Only network-level failures are translated; KeyboardInterrupt and
        # SystemExit are deliberately left to propagate untouched.
        raise Exception("Error getting data from iNaturalist API") from err

    # TODO: Find out why slightly too large idAbove returns 200 with zero results, but with much too large returns 400
    if 200 != inatResponse.status_code:
        errorCode = str(inatResponse.status_code)
        print("iNaturalist responded with error " + errorCode)
        return False

    print("iNaturalist API responded " + str(inatResponse.status_code))

    # Tries to convert JSON to dict. If iNat API gave invalid JSON, returns False instead.
    try:
        return json.loads(inatResponse.text)
    except ValueError:
        # json.JSONDecodeError is a subclass of ValueError.
        print("iNaturalist responded with invalid JSON")
        return False
def mkdir_ifnot_exist(dirname):
    """Create directory `dirname` (including missing parents) unless it exists.

    Args:
        dirname (string): Path of the directory to create.

    Returns:
        True: always, whether the directory was created or already existed.

    Raises:
        OSError: if the directory cannot be created for a reason other than
            the path already existing (e.g. missing permissions).
    """
    try:
        # Attempt the creation directly instead of checking first: a separate
        # os.path.exists() check can go stale before os.makedirs() runs.
        os.makedirs(dirname)
    except FileExistsError:
        # Path is already there; nothing to create and nothing to report.
        return True

    print(f"Made new dir {dirname}")
    return True
# Audio MIME types (lower-case, without parameters) and the extension to save them with.
_EXTENSION_BY_MIME = {
    "audio/mpeg": ".mp3",
    "audio/x-wav": ".wav",
    "audio/wav": ".wav",
    "audio/mp4": ".m4a",
    "audio/x-m4a": ".m4a",
}


def get_file_extension(mime):
    """Return the file extension (with leading dot) for an audio MIME type.

    The value is typically a Content-Type header, so it is matched
    case-insensitively and any parameters after ";" (for example
    "audio/mpeg; charset=binary") are ignored.

    Args:
        mime (string or None): MIME type / Content-Type header value.

    Returns:
        string: ".mp3", ".wav" or ".m4a" for recognised types, otherwise
        ".unknown" (also for None or any non-string value).
    """
    if not isinstance(mime, str):
        return ".unknown"

    media_type = mime.split(";", 1)[0].strip().lower()
    return _EXTENSION_BY_MIME.get(media_type, ".unknown")
# Main script: page through the iNaturalist observations API and save the first
# sound file of every observation into sounds/<Taxon_name>/<observation id><ext>
# next to this script.
print("\n\n-------------------------------------------------------------------------\n\n")

# Directory containing this script; resolved once, and made absolute so that the
# directory that gets created and the file that gets written always agree,
# whatever the current working directory is.
script_dir = os.path.dirname(os.path.abspath(__file__))

while page <= maxpages:

    # This URL defines what kinds of observations to fetch
    # bird sounds, bounding box of Finland & surrounding areas, research grade
    # See https://inaturalist.laji.fi/observations?hrank=species&lrank=species&nelat=71.36832173629628&nelng=42.3522172216326&place_id=any&quality_grade=research&sounds&subview=map&swlat=54.89686448376497&swlng=4.1197953466326&taxon_id=3&verifiable=any
    url = f"https://api.inaturalist.org/v1/observations?verifiable=any&order_by=observations.id&order=desc&page={ page }&spam=false&hrank=species&lrank=species&nelat=71.36832173629628&nelng=42.3522172216326&quality_grade=research&sounds=true&swlat=54.89686448376497&swlng=4.1197953466326&taxon_id=3&locale=fi&preferred_place_id=7020&per_page={ per_page }"
    print(url)

    data = getPageFromAPI(url)

    # getPageFromAPI returns False on an API error or invalid JSON; any other
    # non-dict payload cannot be processed either.
    if not isinstance(data, dict):
        print("Stopping due to error")
        # Non-zero status so callers (shell scripts, cron) can detect the failure.
        raise SystemExit(1)

    for obs in data['results']:
        print("=====")

        sounds = obs.get("sounds")
        if not sounds:
            # Key missing or list empty: nothing to download for this observation.
            print("No sounds with these parameters")
            continue

        print(sounds)
        obs_id = obs["id"]
        taxon_name = obs["taxon"]["name"]

        # Get only the first sound file of each observation
        media_url = sounds[0].get("file_url")
        if not media_url:
            # NOTE(review): presumably some sound records carry no direct file URL - confirm against the API.
            print(f"Observation {obs_id} has no downloadable sound file URL")
            continue

        media_response = requests.get(media_url, timeout=60)

        if 200 == media_response.status_code:
            file_extension = get_file_extension(media_response.headers.get("Content-Type"))

            target_dir = os.path.join(script_dir, "sounds", taxon_name.replace(" ", "_"))
            mkdir_ifnot_exist(target_dir)

            abs_file_path = os.path.join(target_dir, str(obs_id) + file_extension)
            # The context manager closes the file even if the write fails.
            with open(abs_file_path, 'wb') as fp:
                fp.write(media_response.content)
            print(f"Wrote soundfile to {abs_file_path}")
        else:
            # Do not store an error page under a sound file name.
            print(f"Skipping observation {obs_id}: sound file request returned {media_response.status_code}")

        # Pause after every sound file request, successful or not.
        time.sleep(sleep_between_soundfile_requests)

    page += 1
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment