Created
February 6, 2023 14:11
-
-
Save mikkohei13/694f2d9dfc32868fbe2366ee1856fbc0 to your computer and use it in GitHub Desktop.
Python script to fetch sound files from iNaturalist API
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
'''
Script to download sound files from iNaturalist.
Mikko Heikkinen / Luomus, 2023-02-06
Requires the "requests" library.
Input: iNat API URL with parameters
Output: sound files divided into folders by taxon
'''
import requests | |
import json | |
import os | |
import time | |
# PARAMETERS
per_page = 5  # observations requested per API page (sent as the per_page query parameter)
page = 1  # first page to fetch
maxpages = 2  # last page to fetch (inclusive)
sleep_between_soundfile_requests = 1  # seconds passed to time.sleep() after each sound file request
def getPageFromAPI(url, timeout=30):
    """Get a single pageful of observations from iNat.

    Args:
        url (string): API URL to get data from.
        timeout (number): Seconds to wait for the API before giving up.
            Without a timeout, requests may wait for a response indefinitely.

    Raises:
        Exception: The request could not be completed at all (connection
            error, timeout, malformed URL, ...). The underlying requests
            exception is attached as the cause.

    Returns:
        Parsed JSON body: observations and associated API metadata (paging etc.)
        False: if iNat API responds with a status code other than 200,
            or with a body that is not valid JSON.
    """
    print("Getting " + url)

    try:
        inatResponse = requests.get(url, timeout=timeout)
    except requests.exceptions.RequestException as err:
        # Only catch request failures; a bare except would also swallow
        # KeyboardInterrupt and SystemExit.
        raise Exception("Error getting data from iNaturalist API") from err

    # TODO: Find out why slightly too large idAbove returns 200 with zero results, but with much too large returns 400
    if 200 != inatResponse.status_code:
        errorCode = str(inatResponse.status_code)
        print("iNaturalist responded with error " + errorCode)
        return False

    print("iNaturalist API responded " + str(inatResponse.status_code))

    # Tries to convert JSON to dict. If iNat API gave invalid JSON, returns False instead.
    try:
        return json.loads(inatResponse.text)
    except ValueError:
        # json.JSONDecodeError is a subclass of ValueError.
        print("iNaturalist responded with invalid JSON")
        return False
def mkdir_ifnot_exist(dirname):
    """Create directory `dirname` (including missing parents) unless it already exists.

    Args:
        dirname (string): Path of the directory to create.

    Returns:
        True: always, whether the directory was created or already existed.
    """
    # Attempt the creation directly instead of checking os.path.exists() first:
    # the check-then-create sequence can fail if the path appears in between.
    try:
        os.makedirs(dirname)
    except FileExistsError:
        # Path already exists; nothing to do.
        pass
    else:
        print(f"Made new dir {dirname}")
    return True
def get_file_extension(mime):
    """Map an audio Content-Type value to a file extension.

    The value is normalized before lookup: any parameters after ";" are
    dropped, surrounding whitespace is stripped and the media type is
    lowercased, so "Audio/MPEG; charset=binary" is treated as "audio/mpeg".

    Args:
        mime (string or None): Content-Type header value of the sound file.

    Returns:
        string: ".mp3", ".wav" or ".m4a" for recognized types,
            ".unknown" for anything else (including None or empty string).
    """
    if not mime:
        return ".unknown"

    media_type = mime.split(";", 1)[0].strip().lower()

    extensions = {
        "audio/mpeg": ".mp3",
        "audio/mp3": ".mp3",
        "audio/x-wav": ".wav",
        "audio/wav": ".wav",
        "audio/wave": ".wav",
        "audio/mp4": ".m4a",
        "audio/x-m4a": ".m4a",
    }
    return extensions.get(media_type, ".unknown")
print("\n\n-------------------------------------------------------------------------\n\n")

# Sound files are written under the directory containing this script,
# independent of the current working directory. Resolved once, since it
# does not change between pages.
script_dir = os.path.dirname(os.path.abspath(__file__))

while page <= maxpages:
    # This URL defines what kinds of observations to fetch
    # bird sounds, bounding box of Finland & surrounding areas, research grade
    # See https://inaturalist.laji.fi/observations?hrank=species&lrank=species&nelat=71.36832173629628&nelng=42.3522172216326&place_id=any&quality_grade=research&sounds&subview=map&swlat=54.89686448376497&swlng=4.1197953466326&taxon_id=3&verifiable=any
    url = f"https://api.inaturalist.org/v1/observations?verifiable=any&order_by=observations.id&order=desc&page={ page }&spam=false&hrank=species&lrank=species&nelat=71.36832173629628&nelng=42.3522172216326&quality_grade=research&sounds=true&swlat=54.89686448376497&swlng=4.1197953466326&taxon_id=3&locale=fi&preferred_place_id=7020&per_page={ per_page }"
    print(url)

    data = getPageFromAPI(url)
    # getPageFromAPI returns False on an API error or invalid JSON.
    if not isinstance(data, dict):
        print("Stopping due to error")
        # Exit with a non-zero status; the exit() helper is injected by the
        # site module and is not meant for use in programs.
        raise SystemExit(1)

    for obs in data.get("results", []):
        print("=====")

        # Treat a missing key and an empty list the same way; indexing [0]
        # on an empty list would raise IndexError.
        sounds = obs.get("sounds")
        if not sounds:
            print("No sounds with these parameters")
            continue
        print(sounds)

        obs_id = obs["id"]
        taxon_name = obs["taxon"]["name"]

        # Get only the first sound file of each observation
        # NOTE(review): guards against a missing/empty file_url; confirm
        # whether the API can actually return a sound without one.
        file_url = sounds[0].get("file_url")
        if not file_url:
            print(f"Skipping observation {obs_id}: first sound has no file_url")
            continue

        media_response = requests.get(file_url, timeout=30)

        # Only save successful downloads. A bare truthiness test on
        # status_code passes for every response, including 404 and 500.
        if 200 == media_response.status_code:
            mime = media_response.headers.get("Content-Type")
            file_extension = get_file_extension(mime)

            # Create the taxon directory in the same place the file is
            # written, so the script also works when run from another
            # working directory.
            abs_dir = os.path.join(script_dir, "sounds", taxon_name.replace(" ", "_"))
            mkdir_ifnot_exist(abs_dir)

            abs_file_path = os.path.join(abs_dir, str(obs_id) + file_extension)
            with open(abs_file_path, 'wb') as fp:
                fp.write(media_response.content)
            print(f"Wrote soundfile to {abs_file_path}")
        else:
            print(f"Skipping observation {obs_id}: sound file request returned {media_response.status_code}")

        # Pause after every sound file request to avoid hammering the server.
        time.sleep(sleep_between_soundfile_requests)

    page = page + 1
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment