@mikkohei13
Created March 14, 2025 18:00
Fetches atlas observations from the winter period which can affect square results
# Script that fetches winter-period atlas observations and checks which of them might affect atlas results.
import os
import requests
import json
import time
from datetime import datetime
# Fetch atlas observations
token = "YOUR TOKEN HERE"  # api.laji.fi access token
skipped_names = "NAMES TO FILTER OUT"  # observer team string to skip, compared with an exact match
def get_collection_name(collection_id):
    """Return the Finnish long name of a collection, caching results on disk in ./cache."""
    global token
    collection_id = collection_id.replace("http://tun.fi/", "")
    cache_dir = "./cache"
    os.makedirs(cache_dir, exist_ok=True)

    # Check if collection name is already cached
    cache_file = os.path.join(cache_dir, f"{collection_id}.txt")
    if os.path.exists(cache_file):
        # print(f"Collection name for {collection_id} already cached")
        with open(cache_file, "r") as f:
            return f.read().strip()

    # If not cached, fetch from API
    # Add a small delay to avoid overwhelming the API
    time.sleep(0.2)
    url = f"https://api.laji.fi/v0/collections/{collection_id}?lang=fi&langFallback=true&access_token={token}"
    # print(f"Fetching collection name for {collection_id} from API url {url}")
    response = requests.get(url)
    collection_name = response.json()["longName"]

    # Cache the result
    with open(cache_file, "w") as f:
        f.write(collection_name)

    return collection_name
def fetch_all_data(base_url, page_size=100):
    """
    Fetch all data from api.laji.fi by paginating through all available pages.

    Args:
        base_url (str): The base URL for the API request without page and pageSize parameters
        page_size (int, optional): Number of records per page. Defaults to 100.

    Returns:
        tuple: (all_results, total_count) where all_results is a list of all data dictionaries
               and total_count is the total number of records fetched
    """
    all_results = []
    page = 1
    total_pages = None
    total_count = 0

    while total_pages is None or page <= total_pages:
        # Construct URL with pagination parameters
        if '?' in base_url:
            url = f"{base_url}&pageSize={page_size}&page={page}"
        else:
            url = f"{base_url}?pageSize={page_size}&page={page}"

        print(f"Fetching page {page}...")
        response = requests.get(url)

        # Check if request was successful
        if response.status_code != 200:
            print(f"Error fetching data: {response.status_code}")
            print(response.text)
            break

        data = response.json()

        # Get pagination information
        current_page = data.get('currentPage', 1)
        total_pages = data.get('lastPage', 1)
        total_count = data.get('total', 0)

        # Add results to our collection
        results = data.get('results', [])
        all_results.extend(results)

        print(f"Fetched page {current_page}/{total_pages}, got {len(results)} records")

        # Move to next page
        page += 1

        # Add a small delay to avoid overwhelming the API
        time.sleep(0.2)

    return all_results, len(all_results)
def get_square_data(square_id):
    """Fetch the current atlas results for one YKJ 10 km square and return {Finnish name: atlas code}."""
    # Add a small delay to avoid overwhelming the API
    time.sleep(0.2)
    print(f"Fetching square data for {square_id}")
    url = f"https://atlas-api.2.rahtiapp.fi/api/v1/grid/{square_id}/atlas"
    response = requests.get(url)

    species_observed = dict()
    for species in response.json()["data"]:
        species_finnish = species["speciesName"]
        # Atlas codes come as e.g. "MY.atlasCodeEnum61"; single-digit codes (e.g. "6") are
        # padded to two digits ("60") so they can be compared as integers against subcodes like 61
        atlas_code_string = species["atlasCode"]["key"].replace("MY.atlasCodeEnum", "")
        if len(atlas_code_string) == 1:
            atlas_code_string = atlas_code_string + "0"
        atlas_code = int(atlas_code_string)
        species_observed[species_finnish] = atlas_code

    return species_observed
def generate_observation_info(observation, square_id):
    """Format one observation as a tab-separated row for the output TSV."""
    observers = ", ".join(observation['gathering']['team']) if 'team' in observation['gathering'] and observation['gathering']['team'] else "Anonyymi"
    collection_name = get_collection_name(observation['document']['collectionId'])
    row = f"{observation['unit']['linkings']['taxon']['nameFinnish']}\t{observation['unit']['linkings']['taxon']['scientificName']}\t{observation['unit']['atlasCode'].replace('http://tun.fi/MY.atlasCodeEnum', '')}\t{observation['gathering']['displayDateTime']}\t{observers}\t{collection_name}\t{observation['document']['documentId']}\thttps://tulokset.lintuatlas.fi/grid/{square_id}"
    return row
def load_bird_association_areas():
    """Fetch the list of bird association areas (id and name) from api.laji.fi."""
    global token
    url = f"https://api.laji.fi/v0/areas?type=birdAssociationArea&pageSize=50&access_token={token}"
    print(url)
    response = requests.get(url)
    area_list = response.json()["results"]
    return area_list
page_size = 1000
page = 1
time_range = "2022/2025"

area_list = load_bird_association_areas()
skipped_count = 0

for area in area_list:
    # Each area is a dictionary with id and name
    society_id = area["id"]
    society_name = area["name"]

    observations_to_check = []

    # Winter-season (1 Sep - 31 Mar) bird observations with atlas classes B-D and no quality issues
    base_url = f"https://api.laji.fi/v0/warehouse/query/unit/list?cache=true&useIdentificationAnnotations=true&includeSubTaxa=true&includeNonValidTaxa=true&informalTaxonGroupId=MVL.1&taxonRankId=MX.species&birdAssociationAreaId={society_id}&time={time_range}&season=0901%2F0331&individualCountMin=1&qualityIssues=NO_ISSUES&recordQuality=COMMUNITY_VERIFIED,NEUTRAL,EXPERT_VERIFIED&needsCheck=false&atlasClass=MY.atlasClassEnumB%2CMY.atlasClassEnumC%2CMY.atlasClassEnumD&selected=gathering.conversions.ykj10kmCenter.lat,gathering.conversions.ykj10kmCenter.lon,unit.linkings.taxon.nameFinnish,unit.linkings.taxon.scientificName,unit.unitId,document.documentId,gathering.displayDateTime,document.collectionId,gathering.team,unit.atlasCode,unit.atlasClass&access_token={token}"

    all_data, count = fetch_all_data(base_url, page_size)
    print(f"Fetched a total of {count} records")
    #print(all_data)

    squares_data = {}

    # Loop through all data and aggregate by ykj10km
    for observation in all_data:
        if "conversions" not in observation["gathering"]:
            print(observation)
            exit()
        lat = observation["gathering"]["conversions"]["ykj10kmCenter"]["lat"]
        lon = observation["gathering"]["conversions"]["ykj10kmCenter"]["lon"]
        square_id = f"{lat}:{lon}"
        if square_id not in squares_data:
            squares_data[square_id] = []
        squares_data[square_id].append(observation)

    #print(squares_data)
    # Loop the aggregated data one ykj10km square at a time
    square_count = len(squares_data)
    i = 0
    for square_id, observations in squares_data.items():
        i += 1
        print(f"Processing square {square_id}, {i}/{square_count}")
        species_observed = get_square_data(square_id)

        for observation in observations:
            observer = ", ".join(observation['gathering']['team']) if 'team' in observation['gathering'] and observation['gathering']['team'] else "Anonyymi"
            if observer == skipped_names:
                skipped_count += 1
                print(f"Skipping {observer} {skipped_count}")
                continue

            species_finnish = observation["unit"]["linkings"]["taxon"]["nameFinnish"]
            atlas_code_string = observation["unit"]["atlasCode"].replace("http://tun.fi/MY.atlasCodeEnum", "")
            if len(atlas_code_string) == 1:
                atlas_code_string = atlas_code_string + "0"
            atlas_code = int(atlas_code_string)

            # Flag the observation if its atlas code is at least as high as the code already
            # recorded for this species in this square, i.e. it may affect the square's result
            if species_finnish in species_observed:
                if atlas_code >= species_observed[species_finnish]:
                    row = generate_observation_info(observation, square_id)
                    observations_to_check.append(row)
    # Save as TSV
    society_name = society_name.replace(" ", "_").replace(".", "")
    # Datetime as YYYYMMDDHHMM
    datetime_str = datetime.now().strftime("%Y%m%d%H%M")
    os.makedirs("./winter_output", exist_ok=True)
    filename = f"./winter_output/winter_{society_name}_{datetime_str}.tsv"

    with open(filename, "w") as f:
        # Add header
        f.write("species\tscientific_name\tatlas_code\tdate\tobserver\tcollection\tdocument_id\tsquare_link\n")
        for row in observations_to_check:
            f.write(row + "\n")

    print(f"Saved {len(observations_to_check)} observations to check for {society_name}")
    print(f"Skipped {skipped_count} observations")
    print("--------------------------------------------------------------------------")