Created March 14, 2025 18:00
Fetches atlas observations from the winter period which can affect square results
# Script that fetches winter period atlas observations and checks which of them might affect atlas results.
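# Rough pipeline:
#   1. Load bird association areas from api.laji.fi
#   2. For each area, fetch winter-season observations from the warehouse API
#   3. Group observations by YKJ 10 km atlas square
#   4. Compare each observation against the square's current atlas result
#   5. Write potentially result-affecting observations to a per-area TSV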
import os
import requests
import time
from datetime import datetime

# Configuration
token = "YOUR TOKEN HERE"
# Exact observer string (joined team names) whose observations are skipped
skipped_names = "NAMES TO FILTER OUT"
def get_collection_name(collection_id):
    global token
    collection_id = collection_id.replace("http://tun.fi/", "")
    cache_dir = "./cache"
    os.makedirs(cache_dir, exist_ok=True)  # make sure the cache directory exists

    # Check if collection name is already cached
    cache_file = os.path.join(cache_dir, f"{collection_id}.txt")
    if os.path.exists(cache_file):
        # print(f"Collection name for {collection_id} already cached")
        with open(cache_file, "r") as f:
            return f.read().strip()

    # If not cached, fetch from API
    # Add a small delay to avoid overwhelming the API
    time.sleep(0.2)
    url = f"https://api.laji.fi/v0/collections/{collection_id}?lang=fi&langFallback=true&access_token={token}"
    # print(f"Fetching collection name for {collection_id} from API url {url}")
    response = requests.get(url)
    collection_name = response.json()["longName"]

    # Cache the result
    with open(cache_file, "w") as f:
        f.write(collection_name)
    return collection_name
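# Example (hypothetical collection id):
# get_collection_name("http://tun.fi/HR.48")  # -> long name from api.laji.fi, cached in ./cache/HR.48.txt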
def fetch_all_data(base_url, page_size=100):
    """
    Fetch all data from api.laji.fi by paginating through all available pages.

    Args:
        base_url (str): The base URL for the API request, without page and pageSize parameters
        page_size (int, optional): Number of records per page. Defaults to 100.

    Returns:
        tuple: (all_results, total_count) where all_results is a list of all data dictionaries
            and total_count is the total number of records fetched
    """
    all_results = []
    page = 1
    total_pages = None

    while total_pages is None or page <= total_pages:
        # Construct URL with pagination parameters
        if '?' in base_url:
            url = f"{base_url}&pageSize={page_size}&page={page}"
        else:
            url = f"{base_url}?pageSize={page_size}&page={page}"

        print(f"Fetching page {page}...")
        response = requests.get(url)

        # Check if request was successful
        if response.status_code != 200:
            print(f"Error fetching data: {response.status_code}")
            print(response.text)
            break

        data = response.json()

        # Get pagination information
        current_page = data.get('currentPage', 1)
        total_pages = data.get('lastPage', 1)

        # Add results to our collection
        results = data.get('results', [])
        all_results.extend(results)
        print(f"Fetched page {current_page}/{total_pages}, got {len(results)} records")

        # Move to next page
        page += 1
        # Add a small delay to avoid overwhelming the API
        time.sleep(0.2)

    return all_results, len(all_results)
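# Example (hypothetical query, reusing the warehouse list endpoint used below):
# results, count = fetch_all_data(
#     f"https://api.laji.fi/v0/warehouse/query/unit/list?informalTaxonGroupId=MVL.1&access_token={token}",
#     page_size=1000)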
def get_square_data(square_id):
    """Fetch the current atlas result for one YKJ 10 km square and return {Finnish name: atlas code}."""
    # Add a small delay to avoid overwhelming the API
    time.sleep(0.2)
    print(f"Fetching square data for {square_id}")
    url = f"https://atlas-api.2.rahtiapp.fi/api/v1/grid/{square_id}/atlas"
    response = requests.get(url)

    species_observed = dict()
    for species in response.json()["data"]:
        species_finnish = species["speciesName"]
        # Normalize the atlas code to two digits, so that e.g. "7" becomes 70
        # and single-digit codes are comparable with two-digit subcodes like 71
        atlas_code_string = species["atlasCode"]["key"].replace("MY.atlasCodeEnum", "")
        if len(atlas_code_string) == 1:
            atlas_code_string = atlas_code_string + "0"
        atlas_code = int(atlas_code_string)
        species_observed[species_finnish] = atlas_code
    return species_observed
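# Example of the returned mapping (hypothetical species and values):
# {"Talitiainen": 73, "Hömötiainen": 20, ...}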
def generate_observation_info(observation, square_id):
    # "Anonyymi" = anonymous observer
    observers = ", ".join(observation['gathering']['team']) if 'team' in observation['gathering'] and observation['gathering']['team'] else "Anonyymi"
    collection_name = get_collection_name(observation['document']['collectionId'])
    # Tab-separated row matching the TSV header written at the end of the script
    row = f"{observation['unit']['linkings']['taxon']['nameFinnish']}\t{observation['unit']['linkings']['taxon']['scientificName']}\t{observation['unit']['atlasCode'].replace('http://tun.fi/MY.atlasCodeEnum', '')}\t{observation['gathering']['displayDateTime']}\t{observers}\t{collection_name}\t{observation['document']['documentId']}\thttps://tulokset.lintuatlas.fi/grid/{square_id}"
    return row
def load_bird_association_areas():
    global token
    url = f"https://api.laji.fi/v0/areas?type=birdAssociationArea&pageSize=50&access_token={token}"
    # Note: printing the URL would also print the access token
    # print(url)
    response = requests.get(url)
    area_list = response.json()["results"]
    return area_list
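# Main flow: for each bird association area, fetch winter-season (Sep-Mar)
# observations, group them by YKJ 10 km square, compare each observation's
# atlas code against the square's current atlas result, and write the
# observations that could raise a result to a per-area TSV file.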
page_size = 1000
time_range = "2022/2025"

# Make sure the output directory exists
os.makedirs("./winter_output", exist_ok=True)

area_list = load_bird_association_areas()
skipped_count = 0

for area in area_list:
    # Each area is a dictionary with id and name
    society_id = area["id"]
    society_name = area["name"]
    observations_to_check = []

    # Warehouse query: winter-season (0901/0331) bird observations with no
    # quality issues, restricted to atlas classes B-D within this association area
    base_url = f"https://api.laji.fi/v0/warehouse/query/unit/list?cache=true&useIdentificationAnnotations=true&includeSubTaxa=true&includeNonValidTaxa=true&informalTaxonGroupId=MVL.1&taxonRankId=MX.species&birdAssociationAreaId={society_id}&time={time_range}&season=0901%2F0331&individualCountMin=1&qualityIssues=NO_ISSUES&recordQuality=COMMUNITY_VERIFIED,NEUTRAL,EXPERT_VERIFIED&needsCheck=false&atlasClass=MY.atlasClassEnumB%2CMY.atlasClassEnumC%2CMY.atlasClassEnumD&selected=gathering.conversions.ykj10kmCenter.lat,gathering.conversions.ykj10kmCenter.lon,unit.linkings.taxon.nameFinnish,unit.linkings.taxon.scientificName,unit.unitId,document.documentId,gathering.displayDateTime,document.collectionId,gathering.team,unit.atlasCode,unit.atlasClass&access_token={token}"
    all_data, count = fetch_all_data(base_url, page_size)
    print(f"Fetched a total of {count} records")
    #print(all_data)

    squares_data = {}

    # Loop through all data and aggregate by ykj10km
    for observation in all_data:
        if "conversions" not in observation["gathering"]:
            # Coordinate conversions missing; print the observation and stop for debugging
            print(observation)
            exit()
        lat = observation["gathering"]["conversions"]["ykj10kmCenter"]["lat"]
        lon = observation["gathering"]["conversions"]["ykj10kmCenter"]["lon"]
        square_id = f"{lat}:{lon}"
        if square_id not in squares_data:
            squares_data[square_id] = []
        squares_data[square_id].append(observation)
    #print(squares_data)

    # Loop the aggregated data one ykj10km square at a time
    square_count = len(squares_data)
    i = 0
    for square_id, observations in squares_data.items():
        i += 1
        print(f"Processing square {square_id}, {i}/{square_count}")
        species_observed = get_square_data(square_id)
        for observation in observations:
            observer = ", ".join(observation['gathering']['team']) if 'team' in observation['gathering'] and observation['gathering']['team'] else "Anonyymi"
            if observer == skipped_names:
                skipped_count += 1
                print(f"Skipping {observer} {skipped_count}")
                continue
            species_finnish = observation["unit"]["linkings"]["taxon"]["nameFinnish"]
            # Normalize the atlas code to two digits, as in get_square_data()
            atlas_code_string = observation["unit"]["atlasCode"].replace("http://tun.fi/MY.atlasCodeEnum", "")
            if len(atlas_code_string) == 1:
                atlas_code_string = atlas_code_string + "0"
            atlas_code = int(atlas_code_string)
            # An observation can affect the square's result only if its atlas code
            # is at least as high as the square's current code for that species
            if species_finnish in species_observed:
                if atlas_code >= species_observed[species_finnish]:
                    row = generate_observation_info(observation, square_id)
                    observations_to_check.append(row)
    # Save as tsv
    society_name = society_name.replace(" ", "_").replace(".", "")
    # datetime as YYYYMMDDHHMM
    datetime_str = datetime.now().strftime("%Y%m%d%H%M")
    filename = f"./winter_output/winter_{society_name}_{datetime_str}.tsv"
    with open(filename, "w") as f:
        # Add header
        f.write("species\tscientific_name\tatlas_code\tdate\tobserver\tcollection\tdocument_id\tsquare_link\n")
        for row in observations_to_check:
            f.write(row + "\n")

    print(f"Saved {len(observations_to_check)} observations to check for {society_name}")
    print(f"Skipped {skipped_count} observations")
    print("--------------------------------------------------------------------------")