@mikkohei13
Created March 14, 2025 18:00
Fetches atlas observations from the winter period which can affect square results
# Script that fetches winter-period atlas observations and checks which of them might affect atlas results.
import os
import requests
import json
import time
from datetime import datetime
# Fetch atlas observations
token = "YOUR TOKEN HERE"  # api.laji.fi access token
skipped_names = "NAMES TO FILTER OUT"  # observer team string to skip, compared with an exact match
def get_collection_name(collection_id):
    """Return the Finnish long name of a collection, caching results on disk in ./cache."""
    global token
    collection_id = collection_id.replace("http://tun.fi/", "")
    cache_dir = "./cache"
    os.makedirs(cache_dir, exist_ok=True)

    # Check if collection name is already cached
    cache_file = os.path.join(cache_dir, f"{collection_id}.txt")
    if os.path.exists(cache_file):
        # print(f"Collection name for {collection_id} already cached")
        with open(cache_file, "r") as f:
            return f.read().strip()

    # If not cached, fetch from API
    # Add a small delay to avoid overwhelming the API
    time.sleep(0.2)
    url = f"https://api.laji.fi/v0/collections/{collection_id}?lang=fi&langFallback=true&access_token={token}"
    # print(f"Fetching collection name for {collection_id} from API url {url}")
    response = requests.get(url)
    collection_name = response.json()["longName"]

    # Cache the result
    with open(cache_file, "w") as f:
        f.write(collection_name)

    return collection_name
def fetch_all_data(base_url, page_size=100):
    """
    Fetch all data from api.laji.fi by paginating through all available pages.

    Args:
        base_url (str): The base URL for the API request without page and pageSize parameters
        page_size (int, optional): Number of records per page. Defaults to 100.

    Returns:
        tuple: (all_results, total_count) where all_results is a list of all data dictionaries
               and total_count is the total number of records fetched
    """
    all_results = []
    page = 1
    total_pages = None
    total_count = 0

    while total_pages is None or page <= total_pages:
        # Construct URL with pagination parameters
        if '?' in base_url:
            url = f"{base_url}&pageSize={page_size}&page={page}"
        else:
            url = f"{base_url}?pageSize={page_size}&page={page}"

        print(f"Fetching page {page}...")
        response = requests.get(url)

        # Check if request was successful
        if response.status_code != 200:
            print(f"Error fetching data: {response.status_code}")
            print(response.text)
            break

        data = response.json()

        # Get pagination information
        current_page = data.get('currentPage', 1)
        total_pages = data.get('lastPage', 1)
        total_count = data.get('total', 0)

        # Add results to our collection
        results = data.get('results', [])
        all_results.extend(results)

        print(f"Fetched page {current_page}/{total_pages}, got {len(results)} records")

        # Move to next page
        page += 1

        # Add a small delay to avoid overwhelming the API
        time.sleep(0.2)

    return all_results, len(all_results)
def get_square_data(square_id):
    """Fetch the current atlas results for one YKJ 10 km square and return {Finnish name: atlas code}."""
    # Add a small delay to avoid overwhelming the API
    time.sleep(0.2)
    print(f"Fetching square data for {square_id}")
    url = f"https://atlas-api.2.rahtiapp.fi/api/v1/grid/{square_id}/atlas"
    response = requests.get(url)

    species_observed = dict()
    for species in response.json()["data"]:
        species_finnish = species["speciesName"]
        # Atlas codes come as e.g. "MY.atlasCodeEnum61"; single-digit codes (e.g. "6") are
        # padded to two digits ("60") so they can be compared as integers against subcodes like 61
        atlas_code_string = species["atlasCode"]["key"].replace("MY.atlasCodeEnum", "")
        if len(atlas_code_string) == 1:
            atlas_code_string = atlas_code_string + "0"
        atlas_code = int(atlas_code_string)
        species_observed[species_finnish] = atlas_code

    return species_observed
def generate_observation_info(observation, square_id):
    """Format one observation as a tab-separated row for the output TSV."""
    observers = ", ".join(observation['gathering']['team']) if 'team' in observation['gathering'] and observation['gathering']['team'] else "Anonyymi"
    collection_name = get_collection_name(observation['document']['collectionId'])
    row = f"{observation['unit']['linkings']['taxon']['nameFinnish']}\t{observation['unit']['linkings']['taxon']['scientificName']}\t{observation['unit']['atlasCode'].replace('http://tun.fi/MY.atlasCodeEnum', '')}\t{observation['gathering']['displayDateTime']}\t{observers}\t{collection_name}\t{observation['document']['documentId']}\thttps://tulokset.lintuatlas.fi/grid/{square_id}"
    return row
def load_bird_association_areas():
    """Fetch the list of bird association areas (id and name) from api.laji.fi."""
    global token
    url = f"https://api.laji.fi/v0/areas?type=birdAssociationArea&pageSize=50&access_token={token}"
    print(url)
    response = requests.get(url)
    area_list = response.json()["results"]
    return area_list
page_size = 1000
page = 1
time_range = "2022/2025"

area_list = load_bird_association_areas()
skipped_count = 0

for area in area_list:
    # Each area is a dictionary with id and name
    society_id = area["id"]
    society_name = area["name"]

    observations_to_check = []

    # Winter-season (1 Sep - 31 Mar) bird observations with atlas classes B-D and no quality issues
    base_url = f"https://api.laji.fi/v0/warehouse/query/unit/list?cache=true&useIdentificationAnnotations=true&includeSubTaxa=true&includeNonValidTaxa=true&informalTaxonGroupId=MVL.1&taxonRankId=MX.species&birdAssociationAreaId={society_id}&time={time_range}&season=0901%2F0331&individualCountMin=1&qualityIssues=NO_ISSUES&recordQuality=COMMUNITY_VERIFIED,NEUTRAL,EXPERT_VERIFIED&needsCheck=false&atlasClass=MY.atlasClassEnumB%2CMY.atlasClassEnumC%2CMY.atlasClassEnumD&selected=gathering.conversions.ykj10kmCenter.lat,gathering.conversions.ykj10kmCenter.lon,unit.linkings.taxon.nameFinnish,unit.linkings.taxon.scientificName,unit.unitId,document.documentId,gathering.displayDateTime,document.collectionId,gathering.team,unit.atlasCode,unit.atlasClass&access_token={token}"

    all_data, count = fetch_all_data(base_url, page_size)
    print(f"Fetched a total of {count} records")
    #print(all_data)

    squares_data = {}

    # Loop through all data and aggregate by ykj10km
    for observation in all_data:
        if "conversions" not in observation["gathering"]:
            print(observation)
            exit()
        lat = observation["gathering"]["conversions"]["ykj10kmCenter"]["lat"]
        lon = observation["gathering"]["conversions"]["ykj10kmCenter"]["lon"]
        square_id = f"{lat}:{lon}"
        if square_id not in squares_data:
            squares_data[square_id] = []
        squares_data[square_id].append(observation)

    #print(squares_data)
    # Loop the aggregated data one ykj10km square at a time
    square_count = len(squares_data)
    i = 0
    for square_id, observations in squares_data.items():
        i += 1
        print(f"Processing square {square_id}, {i}/{square_count}")
        species_observed = get_square_data(square_id)

        for observation in observations:
            observer = ", ".join(observation['gathering']['team']) if 'team' in observation['gathering'] and observation['gathering']['team'] else "Anonyymi"
            if observer == skipped_names:
                skipped_count += 1
                print(f"Skipping {observer} {skipped_count}")
                continue

            species_finnish = observation["unit"]["linkings"]["taxon"]["nameFinnish"]
            atlas_code_string = observation["unit"]["atlasCode"].replace("http://tun.fi/MY.atlasCodeEnum", "")
            if len(atlas_code_string) == 1:
                atlas_code_string = atlas_code_string + "0"
            atlas_code = int(atlas_code_string)

            # Flag the observation if its atlas code is at least as high as the code already
            # recorded for this species in this square, i.e. it may affect the square's result
            if species_finnish in species_observed:
                if atlas_code >= species_observed[species_finnish]:
                    row = generate_observation_info(observation, square_id)
                    observations_to_check.append(row)
    # Save as TSV
    society_name = society_name.replace(" ", "_").replace(".", "")
    # Datetime as YYYYMMDDHHMM
    datetime_str = datetime.now().strftime("%Y%m%d%H%M")
    os.makedirs("./winter_output", exist_ok=True)
    filename = f"./winter_output/winter_{society_name}_{datetime_str}.tsv"

    with open(filename, "w") as f:
        # Add header
        f.write("species\tscientific_name\tatlas_code\tdate\tobserver\tcollection\tdocument_id\tsquare_link\n")
        for row in observations_to_check:
            f.write(row + "\n")

    print(f"Saved {len(observations_to_check)} observations to check for {society_name}")
    print(f"Skipped {skipped_count} observations")
    print("--------------------------------------------------------------------------")