Skip to content

Instantly share code, notes, and snippets.

@mikkohei13
Created June 1, 2023 08:39
Show Gist options
  • Save mikkohei13/2730b5d538b494bb535e19e1435f0723 to your computer and use it in GitHub Desktop.
Save mikkohei13/2730b5d538b494bb535e19e1435f0723 to your computer and use it in GitHub Desktop.
Script reads a laji.fi data file and generates a heatmap of weekly species distribution.
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
# Observation-count cutoff used by remove_rarities() and remove_commons():
# species with a count below this value are treated as rare.
rarity_threshold = 100

# Load the tab-separated per-species totals and build a lookup of
# {value of 'sciname' column: value of 'count' column}.
df = pd.read_csv('species_rarity.csv', sep='\t')
species_rarity = df.set_index('sciname')['count'].to_dict()
def remove_long_gatherings(row):
    """Return True if the row's gathering should be kept, False if too long.

    A gathering is rejected when its end day-of-year is more than 3 greater
    than its begin day-of-year.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) providing the keys
            'Gathering.Conversions.DayOfYearBegin' and
            'Gathering.Conversions.DayOfYearEnd'.

    Returns:
        bool: False for gatherings spanning more than 3 days, True otherwise.
    """
    begin_day = row['Gathering.Conversions.DayOfYearBegin']
    end_day = row['Gathering.Conversions.DayOfYearEnd']
    # Written as "not (end > limit)" rather than "end <= limit" so that a
    # missing (NaN) day compares False and the row is kept.
    return not end_day > begin_day + 3
def remove_non_species(row):
    """Return True if the row's scientific name looks like a species name.

    The name is accepted only when it is a string containing at least one
    space; single-word names and non-string values (such as NaN for a
    missing name) are rejected.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) providing the key
            'Taxon.ScientificName'.

    Returns:
        bool: True to keep the row, False to drop it.
    """
    species = row['Taxon.ScientificName']
    # The isinstance check must come first: "in" on a float NaN would raise.
    return isinstance(species, str) and " " in species
def remove_rarities(row):
    """Return True if the row's species is common enough to keep.

    A row is kept only when its scientific name is present in the
    module-level ``species_rarity`` lookup and the stored count is not below
    the module-level ``rarity_threshold``.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) providing the key
            'Taxon.ScientificName'.

    Returns:
        bool: False for species that are unknown or below the threshold,
        True otherwise.
    """
    count = species_rarity.get(row['Taxon.ScientificName'])
    if count is None:
        # Species missing from the lookup are dropped.
        return False
    return not count < rarity_threshold
def remove_commons(row):
    """Return True if the row's species is rare (or unknown) and should be kept.

    This is the complement of the common-species case: a row is dropped only
    when its scientific name is present in the module-level
    ``species_rarity`` lookup with a count at or above the module-level
    ``rarity_threshold``.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) providing the key
            'Taxon.ScientificName'.

    Returns:
        bool: False for species at or above the threshold, True otherwise
        (including species missing from the lookup).
    """
    count = species_rarity.get(row['Taxon.ScientificName'])
    if count is None:
        # Species missing from the lookup are kept.
        return True
    return not count >= rarity_threshold
# Read an observation datafile
data = pd.read_csv('luteet-suomi-2000.csv', sep=';')

# Figure width/height passed to plt.figure(figsize=...) and the font size of
# the y-axis tick labels.
size_x = 20
size_y = 130
fontsize = 8

# Filtering the data: each helper returns True for rows to keep.
data = data[data.apply(remove_rarities, axis=1)]
data = data[data.apply(remove_long_gatherings, axis=1)]
data = data[data.apply(remove_non_species, axis=1)]

# Rows without a start day cannot be placed in a week, so drop them up front
# (otherwise they yield float week labels and all-NaN heatmap rows).
# The explicit copy makes the column assignment below unambiguous and avoids
# pandas' SettingWithCopyWarning on the filtered frame.
data = data.dropna(subset=['Gathering.Conversions.DayOfYearBegin']).copy()

# Fail with a clear message instead of an opaque plotting error.
if data.empty:
    raise SystemExit('No observations left after filtering; nothing to plot.')

# Median start day per species, ascending; this defines the row order of the
# heatmap.
medians = data.groupby('Taxon.ScientificName')['Gathering.Conversions.DayOfYearBegin'].median().sort_values()

# convert 'day_of_year' to week_of_year
# Day-of-year is 1-based, so subtract 1 before dividing: days 1-7 -> week 1,
# days 8-14 -> week 2, and so on. (A plain "day // 7" would put only days 1-6
# in the first bucket.) These are fixed 7-day bins, not ISO calendar weeks.
data['week_of_year'] = (
    (data['Gathering.Conversions.DayOfYearBegin'] - 1) // 7 + 1
).astype(int)

# create a new dataframe where the rows are species, the columns are weeks of the year,
# and the values are the count of observations
heatmap_data = data.groupby(['Taxon.ScientificName', 'week_of_year']).size().unstack(fill_value=0)

# Align the 'heatmap_data' index with 'medians' index.
heatmap_data = heatmap_data.reindex(medians.index)

# plot the heatmap
plt.figure(figsize=(size_x, size_y))
ax = sns.heatmap(heatmap_data, cmap='vlag')
ax.tick_params(axis='y', labelsize=fontsize)

# set labels
plt.xlabel("Week of Year")
plt.ylabel("Species")
plt.title("Observations of Species over Time")

# save the plot as an image file
plt.savefig('heatmap.png', dpi=300)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment