Created
June 1, 2023 08:39
-
-
Save mikkohei13/2730b5d538b494bb535e19e1435f0723 to your computer and use it in GitHub Desktop.
Script reads a laji.fi data file and generates a heatmap of weekly species distribution.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import seaborn as sns | |
import matplotlib.pyplot as plt | |
from sklearn.preprocessing import MinMaxScaler | |
# Observation-count cutoff used by remove_rarities() and remove_commons():
# species with a count below this value are treated as rarities.
rarity_threshold = 100
# Load the tab-separated species table and build a lookup from scientific
# name ('sciname' column) to number of observations ('count' column).
df = pd.read_csv('species_rarity.csv', sep='\t')
species_rarity = df.set_index('sciname')['count'].to_dict()
def remove_long_gatherings(row):
    """Row filter: tell whether a gathering is short enough to keep.

    Returns False when the row's 'Gathering.Conversions.DayOfYearEnd' is more
    than 3 days after its 'Gathering.Conversions.DayOfYearBegin', and True
    otherwise (including when the comparison is false because of NaN).
    """
    last_day = row['Gathering.Conversions.DayOfYearEnd']
    first_day = row['Gathering.Conversions.DayOfYearBegin']
    return not (last_day > first_day + 3)
def remove_non_species(row):
    """Row filter: keep only rows whose scientific name contains a space.

    Returns True when 'Taxon.ScientificName' is a string with at least one
    space in it. Returns False for non-string values (e.g. NaN for a missing
    name) and for single-word names.
    """
    name = row['Taxon.ScientificName']
    return isinstance(name, str) and " " in name
def remove_rarities(row):
    """Row filter: drop rows of rare or unknown species.

    Returns False when the row's 'Taxon.ScientificName' is missing from the
    module-level ``species_rarity`` lookup or when its count there is below
    ``rarity_threshold``; returns True otherwise.
    """
    name = row['Taxon.ScientificName']
    return name in species_rarity and not (species_rarity[name] < rarity_threshold)
def remove_commons(row):
    """Row filter: drop rows of common species.

    Returns True when the row's 'Taxon.ScientificName' is missing from the
    module-level ``species_rarity`` lookup, or when its count there is not at
    or above ``rarity_threshold``; returns False for species that reach the
    threshold.
    """
    name = row['Taxon.ScientificName']
    return name not in species_rarity or not (species_rarity[name] >= rarity_threshold)
# Read an observation datafile (semicolon-separated)
data = pd.read_csv('luteet-suomi-2000.csv', sep=';')

# Figure width and height (passed to plt.figure as figsize) and the font
# size of the species labels on the y axis
size_x = 20
size_y = 130
fontsize = 8

# Filtering the data: each helper returns True for rows to keep
data = data[data.apply(remove_rarities, axis=1)]
data = data[data.apply(remove_long_gatherings, axis=1)]
# Take an explicit copy of the filtered rows so that adding the
# 'week_of_year' column below writes to an independent DataFrame instead of
# a slice of the original (this avoids pandas' SettingWithCopyWarning).
data = data[data.apply(remove_non_species, axis=1)].copy()

# Median begin day of each species, sorted ascending; used below to order the
# heatmap rows so that species are listed by when they are typically observed
medians = data.groupby('Taxon.ScientificName')['Gathering.Conversions.DayOfYearBegin'].median().sort_values()

# Convert the begin day of year to a 7-day bucket index by integer division
data['week_of_year'] = data['Gathering.Conversions.DayOfYearBegin'] // 7

# Create a new dataframe where the rows are species, the columns are weeks of
# the year, and the values are the count of observations (0 where a species
# has no observations in a week)
heatmap_data = data.groupby(['Taxon.ScientificName', 'week_of_year']).size().unstack(fill_value=0)

# Align the 'heatmap_data' index with 'medians' index.
heatmap_data = heatmap_data.reindex(medians.index)

# Plot the heatmap
plt.figure(figsize=(size_x, size_y))
ax = sns.heatmap(heatmap_data, cmap='vlag')
ax.tick_params(axis='y', labelsize=fontsize)

# Set labels
plt.xlabel("Week of Year")
plt.ylabel("Species")
plt.title("Observations of Species over Time")

# Save the plot as an image file
plt.savefig('heatmap.png', dpi=300)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment