mikkohei13 · June 1, 2023 08:39
diff --git a/weekly_species_heatmaps.py b/weekly_species_heatmaps.py
 import pandas as pd
 import seaborn as sns
 import matplotlib.pyplot as plt

 from sklearn.preprocessing import MinMaxScaler

 # Observation-count cut-off used by the rarity filters below:
 # counts under this value are "rare", counts at or above it are "common".
 rarity_threshold = 100

 # Load the tab-separated per-species table and map each scientific name
 # ('sciname' column) to its number of observations ('count' column).
 df = pd.read_csv('species_rarity.csv', sep='\t')
 species_rarity = df.set_index('sciname')['count'].to_dict()


 def remove_long_gatherings(row):
    """Row predicate: keep gatherings that span at most three days.

    Despite the name, the return value is a *keep* flag: True when the
    end day-of-year is no more than 3 greater than the begin day-of-year,
    False when the gathering ends more than 3 days after it begins.
    """
    first_day = row['Gathering.Conversions.DayOfYearBegin']
    last_day = row['Gathering.Conversions.DayOfYearEnd']
    # Written as a negated ">" (not "<=") so that a comparison which
    # evaluates false for any reason still keeps the row.
    return not (last_day > first_day + 3)


 def remove_non_species(row):
    """Row predicate: keep rows whose scientific name has several words.

    Returns True only when 'Taxon.ScientificName' is a string that
    contains a space; non-string values and single-word names give False.
    """
    name = row['Taxon.ScientificName']
    # The isinstance guard must come first: "in" on a non-string value
    # (for example a float) would raise TypeError.
    return isinstance(name, str) and " " in name


 def remove_rarities(row):
    """Row predicate: keep rows of species that are not rare.

    Returns True when the row's 'Taxon.ScientificName' is a key of the
    module-level ``species_rarity`` mapping and its count is not below
    ``rarity_threshold``. Names missing from the mapping give False.
    """
    name = row['Taxon.ScientificName']
    return name in species_rarity and not (species_rarity[name] < rarity_threshold)


 def remove_commons(row):
    """Row predicate: keep rows of species that are not common.

    Returns True when the row's 'Taxon.ScientificName' is absent from the
    module-level ``species_rarity`` mapping, or when its count is not at
    or above ``rarity_threshold``. Otherwise returns False.
    """
    name = row['Taxon.ScientificName']
    return name not in species_rarity or not (species_rarity[name] >= rarity_threshold)


 # Read an observation datafile (semicolon-separated)
 data = pd.read_csv('luteet-suomi-2000.csv', sep=';')

 # Figure dimensions (passed to plt.figure as figsize) and y tick label size
 size_x = 20
 size_y = 130
 fontsize = 8

 # Filtering the data. Each predicate returns True for the rows to KEEP.
 data = data[data.apply(remove_rarities, axis=1)]
 data = data[data.apply(remove_long_gatherings, axis=1)]
 # Take an explicit copy after the last filter: the 'week_of_year' column is
 # assigned below, and assigning into a filtered slice triggers pandas'
 # SettingWithCopyWarning.
 data = data[data.apply(remove_non_species, axis=1)].copy()

 # Median begin day per species, ascending; used only to order heatmap rows
 medians = data.groupby('Taxon.ScientificName')['Gathering.Conversions.DayOfYearBegin'].median().sort_values()

 # Convert day of year to a zero-based week index with uniform 7-day bins
 # (days 1-7 -> 0, 8-14 -> 1, ...). Plain `day // 7` would leave only days
 # 1-6 in the first bin.
 # NOTE(review): assumes DayOfYearBegin is 1-based (1 January = 1) - confirm
 # against the data export.
 data['week_of_year'] = (data['Gathering.Conversions.DayOfYearBegin'] - 1) // 7

 # create a new dataframe where the rows are species, the columns are weeks of the year,
 # and the values are the count of observations (0 where a species has none)
 heatmap_data = data.groupby(['Taxon.ScientificName', 'week_of_year']).size().unstack(fill_value=0)

 # Align the 'heatmap_data' index with 'medians' index, so rows follow the
 # sorted median order.
 heatmap_data = heatmap_data.reindex(medians.index)

 # plot the heatmap
 plt.figure(figsize=(size_x, size_y))
 ax = sns.heatmap(heatmap_data, cmap='vlag')
 ax.tick_params(axis='y', labelsize=fontsize)

 # set labels
 plt.xlabel("Week of Year")
 plt.ylabel("Species")
 plt.title("Observations of Species over Time")

 # save the plot as an image file
 plt.savefig('heatmap.png', dpi=300)
	import pandas as pd
	import seaborn as sns
	import matplotlib.pyplot as plt

	from sklearn.preprocessing import MinMaxScaler

	# Observation-count cut-off used by the rarity filters below:
	# counts under this value are "rare", counts at or above it are "common".
	rarity_threshold = 100

	# Load the tab-separated per-species table and map each scientific name
	# ('sciname' column) to its number of observations ('count' column).
	df = pd.read_csv('species_rarity.csv', sep='\t')
	species_rarity = df.set_index('sciname')['count'].to_dict()


	def remove_long_gatherings(row):
	    """Row predicate: keep gatherings that span at most three days.

	    Despite the name, the return value is a *keep* flag: True when the
	    end day-of-year is no more than 3 greater than the begin day-of-year,
	    False when the gathering ends more than 3 days after it begins.
	    """
	    first_day = row['Gathering.Conversions.DayOfYearBegin']
	    last_day = row['Gathering.Conversions.DayOfYearEnd']
	    # Written as a negated ">" (not "<=") so that a comparison which
	    # evaluates false for any reason still keeps the row.
	    return not (last_day > first_day + 3)


	def remove_non_species(row):
	    """Row predicate: keep rows whose scientific name has several words.

	    Returns True only when 'Taxon.ScientificName' is a string that
	    contains a space; non-string values and single-word names give False.
	    """
	    name = row['Taxon.ScientificName']
	    # The isinstance guard must come first: "in" on a non-string value
	    # (for example a float) would raise TypeError.
	    return isinstance(name, str) and " " in name


	def remove_rarities(row):
	    """Row predicate: keep rows of species that are not rare.

	    Returns True when the row's 'Taxon.ScientificName' is a key of the
	    module-level ``species_rarity`` mapping and its count is not below
	    ``rarity_threshold``. Names missing from the mapping give False.
	    """
	    name = row['Taxon.ScientificName']
	    return name in species_rarity and not (species_rarity[name] < rarity_threshold)


	def remove_commons(row):
	    """Row predicate: keep rows of species that are not common.

	    Returns True when the row's 'Taxon.ScientificName' is absent from the
	    module-level ``species_rarity`` mapping, or when its count is not at
	    or above ``rarity_threshold``. Otherwise returns False.
	    """
	    name = row['Taxon.ScientificName']
	    return name not in species_rarity or not (species_rarity[name] >= rarity_threshold)


	# Read an observation datafile (semicolon-separated)
	data = pd.read_csv('luteet-suomi-2000.csv', sep=';')

	# Figure dimensions (passed to plt.figure as figsize) and y tick label size
	size_x = 20
	size_y = 130
	fontsize = 8

	# Filtering the data. Each predicate returns True for the rows to KEEP.
	data = data[data.apply(remove_rarities, axis=1)]
	data = data[data.apply(remove_long_gatherings, axis=1)]
	# Take an explicit copy after the last filter: the 'week_of_year' column is
	# assigned below, and assigning into a filtered slice triggers pandas'
	# SettingWithCopyWarning.
	data = data[data.apply(remove_non_species, axis=1)].copy()

	# Median begin day per species, ascending; used only to order heatmap rows
	medians = data.groupby('Taxon.ScientificName')['Gathering.Conversions.DayOfYearBegin'].median().sort_values()

	# Convert day of year to a zero-based week index with uniform 7-day bins
	# (days 1-7 -> 0, 8-14 -> 1, ...). Plain `day // 7` would leave only days
	# 1-6 in the first bin.
	# NOTE(review): assumes DayOfYearBegin is 1-based (1 January = 1) - confirm
	# against the data export.
	data['week_of_year'] = (data['Gathering.Conversions.DayOfYearBegin'] - 1) // 7

	# create a new dataframe where the rows are species, the columns are weeks of the year,
	# and the values are the count of observations (0 where a species has none)
	heatmap_data = data.groupby(['Taxon.ScientificName', 'week_of_year']).size().unstack(fill_value=0)

	# Align the 'heatmap_data' index with 'medians' index, so rows follow the
	# sorted median order.
	heatmap_data = heatmap_data.reindex(medians.index)

	# plot the heatmap
	plt.figure(figsize=(size_x, size_y))
	ax = sns.heatmap(heatmap_data, cmap='vlag')
	ax.tick_params(axis='y', labelsize=fontsize)

	# set labels
	plt.xlabel("Week of Year")
	plt.ylabel("Species")
	plt.title("Observations of Species over Time")

	# save the plot as an image file
	plt.savefig('heatmap.png', dpi=300)