Skip to content

Instantly share code, notes, and snippets.

@mikkohei13
Created June 1, 2023 08:34
Show Gist options
  • Save mikkohei13/52b6635b8089db367b869b07627c977a to your computer and use it in GitHub Desktop.
Save mikkohei13/52b6635b8089db367b869b07627c977a to your computer and use it in GitHub Desktop.
Script reads laji.fi citatable data file, and creates Excel file and a line chart of weekly species distribution.
import pandas as pd
import matplotlib.pyplot as plt
# Load the csv file
df = pd.read_csv('rows_HBF.75704.tsv', sep='\t')
# Remove those without a day of year
df = df.dropna(subset=['Gathering.Conversions.DayOfYearBegin'])
# Set day of years as integers
df['Gathering.Conversions.DayOfYearBegin'] = df['Gathering.Conversions.DayOfYearBegin'].astype(int)
df['Gathering.Conversions.DayOfYearEnd'] = df['Gathering.Conversions.DayOfYearEnd'].astype(int)
# Filter out observations that have a long timespan
filtered_df = df[df['Gathering.Conversions.DayOfYearEnd'] - df['Gathering.Conversions.DayOfYearBegin'] <= 5]
# Convert day of the year to week of the year
df['week_of_year'] = df['Gathering.Conversions.DayOfYearBegin'].apply(lambda x: (x-1)//7+1)
# Combine common & scientific name
df['name'] = df['Taxon.FinnishName'].astype(str).str.capitalize() + " (" + df['Taxon.ScientificName'].astype(str) + ")"
# Calculate weekly counts
weekly_counts = df.groupby(['name', 'week_of_year']).size().reset_index(name='weekly_counts')
# Calculate total counts
total_counts = df.groupby('name').size().reset_index(name='total_counts')
# Merge weekly and total counts
df_counts = pd.merge(weekly_counts, total_counts, on='name')
# Calculate the percentage
df_counts['percentage'] = (df_counts['weekly_counts'] / df_counts['total_counts']) * 100
# Filter out weeks with percentage
result = df_counts[df_counts['percentage'] > 10]
print(result)
result.to_excel('weeks.xlsx', index=False)
# Count the unique species in each week
unique_species_per_week = result.groupby('week_of_year')['name'].nunique()
# Create a line chart
plt.figure(figsize=(10,6))
plt.plot(unique_species_per_week.index, unique_species_per_week.values)
plt.xlabel('Viikko')
plt.ylabel('Lajimäärä')
plt.title('Lajit, joiden havainnoista 10 % ko. viikolta')
plt.grid(True)
plt.savefig('line_chart.png') # save the plot to a file
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment