Created
November 23, 2023 21:13
-
-
Save mikkohei13/901fa3eea5c1c3f75e2bed68b4872cda to your computer and use it in GitHub Desktop.
Find temporal outliers in FinBIF simple datafile
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Made with ChatGPT (GPT-4), 2023-11-23.
# Finds temporal outliers in a simple occurrence data file of a single species from FinBIF, with headers in English.
import geopandas as gpd
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from shapely.geometry import Point
# Load the FinBIF occurrence export. The file is tab-separated, so the
# separator must be given explicitly (read_csv defaults to commas).
file_path = 'single_species_data.tsv'  # Replace with your file path
data = pd.read_csv(file_path, sep='\t')
# Function to find outliers
def find_outliers(df, column, *, whisker=1.5):
    """Return the rows of *df* whose *column* value is an IQR outlier.

    A value is an outlier when it lies strictly below
    ``Q1 - whisker * IQR`` or strictly above ``Q3 + whisker * IQR``,
    where Q1 and Q3 are the 25th and 75th percentiles of the column and
    ``IQR = Q3 - Q1``.

    Args:
        df: DataFrame holding the observations.
        column: Name of a numeric column in *df* to test.
        whisker: Multiplier applied to the IQR when building the fences.
            The default of 1.5 reproduces the original hard-coded rule.

    Returns:
        A DataFrame with the outlying rows of *df*, in their original
        order and with their original index. Missing values never
        satisfy either comparison, so they are not reported; an empty
        input yields an empty result.
    """
    values = df[column]
    q1 = values.quantile(0.25)
    q3 = values.quantile(0.75)
    iqr = q3 - q1
    lower_bound = q1 - whisker * iqr
    upper_bound = q3 + whisker * iqr
    return df[(values < lower_bound) | (values > upper_bound)]
# Keep only rows whose life stage is either unrecorded or explicitly 'adult'.
filtered_data = data[(data['Life stage'].isna()) | (data['Life stage'] == 'adult')]

# Keep only single-day observations: begin and end fall on the same day of
# year. Rows where either value is missing fail the equality test and are
# dropped here as well.
filtered_data_same_day = filtered_data[
    filtered_data['Begin day of year'] == filtered_data['End day of year']
]

# Find outliers in the filtered dataset.
# NOTE: after the same-day filter both columns hold identical values, so the
# two calls flag the same rows; both results are kept under their own names.
begin_day_outliers_filtered = find_outliers(filtered_data_same_day, 'Begin day of year')
end_day_outliers_filtered = find_outliers(filtered_data_same_day, 'End day of year')

# Combine the outliers and get unique observation identifiers
# (unique() removes the duplicates introduced by the concatenation).
outlier_identifiers_filtered = pd.concat(
    [begin_day_outliers_filtered, end_day_outliers_filtered]
)['Observation identifier'].unique()
print(outlier_identifiers_filtered.tolist())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment