Last active
June 26, 2022 07:46
-
-
Save mikkohei13/11d7dae81c376b4cfaedacbb1302b7bd to your computer and use it in GitHub Desktop.
Tool to concatenate and extend BirdNET CSV files
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import pandas as pd | |
import datetime | |
import re | |
def get_datafile_list(directory, limit): | |
datafile_list = [] | |
objects = os.listdir(directory) | |
i = 0 | |
for filename in objects: | |
if filename.lower().endswith(".results.csv"): | |
datafile_list.append(filename) | |
i = i + 1 | |
if i >= limit: | |
break | |
return datafile_list | |
# Todo: identify filename format: Audiomoth & SM4 | |
def datetime_from_filename(filename): | |
parts = filename.split(".") | |
datepart = parts[0] | |
# Note: order of these if-clauses are important. | |
# Wildlife Acoustics SM4 | |
pattern = re.compile("[A-Z0-9]{5}\_\d{8}\_\d{6}") | |
if pattern.match(datepart): | |
datepart = datepart[6:] | |
return datetime.datetime.strptime(datepart, "%Y%m%d_%H%M%S") | |
# Audiomoth new firmware | |
pattern = re.compile("\d{8}\_\d{6}") | |
if pattern.match(datepart): | |
return datetime.datetime.strptime(datepart, "%Y%m%d_%H%M%S") | |
# Audiomoth old firmware, 32-bit hexadecimal UNIX timestamp | |
pattern = re.compile("[A-F0-9]{8}") | |
if pattern.match(datepart): | |
unix_seconds = int(datepart, 16) | |
return datetime.datetime.fromtimestamp(unix_seconds) | |
return datepart | |
def audio_filename_from_filename(filename): | |
# Problem: cannot get original file extension, since it's not available on the Birdnet analysis files | |
# Presume ".wav" | |
parts = filename.split(".") | |
return parts[0] + ".wav" | |
dir = "test" | |
dir = "/mnt/c/Users/mikko/Documents/Audiomoth_2022/dirname" | |
limit = 2000 | |
datafile_list = get_datafile_list(dir, limit) | |
datafile_list.sort() | |
print(datafile_list) | |
df_list = [] | |
for filename in datafile_list: | |
df = pd.read_csv(dir + "/" + filename) | |
if df.empty: | |
continue | |
# ~Audio filename | |
df['Filename'] = audio_filename_from_filename(filename) | |
# Datetime | |
df['Date (UTC)'] = datetime_from_filename(filename) | |
# Start time in h:m:s | |
df['Start (h:m:s)'] = df.apply(lambda row: str(datetime.timedelta(seconds= row['Start (s)'])), axis = 1) | |
# Confidence comma-separated | |
df['Confidence (cs)'] = df.apply(lambda row: str(row['Confidence']).replace(".", ","), axis = 1) | |
df_list.append(df) | |
print("Handled file " + filename) | |
full_df = pd.concat(df_list, ignore_index=True) | |
full_df.to_csv(dir + "/birdnet-output.csv", index=True) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment