Skip to content

Instantly share code, notes, and snippets.

View monogenea's full-sized avatar

Francisco Lima monogenea

View GitHub Profile
#%% Define function to extract object coordinates if successful in detection
# NOTE(review): indentation was lost in this paste and the function body is
# truncated below (the loop body is cut off) — restore both before running.
def where_is_it(frame, outputs):
# Frame dimensions — presumably used to scale YOLO's normalized bbox
# coordinates back to pixels further down (not visible here).
frame_h = frame.shape[0]
frame_w = frame.shape[1]
# Accumulators for detections that pass both thresholds
bboxes, probs, class_ids = [], [], []
for preds in outputs: # different detection scales
# preds[:, 4] is the objectness score and preds[:, 5:] the per-class
# probabilities (standard YOLOv3 output layout — TODO confirm against model)
hits = np.any(preds[:, 5:] > P_THRESH, axis=1) & (preds[:, 4] > OBJ_THRESH)
# Save prob and bbox coordinates if both objectness and probability pass respective thresholds
for i in np.where(hits)[0]:
pred = preds[i, :]
#%% Load YOLOv3 COCO weights, configs and class IDs
# Import class names (one label per line in coco.names)
with open('yolov3/coco.names', 'rt') as f:
    classes = f.read().rstrip('\n').split('\n')
# One random color per class for drawing detections.
# FIX: np.random.randint's upper bound is exclusive — the original used 255,
# so the channel value 255 could never be produced; 256 gives the full 0-255 range.
colors = np.random.randint(0, 256, (len(classes), 3))
# Give the configuration and weight files for the model and load the network using them
cfg = 'yolov3/yolov3.cfg'
weights = 'yolov3/yolov3.weights'
# Load model
# NOTE(review): the actual model-loading call (e.g. cv2.dnn.readNetFromDarknet)
# is missing in this fragment — the snippet appears truncated here.
#%% Imports and constants
import cv2, os
import numpy as np
import matplotlib.pyplot as plt
# Define objectness, prob and NMS thresholds
# Minimum objectness score for a detection to be considered at all
OBJ_THRESH = .6
# Minimum per-class probability for a detection to be kept
P_THRESH = .6
# IoU threshold used by non-maximum suppression to merge overlapping boxes
NMS_THRESH = .5
#!/bin/bash
# Create subdirs for model files, input and output videos.
# -p: do not fail if the directories already exist (script is rerunnable).
mkdir -p yolov3 input output
# Convert video (first positional argument) to 720p mp4 without audio.
# "$1" is quoted so paths containing spaces survive word splitting;
# scale=720:-2 keeps the auto-computed dimension even (required by h264).
echo "Converting $1 to MP4..."
ffmpeg -i "$1" -vcodec h264 -vf scale=720:-2,setsar=1:1 -an input/input.mp4
# Get yolo dependencies
# Define UMAP
# random_state pins the seed for a reproducible embedding; n_neighbors and
# min_dist trade off local vs global structure preservation.
brain_umap = umap.UMAP(random_state=999, n_neighbors=30, min_dist=.25)
# Fit UMAP and extract latent vars 1-2
# `matrix` is built elsewhere in this file (log2-transformed expression data)
embedding = pd.DataFrame(brain_umap.fit_transform(matrix), columns = ['UMAP1','UMAP2'])
# Produce sns.scatterplot and pass metadata.subclasses as color
# Tiny point size and low alpha because the data has ~76k rows
# (per the fread shape comment elsewhere in this file).
sns_plot = sns.scatterplot(x='UMAP1', y='UMAP2', data=embedding,
hue=metadata.subclass_label.to_list(),
alpha=.1, linewidth=0, s=1)
# Remove expression features with > 50% zero-valued expression levels
# For each column (feature), keep it only if fewer than half its values are 0.
# NOTE(review): np.apply_along_axis is called on `matrix`, which is read with
# datatable's fread elsewhere — confirm it is array-like at this point.
is_expressed = np.apply_along_axis(lambda x: np.mean(x == 0) < .5, arr=matrix, axis=0)
matrix = matrix[:,is_expressed.tolist()]
# Log2-transform
# +1 pseudo-count avoids log2(0); to_numpy() converts the datatable Frame to ndarray
matrix = np.log2(matrix.to_numpy() + 1)
# Check first five columns in matrix.csv
#!cut -d, -f-5 matrix.csv | head
# Import data with Bash command discarding first column
# datatable's fread can read from a shell pipeline; `cut -f2-` drops the
# first CSV column (presumably row identifiers — verify against matrix.csv).
matrix = dt.fread(cmd='cut -d, -f2- matrix.csv',
header=True, sep=',', columns=dt.int32) # ~7 GB (76533, 50281)
# Import metadata
metadata = pd.read_csv('metadata.csv')
# Imports
#!pip install datatable
import os, umap
import numpy as np
import pandas as pd
import datatable as dt
import seaborn as sns
# Placeholder — replace with the actual working directory before running
os.chdir('PATH/TO/WDIR')
# read, downsample, clip, mel spec, normalize and remove noise
# NOTE(review): this function is truncated in this fragment — the closing
# brace and return value are not visible; restore before use.
melspec <- function(x, start, end){
# Read the MP3 at path `x` and extract the [start, end] window (seconds)
mp3 <- readMP3(filename = x) %>%
extractWave(xunit = "time",
from = start, to = end)
# return log-spectrogram with 256 Mel bands and compression
# hoptime = clip length / 256 so the spectrogram has ~256 time frames
sp <- melfcc(mp3, nbands = 256, usecmp = T,
spec_out = T,
hoptime = (end-start) / 256)$aspectrum
# Test set prediction
# Class probabilities predicted by `model` on the held-out test inputs
predXProb <- predict(model, test$X)
# Map probability / one-hot matrices to class labels via row-wise argmax
predXClass <- speciesClass[apply(predXProb, 1, which.max)]
trueXClass <- speciesClass[apply(test$Y, 1, which.max)]
# Plot confusion matrix
# factor(..., levels = speciesClass) forces both axes to share the same
# class ordering even if some classes are absent from the predictions
confMatTest <- confusionMatrix(data = factor(predXClass, levels = speciesClass),
reference = factor(trueXClass, levels = speciesClass))
# NOTE(review): the pheatmap call below is truncated in this fragment —
# its remaining arguments and closing parenthesis are not visible.
pheatmap(confMatTest$table, cluster_rows = F, cluster_cols = F,