Skip to content

Instantly share code, notes, and snippets.

@lfoppiano
lfoppiano / glutton.py
Created April 4, 2024 08:38
Look up Open Access PDF files from a list of DOIs using Biblio Glutton (https://github.com/kermitt2/biblio-glutton)
import argparse
import os
from pathlib import Path
import requests
# Constants
GLUTTON_URL = "ADD BIBLIO GLUTTON LOOKUP SERVICE"
@lfoppiano
lfoppiano / nvidia-benchmark.py
Last active September 10, 2021 08:05
NVIDIA benchmark
# Credits to https://marmelab.com/blog/2018/03/21/using-nvidia-gpu-within-docker-container.html
# Run with
# [CPU] docker run --runtime=nvidia --rm -ti -v "${PWD}:/app" tensorflow/tensorflow:1.15.5-gpu python /app/nvidia-benchmark.py cpu 10000
# [GPU] docker run --runtime=nvidia --rm -ti -v "${PWD}:/app" tensorflow/tensorflow:1.15.5-gpu python /app/nvidia-benchmark.py gpu 10000
import sys
import numpy as np
import tensorflow as tf
@lfoppiano
lfoppiano / JProfiler-with-Docker.md
Last active March 23, 2021 06:24 — forked from kevin-lee/JProfiler-with-Docker.md
JVM Profiler with Docker

JProfiler with Docker

Docker

Dockerfile

The Dockerfile should include the JProfiler installation.

RUN wget <JProfiler file location> -P /tmp/ && \
  tar -xzf /tmp/<JProfiler file> -C /usr/local && \
  rm /tmp/<JProfiler file>
@lfoppiano
lfoppiano / soft_matching
Last active March 23, 2021 06:27
How to match elements in two lists of strings using soft matching
from difflib import SequenceMatcher
def group_by_with_soft_matching(input_list, threshold):
matching = {}
last_matching = -1
input_list_sorted = sorted(list(set(input_list)), reverse=True)
for index_x, x in enumerate(input_list_sorted):
unpacked = [y for x in matching for y in matching[x]]
@lfoppiano
lfoppiano / preprocessor_migration.py
Created August 11, 2020 01:03
Migrate delft preprocessors to JSON
import json
import os
import pathlib
import sys
from delft.sequenceLabelling.preprocess import WordPreprocessor
if __name__ == '__main__':
if len(sys.argv) != 2:
print("Invalid parameters. Usage: python json_migration.py model directory. "
@lfoppiano
lfoppiano / find_duplicates.sh
Last active May 16, 2020 01:57
Find duplicated files
# Print the SHA-1 hash and the number of occurrences for each hash that does not appear exactly once (i.e. duplicated files)
sha1sum * | gsort | gawk '{a[$1]++}END{for(i in a){if(a[i]-1)print i, a[i]}}'
# Print the SHA-1 hash and the last file name seen for each group of duplicated files
sha1sum * | gsort | gawk '{a[$1]++; b[$1]=$2}END{for(i in a){if(a[i]-1)print i, b[i]}}'
@lfoppiano
lfoppiano / recipe.py
Created March 6, 2019 23:34
prodigy recipe
import prodigy
from prodigy.components.loaders import JSONL
from prodigy.util import split_string
@prodigy.recipe('superconductor-material-recipe',
dataset=prodigy.recipe_args['dataset'],
source=("The source data as a JSONL file", "positional", None, str),
label=("One or more comma-separated labels", "option", "l", split_string))
def superconductors_detection(dataset, source=None, label=None):