Chase Clark chasemc

Counting Lines

I have a lot of files in a lot of nested directories: 347,430 directories and 379,286 files.

To anonymize what I’m doing, we’ll say I have two types of files: apples.csv.gz and oranges.csv.gz (if you don’t know, “gz” means the


	library(ggplot2)
	library(data.table)
	library(geofacet)
	library(magrittr)

	# Download the per-state daily table as a data.table.
	raw_data <- data.table::fread("http://covidtracking.com/api/states/daily.csv")

	# Re-parse the `date` column from its YYYYMMDD form into a proper Date.
	raw_data[, date := as.Date(as.character(date), format = "%Y%m%d")]

	# Retain only the rows dated after 15 March 2020.
	raw_data <- raw_data[date > as.Date("2020-03-15")]



	# Run IDBacApp::idbac_update_db() once for every file found in `old_files_path`.
	old_files_path <- "/home/user/Downloads/db/old"
	# Bare file names (no directory part) of everything in that folder...
	a <- list.files(old_files_path, full.names = FALSE)
	# ...with the file extension stripped.
	a <- tools::file_path_sans_ext(a)
	# NOTE(review): the closing brace of this loop is not visible in this excerpt.
	for(i in a) {
	# Connect using the extension-less name `i`; only the first element of the result is kept as `pool`.
	pool <- IDBacApp::idbac_connect(fileName = i,
	filePath = old_files_path)[[1]]
	# "copy" presumably writes an updated copy instead of overwriting in place -- TODO confirm in IDBacApp docs.
	IDBacApp::idbac_update_db(pool = pool,
	copy_overwrite = "copy")

	import os
	import pathlib
	from rich.console import Console
	from rich.table import Table
	from rich.table import Column
	import pandas as pd


	# Starts by creating a rich Table titled `title`; `df` is not used in the visible lines.
	# NOTE(review): the remainder of this function is cut off in this excerpt.
	def make_rich(df, title="mytitle"):
	table = Table(title=title)

	https://github.com/Micromeda/InterProScan-Docker/blob/master/LICENSE

	Apache License
	Version 2.0, January 2004
	http://www.apache.org/licenses/

	TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

	1. Definitions.

	#!/usr/bin/bash
	# Print "RefSeq-Accn Sequence-Length" (space-separated) for every data row of
	# the tab-separated report downloaded from the URL given as $1.
	# NOTE(review): the column names look like an NCBI assembly report -- confirm.
	#
	# The stages are joined with real pipes: an escaped "\|" is passed to curl as
	# a literal "|" argument and no pipeline is ever formed.
	# "$1" is quoted so that "?" and "&" in a URL are not glob-expanded or split.
	curl -s "$1" |
	# Keep everything from the column-header line through the end of the input.
	sed -ne '/# Sequence-Name\tSequence-Role\tAssigned-Molecule\tAssigned-Molecule-Location\/Type\tGenBank-Accn\tRelationship\tRefSeq-Accn\tAssembly-Unit\tSequence-Length\tUCSC-style-name/,$ p' |
	# Map header names to field numbers on the first line, then print the two
	# wanted fields by name on every line (the header line included).
	awk -F"\t" 'NR==1 {for (i=1; i<=NF; i++) {f[$i] = i}}{ print $(f["RefSeq-Accn"]), $(f["Sequence-Length"])}' |
	# Drop the header line that awk echoed.
	sed 1d

	#!/usr/bin/env Rscript
	# Arguments supplied on the command line after the script name.
	args <- commandArgs(trailingOnly = TRUE)

	# Install each required package only when it is not already available.
	message("Installing necessary libraries if not already installed")
	if (!requireNamespace("BiocManager", quietly = TRUE)) {
	  install.packages("BiocManager")
	}
	# mzR is distributed through Bioconductor, not CRAN, so install.packages()
	# cannot find it; it has to be installed through BiocManager.
	if (!requireNamespace("mzR", quietly = TRUE)) {
	  # ask/update are disabled so a non-interactive Rscript run neither prompts
	  # nor upgrades unrelated packages.
	  BiocManager::install("mzR", update = FALSE, ask = FALSE)
	}
	if (!requireNamespace("data.table", quietly = TRUE)) {
	  install.packages("data.table")
	}

	#!/usr/bin/bash

	# Rename every file found to "<md5 of its contents>.<extension>".
	# $1 is the file(s') name to find and hash
	# $2 is the extension to be given to each renamed file
	#
	# NOTE: the destination name has no directory part, so renamed files end up
	# in the current working directory.

	# The stages are joined with real pipes: an escaped "\|" is handed to find as
	# a literal "|" argument and no pipeline is ever formed.
	# $1 is left unquoted so that several names or a pattern passed as $1 still
	# expand as before -- NOTE(review): quote it if only one path is ever passed.
	find $1 -print0 | xargs -0 md5sum |
	# md5sum prints "<hash>  <path>"; read splits that into newname / oldname.
	while read -r newname oldname; do
	  # Quote the whole destination so an extension containing spaces or glob
	  # characters is not split; "--" guards against names starting with "-".
	  mv -v -- "$oldname" "$newname.$2"
	done