Gibs gibsramen

🏠

Working from home

Software Engineer @czbiohub-sf. Bioinformatics PhD interested in developing software & stats to tackle biological problems. NeoVim fanatic.

gibsramen / gantt.py

Last active February 22, 2022 02:03

Gantt Chart Using Matplotlib

	import matplotlib as mpl
	from matplotlib import pyplot as plt
	from matplotlib.patches import Patch
	import numpy as np
	import pandas as pd
	import seaborn as sns


	# Paper 1
	paper_1_start = "02/05/2018"

gibsramen / generate_figure.py

Last active January 18, 2022 20:01

	from typing import Tuple
	import warnings

	import biom
	import click
	from gemelli.rpca import rpca
	import matplotlib.pyplot as plt
	import numpy as np
	import pandas as pd
	import seaborn as sns

gibsramen / run_ancombc.R

Created November 17, 2021 20:36

ANCOM-BC in BIOM

	library(biomformat)
	library(ANCOMBC)
	library(phyloseq)

	# Read the BIOM feature table and convert it to a plain data.frame
	# (biom object -> matrix -> data.frame).
	tbl_file <- "data/shi_age_prediction/processed/processed_skin_tbl.biom"
	tbl <- biomformat::read_biom(tbl_file)
	tbl <- as.data.frame(as.matrix(biomformat::biom_data(tbl)))

	# Read the tab-separated metadata; the first column supplies the row names.
	# Spell out TRUE rather than T: T is an ordinary variable that can be reassigned.
	md_file <- "data/shi_age_prediction/processed/processed_skin_md.tsv"
	md <- read.table(md_file, sep="\t", row.names=1, header=TRUE)

gibsramen / skbio_ordination_biplot.R

Created September 2, 2020 20:36

	get_ord_dataframe <- function(lines, linestart){
	header_line <- strsplit(lines[linestart], split="\t")[[1]]
	num_rows = as.numeric(header_line[2])
	num_cols = as.numeric(header_line[3])
	if (num_rows == 0){return(data.frame())}

	data <- strsplit(lines[(linestart+1):(linestart+num_rows)], split="\t")
	names <- unlist(lapply(data, function(x) x[1]))
	coords <- lapply(data, function(x) strsplit(x[2:(2+num_cols-1)], split="\t"))
	coords <- data.frame(matrix(unlist(coords), nrow=length(coords), byrow=T), stringsAsFactors=FALSE)

gibsramen / logicle_nr.py

Created March 7, 2017 19:07

Newton-Raphson Code

	def newton_method(f, df, x0, err, W):
	"""
	Estimate p using Newton-Raphson method.

	Output: root approximation given error criterion (err)
	"""
	delta = err+1
	x = x0
	while delta > err:
	x1 = x - f(x, W) / df(x)

gibsramen / ds_challenge_4.R

Last active January 11, 2017 22:02

	# Closed-form simple linear regression on noise-free data y = 1 + 2.5 * x.
	x <- seq_len(10L)
	y <- 2.5 * x + 1
	N <- length(x)

	# Sample means of the predictor and the response.
	x_mean <- mean(x)
	y_mean <- mean(y)

	# Slope: sum of cross-deviations divided by the sum of squared x-deviations.
	m <- with(
	  list(dx = x - x_mean, dy = y - y_mean),
	  sum(dx * dy) / sum(dx^2)
	)
	# Intercept: the fitted line passes through (x_mean, y_mean).
	b <- y_mean - x_mean * m

	# m = 2.5
	# b = 1

gibsramen / credit_card_basics_output.txt

Created January 5, 2017 19:12

	> summarize(by_sex, median(LIMIT_BAL))
	# A tibble: 2 × 2
	SEX `median(LIMIT_BAL)`
	<chr> <dbl>
	1 1 130000
	2 2 150000
	> summarize(by_sex, length(dflt[dflt=='1'])/length(SEX))
	# A tibble: 2 × 2
	SEX `length(dflt[dflt == "1"])/length(SEX)`
	<chr> <dbl>

gibsramen / credit_card_basics.R

Created January 5, 2017 19:08

	library(dplyr)

	# Read the raw file without treating any line as a header, keeping every
	# column as character. Use FALSE rather than F: F is an ordinary variable
	# that can be reassigned.
	d <- read.csv('default of credit card clients.csv', header=FALSE, stringsAsFactors=FALSE)
	# The second row of the file supplies the column names.
	colnames(d) <- unlist(d[2,])
	# Drop the first two rows now that the names have been taken from row 2.
	d <- d[-c(1,2),]
	# Give column 25 the short name 'dflt'.
	colnames(d)[25] <- 'dflt'
	# Everything was read as character, so convert the credit limit to numeric.
	d$LIMIT_BAL <- as.numeric(d$LIMIT_BAL)

	# MALE VS. FEMALE
	# ----------------

gibsramen / prop62_pop_yes_prop.R

Created December 31, 2016 21:12

Proportion of 'Yes' Votes on Proposition 62 vs. log of County Population (11/08/2016)

	library(ggplot2)

	# Load the ballot-measure results, keeping strings as character so the
	# comma-formatted counts can be cleaned up below.
	votes <- read.csv('csv-ballot-measures.csv', header=TRUE, stringsAsFactors=FALSE)
	# Discard the first two columns.
	votes <- votes[, -(1:2)]
	# Keep only the rows for ballot measure 62.
	d.votes <- votes[which(votes$BALLOT_MEASURE_ID == 62), ]

	# Remove identifier/title columns. The mask is built from d.votes itself
	# (not votes) so it always matches the frame being subset, and drop = FALSE
	# keeps the result a data.frame even if a single column remains.
	drops <- c('COUNTY_ID', 'BALLOT_MEASURE_ID', 'BALLOT_MEASURE_NAME', 'BALLOT_MEASURE_TITLE')
	d.votes <- d.votes[, !names(d.votes) %in% drops, drop = FALSE]

	# Strip thousands separators before converting the count to numeric.
	d.votes$YES_COUNT <- as.numeric(gsub(',', '', d.votes$YES_COUNT))