This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import matplotlib as mpl | |
from matplotlib import pyplot as plt | |
from matplotlib.patches import Patch | |
import numpy as np | |
import pandas as pd | |
import seaborn as sns | |
# Paper 1
# NOTE(review): "02/05/2018" is ambiguous between MM/DD and DD/MM -- confirm
# which format the consumer of this string expects.
paper_1_start = "02/05/2018"
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from typing import Tuple | |
import warnings | |
import biom | |
import click | |
from gemelli.rpca import rpca | |
import matplotlib.pyplot as plt | |
import numpy as np | |
import pandas as pd | |
import seaborn as sns |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(biomformat) | |
library(ANCOMBC) | |
library(phyloseq) | |
# Read the BIOM feature table and flatten it into a plain data frame
# (biom object -> matrix -> data.frame).
tbl_file <- "data/shi_age_prediction/processed/processed_skin_tbl.biom"
tbl <- biomformat::read_biom(tbl_file)
tbl <- as.data.frame(as.matrix(biomformat::biom_data(tbl)))

# Read the tab-separated metadata table; the first column becomes the row
# names and the first line is treated as the header.
md_file <- "data/shi_age_prediction/processed/processed_skin_md.tsv"
md <- read.table(md_file, sep = "\t", row.names = 1, header = TRUE)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
get_ord_dataframe <- function(lines, linestart){ | |
header_line <- strsplit(lines[linestart], split="\t")[[1]] | |
num_rows = as.numeric(header_line[2]) | |
num_cols = as.numeric(header_line[3]) | |
if (num_rows == 0){return(data.frame())} | |
data <- strsplit(lines[(linestart+1):(linestart+num_rows)], split="\t") | |
names <- unlist(lapply(data, function(x) x[1])) | |
coords <- lapply(data, function(x) strsplit(x[2:(2+num_cols-1)], split="\t")) | |
coords <- data.frame(matrix(unlist(coords), nrow=length(coords), byrow=T), stringsAsFactors=FALSE) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def newton_method(f, df, x0, err, W): | |
""" | |
Estimate p using Newton-Raphson method. | |
Output: root approximation given error criterion (err) | |
""" | |
delta = err+1 | |
x = x0 | |
while delta > err: | |
x1 = x - f(x, W) / df(x) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Closed-form least-squares fit of a straight line y = m * x + b.
x <- 1:10
y <- 1 + 2.5 * x  # noise-free line, so the fit should recover 2.5 and 1 exactly

N <- length(x)  # number of observations

x_mean <- mean(x)
y_mean <- mean(y)

# Slope: sum of cross-deviations divided by the sum of squared x-deviations.
m <- sum((x - x_mean) * (y - y_mean)) / sum((x - x_mean)^2)
# Intercept: chosen so the fitted line passes through (x_mean, y_mean).
b <- y_mean - m * x_mean

# m = 2.5
# b = 1
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
> summarize(by_sex, median(LIMIT_BAL)) | |
# A tibble: 2 × 2 | |
SEX `median(LIMIT_BAL)` | |
<chr> <dbl> | |
1 1 130000 | |
2 2 150000 | |
> summarize(by_sex, length(dflt[dflt=='1'])/length(SEX)) | |
# A tibble: 2 × 2 | |
SEX `length(dflt[dflt == "1"])/length(SEX)` | |
<chr> <dbl> |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(dplyr) | |
# Read the file with no header so that both leading lines arrive as data rows.
d <- read.csv("default of credit card clients.csv", header = FALSE,
              stringsAsFactors = FALSE)

# The second line carries the column names: promote it to the header, then
# drop the two leading lines from the data.
colnames(d) <- unlist(d[2, ])
d <- d[-c(1, 2), ]

# Give the 25th column a short name.
colnames(d)[25] <- "dflt"

# LIMIT_BAL is presumably read as character because of the non-numeric leading
# lines (TODO confirm); convert it so it can be summarised numerically.
d$LIMIT_BAL <- as.numeric(d$LIMIT_BAL)
# MALE VS. FEMALE | |
# ---------------- |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(ggplot2) | |
# Load the ballot-measure results and discard the first two columns.
votes <- read.csv("csv-ballot-measures.csv", header = TRUE,
                  stringsAsFactors = FALSE)
votes <- votes[, -(1:2)]

# Keep only the rows for ballot measure 62; which() also drops rows whose
# BALLOT_MEASURE_ID is NA.
d.votes <- votes[which(votes$BALLOT_MEASURE_ID == 62), ]

# Remove identifier/label columns. The mask is built from the frame being
# subset so the two cannot drift apart if `votes` changes later.
drops <- c("COUNTY_ID", "BALLOT_MEASURE_ID", "BALLOT_MEASURE_NAME",
           "BALLOT_MEASURE_TITLE")
d.votes <- d.votes[, !names(d.votes) %in% drops]

# YES_COUNT contains thousands separators; strip the commas before converting.
d.votes$YES_COUNT <- as.numeric(gsub(",", "", d.votes$YES_COUNT, fixed = TRUE))