This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import matplotlib as mpl | |
| from matplotlib import pyplot as plt | |
| from matplotlib.patches import Patch | |
| import numpy as np | |
| import pandas as pd | |
| import seaborn as sns | |
| # Paper 1 | |
| paper_1_start = "02/05/2018" |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from typing import Tuple | |
| import warnings | |
| import biom | |
| import click | |
| from gemelli.rpca import rpca | |
| import matplotlib.pyplot as plt | |
| import numpy as np | |
| import pandas as pd | |
| import seaborn as sns |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| library(biomformat) | |
| library(ANCOMBC) | |
| library(phyloseq) | |
| tbl_file <- "data/shi_age_prediction/processed/processed_skin_tbl.biom" | |
| tbl <- biomformat::read_biom(tbl_file) | |
| tbl <- as.data.frame(as.matrix(biomformat::biom_data(tbl))) | |
| md_file <- "data/shi_age_prediction/processed/processed_skin_md.tsv" | |
| md <- read.table(md_file, sep="\t", row.names=1, header=T) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| get_ord_dataframe <- function(lines, linestart){ | |
| header_line <- strsplit(lines[linestart], split="\t")[[1]] | |
| num_rows = as.numeric(header_line[2]) | |
| num_cols = as.numeric(header_line[3]) | |
| if (num_rows == 0){return(data.frame())} | |
| data <- strsplit(lines[(linestart+1):(linestart+num_rows)], split="\t") | |
| names <- unlist(lapply(data, function(x) x[1])) | |
| coords <- lapply(data, function(x) strsplit(x[2:(2+num_cols-1)], split="\t")) | |
| coords <- data.frame(matrix(unlist(coords), nrow=length(coords), byrow=T), stringsAsFactors=FALSE) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| def newton_method(f, df, x0, err, W): | |
| """ | |
| Estimate p using Newton-Raphson method. | |
| Output: root approximation given error criterion (err) | |
| """ | |
| delta = err+1 | |
| x = x0 | |
| while delta > err: | |
| x1 = x - f(x, W) / df(x) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| x <- 1:10 | |
| y <- 1 + 2.5 * x | |
| N <- length(x) | |
| x_mean <- mean(x) | |
| y_mean <- mean(y) | |
| m <- sum((x - x_mean) * (y - y_mean)) / sum((x - x_mean)^2) | |
| b <- y_mean - m * x_mean | |
| # m = 2.5 | |
| # b = 1 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| > summarize(by_sex, median(LIMIT_BAL)) | |
| # A tibble: 2 × 2 | |
| SEX `median(LIMIT_BAL)` | |
| <chr> <dbl> | |
| 1 1 130000 | |
| 2 2 150000 | |
| > summarize(by_sex, length(dflt[dflt=='1'])/length(SEX)) | |
| # A tibble: 2 × 2 | |
| SEX `length(dflt[dflt == "1"])/length(SEX)` | |
| <chr> <dbl> |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| library(dplyr) | |
| d <- read.csv('default of credit card clients.csv', header=F, stringsAsFactors=F) | |
| colnames(d) <- unlist(d[2,]) | |
| d <- d[-c(1,2),] | |
| colnames(d)[25] <- 'dflt' | |
| d$LIMIT_BAL <- as.numeric(d$LIMIT_BAL) | |
| # MALE VS. FEMALE | |
| # ---------------- |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| library(ggplot2) | |
| votes <- read.csv('csv-ballot-measures.csv', header=TRUE, stringsAsFactors=FALSE) | |
| votes <- votes[,-1:-2] | |
| d.votes <- votes[which(votes$BALLOT_MEASURE_ID==62),] | |
| drops <- c('COUNTY_ID', 'BALLOT_MEASURE_ID', 'BALLOT_MEASURE_NAME', 'BALLOT_MEASURE_TITLE') | |
| d.votes <- d.votes[, !names(votes) %in% drops] | |
| d.votes$YES_COUNT <- as.numeric(gsub(',', '', d.votes$YES_COUNT)) |