# Run the simulation numberguesses times; CONEXP() is assumed to be
# defined elsewhere. sapply() replaces apply(), which does not work on
# a plain vector.
result <- sapply(seq_len(numberguesses), function(x) {
  a <- sample(1:10000000, 1)
  b <- sample(a:10000001, 1)
  cond_exp <- CONEXP(a, b)
  # Keep only estimates within 0.1% of the true expected value
  # (expected_value is assumed to be computed elsewhere).
  cond_exp[abs(cond_exp - expected_value) / expected_value < 0.001]
})

library(readr)
library(dplyr)
library(ggplot2)
library(ggthemes)

df <- read_csv('~/desktop/guns.csv')

# Plot the 20 countries with the most guns per 100 people.
# The original snippet was truncated after aes(x = country, ...);
# the mapping and layers below are a plausible completion.
df %>%
  arrange(desc(guns_per_100_people)) %>%
  top_n(n = 20, wt = guns_per_100_people) %>%
  ggplot(aes(x = reorder(country, guns_per_100_people),
             y = guns_per_100_people)) +
  geom_col() +
  coord_flip() +
  theme_fivethirtyeight()

library(quanteda)
library(dplyr)
library(tidyr)
library(networkD3)

load(url("http://www.kenbenoit.net/files/presDebateCorpus2016seg.RData"))
candidates <- subset(presDebateCorpus2016seg, speakertype == 'candidate')

# Build a document-feature matrix of uni- to trigrams, grouped by speaker
# tag. debate_dfm avoids shadowing quanteda's dfm() function. The original
# ignoredFeatures vector was truncated mid-list; only the words shown in
# the source are kept here.
debate_dfm <- dfm(candidates, groups = c("tag"), ngrams = 1:3,
                  ignoredFeatures = c('people', 'go', 'going', 'will',
                                      'know', 'think', 'country', 'get',
                                      'applause', 'want', 'need'))

# De-accumulate precipitation: every sixth observation starts a new
# accumulation window, so keep it as-is; otherwise subtract the previous
# reading to recover the single-interval amount.
temp_precip = []
for i, p in enumerate(correct_precip):
    if i % 6 == 0:
        temp_precip.append(p)
    else:
        temp_precip.append(p - correct_precip[i - 1])
    if i % 10000 == 0:  # progress indicator
        print(i)

from dateutil.relativedelta import relativedelta

# Precipitation is reported as a running accumulation that resets at
# 00/06/12/18; at a reset hour the raw reading is already an hourly
# total, otherwise subtract the previous hour's accumulated reading.
correct_precip = []
for t in df.DateTime:
    if t.hour not in [0, 6, 12, 18]:
        # .values[0] avoids pandas index alignment when subtracting rows
        precip_1 = (df.precip[df.DateTime == t].values[0]
                    - df.precip[df.DateTime == t - relativedelta(hours=1)].values[0])
        correct_precip.append({'DateTime': t, 'precipitation': precip_1})
    else:
        correct_precip.append({'DateTime': t,
                               'precipitation': df.precip[df.DateTime == t].values[0]})
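
# A minimal follow-up sketch (not in the original gist): collect the
# corrected records into a DataFrame, assuming pandas is available.
import pandas as pd

corrected = pd.DataFrame(correct_precip)
corrected = corrected.sort_values('DateTime').reset_index(drop=True)
print(corrected.head())
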
# head(postcodes)
#   postcode latitude longitude
# 1     SK17 53.24400 -1.879000
# 2     OX15 52.05515 -1.429983
# 3      BA8 51.01226 -2.411274
# 4     CB21 52.12717  0.272036
# 5     CA13 54.66000 -3.366000
# 6      CH7 53.69000 -3.133000
#
# head(latlon)
import scala.collection.mutable
import org.apache.spark.rdd.RDD
import org.apache.spark.mllib.linalg.{Vector, Vectors}

// Convert each tokenized document into a sparse term-count vector keyed
// by a unique document id — the (Long, Vector) input format expected by
// MLlib's LDA. Terms not in the vocab map are dropped.
val documents: RDD[(Long, Vector)] =
  tokenized.zipWithIndex.map { case (tokens, id) =>
    val counts = new mutable.HashMap[Int, Double]()
    tokens.foreach { term =>
      if (vocab.contains(term)) {
        val idx = vocab(term)
        counts(idx) = counts.getOrElse(idx, 0.0) + 1.0
      }
    }
    (id, Vectors.sparse(vocab.size, counts.toSeq))
  }

import re

import nltk.data
from nltk.corpus import stopwords
from nltk import wordpunct_tokenize
from nltk.tokenize import RegexpTokenizer
from nltk.stem import WordNetLemmatizer

# Load a previously trained Brill tagger from disk.
tagger = nltk.data.load("trained_brill.pickle")
# Tokenize on word characters only, dropping punctuation.
tokenizer = RegexpTokenizer(r'\w+')
# Matches double-quoted spans.
quoted = re.compile('"[^"]*"')
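
# A minimal usage sketch (not in the original gist): tokenize a sample
# sentence, tag it with the loaded Brill tagger, and lemmatize the tokens.
lemmatizer = WordNetLemmatizer()
text = 'The "quick" brown foxes were jumping over the lazy dogs.'
tokens = tokenizer.tokenize(text)  # word tokens, punctuation dropped
tagged = tagger.tag(tokens)        # [(token, POS tag), ...]
lemmas = [lemmatizer.lemmatize(tok.lower()) for tok in tokens]
print(tagged)
print(lemmas)
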
from time import sleep

# Retry the element lookup up to 10 times, waiting 5 seconds between
# attempts. The original loop never broke out on success, so it would
# spin forever; the break below fixes that, and the bare excepts are
# narrowed to Exception.
try:
    features_list = driver.find_elements_by_class_name('detail_attrs')
except Exception:
    i = 1
    while i < 10:
        sleep(5)
        try:
            features_list = driver.find_elements_by_class_name('detail_attrs')
            break  # success: stop retrying
        except Exception:
            i += 1
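
# A reusable variant of the retry pattern above (a sketch, not from the
# original gist): retry any zero-argument callable with a fixed delay,
# re-raising the last error once the attempts are exhausted.
def retry(fn, attempts=10, delay=5):
    for attempt in range(attempts):
        try:
            return fn()
        except Exception:
            if attempt == attempts - 1:
                raise  # out of attempts: propagate the last error
            sleep(delay)

features_list = retry(lambda: driver.find_elements_by_class_name('detail_attrs'))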