# Kaggle Titanic training data; read survived and pclass as factors
train <- read.csv("C:/Users/vcomas/OIQ/Kaggle/Titanic/train.csv",
                  colClasses = c("survived" = "factor", "pclass" = "factor"))
str(train)
library(randomForest)
library(ipred)
# Out-of-bag error of a 100-tree random forest
randomForest(survived ~ fare + sex + pclass, data = train,
             ntree = 100, keep.forest = FALSE)$err.rate[100]
# 10-fold cross-validated error estimate via ipred::errorest
errorest(survived ~ fare + sex + pclass + age, data = train,
         model = randomForest, estimator = "cv",
         est.para = control.errorest(k = 10), ntree = 10, mtry = 2)$err
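For the Python half of the comparison, a roughly equivalent cross-validated estimate can be obtained with scikit-learn. The sketch below is illustrative rather than the gist's own code: it assumes the same lowercase column names (survived, fare, sex, pclass) and a local train.csv.

# Illustrative scikit-learn counterpart to the R snippet above (assumed, not from the gist)
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score

train = pd.read_csv("train.csv")                       # assumes the Kaggle Titanic file
X = pd.get_dummies(train[["fare", "sex", "pclass"]])   # one-hot encode the categoricals
X = X.fillna(X.median())                               # crude imputation for missing values
y = train["survived"]

clf = RandomForestClassifier(n_estimators=100, random_state=0)
scores = cross_val_score(clf, X, y, cv=10)             # 10-fold cross-validated accuracy
print("CV error estimate:", 1 - scores.mean())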
--------- survived ~ C(pclass) + C(title) + C(sex) + fare + age + sibsp + parch + C(embarked) ---------
Logistic Regression: 0.518866276531
Support Vector Machines: 0.554240545026
ExtraTree Classifier: 0.520438466242
Random Forest: 0.51729408682
Gradient Boosting: 0.516507991964
--------- survived ~ fare + C(pclass) + C(sex) ---------
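The formula strings in this benchmark use patsy-style C() terms for categorical variables. A minimal sketch of how such a formula is expanded into a design matrix before fitting a scikit-learn model (illustrative only; column names are assumed):

# Expand a patsy formula into a design matrix, then cross-validate a model on it
import pandas as pd
from patsy import dmatrices
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score

train = pd.read_csv("train.csv")
y, X = dmatrices("survived ~ fare + C(pclass) + C(sex)", data=train,
                 return_type="dataframe")
scores = cross_val_score(LogisticRegression(max_iter=1000), X, y.values.ravel(), cv=5)
print(scores.mean())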
stumbleupon: truncated IPython notebook preview (nbformat 3); only the metadata preamble was captured.
Scrape Victor: truncated IPython notebook preview (nbformat 3); only the metadata preamble was captured.
@elyase
elyase / gist:6785287
Last active December 24, 2015 10:39
R vs Python comparison http://nbviewer.ipython.org/6785287
Untitled0: truncated IPython notebook preview (nbformat 3); only the metadata preamble was captured.
function svmStruct = best_svm_classifer_rbf(cdata, labels)
% crossfun returns the predicted classification yfit for a test matrix xtest
% when the SVM is trained on a sample xtrain that has classification ytrain.
    function yfit = crossfun(xtrain, ytrain, xtest, rbf_sigma, boxconstraint)
        % Train an RBF SVM on xtrain/ytrain, then classify xtest as yfit
        % (body filled in as a sketch using the legacy svmtrain/svmclassify API)
        svmStruct = svmtrain(xtrain, ytrain, 'Kernel_Function', 'rbf', ...
            'rbf_sigma', rbf_sigma, 'boxconstraint', boxconstraint);
        yfit = svmclassify(svmStruct, xtest);
    end
# Grid-search an RBF/linear SVM over its hyper-parameters with scikit-learn
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVC

# Split the dataset (X, y defined earlier) in two equal parts
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.5, random_state=0)

# Set the parameters to tune by cross-validation
tuned_parameters = [{'kernel': ['rbf'], 'gamma': [1e-3, 1e-4],
                     'C': [1, 10, 100, 1000]},
                    {'kernel': ['linear'], 'C': [1, 10, 100, 1000]}]

model = GridSearchCV(SVC(C=1), tuned_parameters, cv=5, scoring='accuracy')
model.fit(X_train, y_train)
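Once fitted, the chosen hyper-parameters and held-out performance can be inspected (a brief usage sketch):

print(model.best_params_, model.best_score_)   # best grid point and its mean CV score
print(model.score(X_test, y_test))             # accuracy on the held-out half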
@elyase
elyase / count_motifs.py
Last active December 28, 2015 13:39
Counts motif appearances in a list of DNA sequences
from sklearn.feature_extraction.text import CountVectorizer

def tokenizer(s):
    # Slide a fixed-width window over the sequence to emit every overlapping 7-mer
    width = 7
    return [s[i:i+width] for i in range(len(s) - width + 1)]

def count_chunks(sequence_list):
    # Count how often each 7-mer motif appears in each sequence
    vectorizer = CountVectorizer(tokenizer=tokenizer)
    X = vectorizer.fit_transform(sequence_list)
    return X, vectorizer
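A quick usage sketch on made-up sequences, assuming count_chunks returns the sparse count matrix and the fitted vectorizer as above:

seqs = ["ACGTACGTACGT", "TTACGTACGAAC"]   # hypothetical input sequences
X, vec = count_chunks(seqs)
print(X.toarray())       # rows: sequences, columns: per-motif counts
print(vec.vocabulary_)   # maps each 7-mer motif to its column index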
from pyquery import PyQuery as pq

url = 'https://www.nuans.com/RTS2/en/jur_codes-codes_jur_en.cgi#Example_of_report_layouts'
d = pq(url)

l = []
for th in d.items('.borderless td:nth-child(1)'):
    left = th.text()                    # first cell of the row (the code)
    right = th.next().text()            # second cell of the row (the status)
    tr = th.parent()
    tbody = tr.parent()
    title = tbody('th:first').text()    # heading of the enclosing table
    if title == 'NUANS Reports & Preliminary Searches' and right in ['Active', 'Inactive']:
        l.append([left, right])