This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(ns clojure-elasticsearch.core | |
(:require [clojurewerkz.elastisch.rest :as esr] | |
[clojurewerkz.elastisch.rest.index :as idx] | |
[clojurewerkz.elastisch.rest.document :as doc] | |
[clojurewerkz.elastisch.query :as q])) | |
;; Namespace-level string constants.
;; NOTE(review): presumably the Elasticsearch index name — confirm against usage.
(def index-name "test") | |
;; NOTE(review): presumably the document/mapping type — confirm against usage.
;; This var shadows clojure.core/type inside this namespace.
(def type "articles") | |
;; NOTE(review): presumably the search term passed to the query — confirm against usage.
(def query "third") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(ns ibmwatson.text | |
(:require [clj-http.client :as client])) | |
(defn- text-call
  "Send a GET request to the AlchemyAPI text endpoint named by `endpoint`,
  passing `params` as the query string, and return the clj-http response.

  NOTE(review): `text` is accepted but not used in this function;
  presumably callers place the text inside `params` - confirm."
  [text endpoint params]
  (client/get (str "http://access.alchemyapi.com/calls/text/" endpoint)
              {:query-params params}))
(defn entities | |
[text api_key] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from nltk import word_tokenize | |
from nltk.corpus import reuters | |
from sklearn.feature_extraction.text import TfidfVectorizer | |
from nltk.stem.porter import PorterStemmer | |
import re | |
from nltk.corpus import stopwords | |
# English stop-word list, evaluated once when the module is imported.
# NOTE(review): presumably reused by tokenize() to avoid reloading the corpus — confirm.
cachedStopWords = stopwords.words("english") | |
def tokenize(text): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import numpy as np | |
import logging | |
import time | |
import datetime | |
from sklearn.ensemble import RandomForestClassifier | |
from sklearn import cross_validation | |
from sklearn.svm import SVC | |
from sklearn.metrics import accuracy_score | |
from sklearn.feature_selection import VarianceThreshold |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.ensemble import RandomForestClassifier | |
from sklearn.metrics import accuracy_score | |
from sklearn import cross_validation | |
import pandas as pd | |
import logging | |
import time | |
import datetime | |
import numpy as np | |
def log_info(message): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def read_csv(file_path, has_header=True):
    """Read a comma-separated file of numbers into a list of float rows.

    Args:
        file_path: Path of the file to read.
        has_header: When true, the first line of the file is discarded
            before parsing.

    Returns:
        A list with one entry per non-blank line; each entry is the list of
        that line's comma-separated fields converted with ``float``.

    Raises:
        ValueError: If a field on a non-blank line cannot be converted to
            ``float``.
    """
    rows = []
    with open(file_path) as f:
        if has_header:
            # Discard the first line without parsing it.
            f.readline()
        for raw_line in f:
            stripped = raw_line.strip()
            if not stripped:
                # A blank line (e.g. a trailing newline at end of file) would
                # otherwise become [""] and crash in float("").
                continue
            rows.append([float(field) for field in stripped.split(",")])
    return rows
def write_csv(file_path, data): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.ensemble import RandomForestClassifier | |
import csv_io | |
def main(): | |
#Read in the training data and train the model | |
train_data = csv_io.read_csv("data/train.csv") | |
#the first column of the training set will be the judgements | |
judgements = [str(int (x[0])) for x in train_data] | |
train_instances = [x[1:] for x in train_data] | |