This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| (ns ignacio.tfidf (:require [clojure.contrib.string :as string])) ;; Simple tfidf in clojure, for fun. | |
;; Set of stopwords: the contents of ./stopwords.txt split on newlines.
;; Evaluated once, when this namespace is loaded.
(def stopwords
  (->> (slurp "./stopwords.txt")
       (string/split #"\n")
       set))
(defn tokenize
  "Lowercases raw-text, splits it on runs of characters that are not one of
  the listed letters or digits, and returns the resulting tokens with
  stopwords and empty strings removed."
  [raw-text]
  (->> (string/lower-case raw-text)
       (string/split #"[^a-z0-9äöüáéíóúãâêîôûàèìòùçñ]+")
       ;; The underlying Java regex split yields an empty first element when
       ;; the text starts with a delimiter (and a lone empty string for empty
       ;; input), so drop empty tokens explicitly rather than relying on the
       ;; stopword list to contain one.
       (remove #(or (empty? %) (stopwords %)))))
(defn idf2
  "Squared inverse document frequency: the natural logarithm of n-docs
  divided by the number of keys in match, raised to the power 2."
  [n-docs match]
  (let [doc-freq (count (keys match))
        idf      (Math/log (/ n-docs doc-freq))]
    (Math/pow idf 2)))
| (defn index-one [fname] ;; Index for one file. Given an fname, returns a map of token -> map of (fname, count) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| [org.clojars.ithayer/plaid-penguin "1.0.0"] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| var sys = require('sys'), | |
| Path = require('path'), | |
| Mu = require('../mu'), | |
| Script = process.binding('evals').Script; | |
| exports.compile = compile; | |
| exports.compilePartial = compilePartial; | |
| function RenderEventEmitter(options) { | |
| this.chunkSize = options.chunkSize || 1024; |
Newer Older