Skip to content

Instantly share code, notes, and snippets.

@lodestone
Forked from inukshuk/predict.rb
Last active August 29, 2015 14:23
Show Gist options
  • Save lodestone/d43fa649726edfde2427 to your computer and use it in GitHub Desktop.
Save lodestone/d43fa649726edfde2427 to your computer and use it in GitHub Desktop.
#!/usr/bin/env ruby
require 'csl/styles'
require 'citeproc/ruby'
require 'httparty'
require 'thread'
# Access token handed to Predictor below. Add your key here! The script
# exits right away for as long as this is left blank.
API_KEY = ''
exit if API_KEY.empty?

# Lazily filled cache of styles: the first lookup of an id loads it with
# CSL::Style.load and stores the result under that id.
STYLES = Hash.new do |cache, id|
  cache[id] = CSL::Style.load(id)
end

# Warm the cache in a background thread by looking up every id that
# CSL::Style.ls reports.
Thread.new { CSL::Style.ls.each { |id| STYLES[id] } }
# Ranks citation styles by how closely they reproduce a formatted reference.
#
# Every reference is sent to anystyle.io for parsing, rendered again with each
# style id listed by CSL::Style.ls, and the renditions are ordered by their
# Levenshtein distance to the original string.
class Predictor
  include HTTParty
  base_uri 'anystyle.io'

  attr_reader :locale, :styles, :limit, :workers

  # @param token [String] access token sent along with every request
  # @param locale [String] locale handed to the CiteProc renderer
  # @param limit [Integer] number of best matches printed per reference
  # @param workers [Integer] number of rendering threads used per reference
  def initialize(token, locale = 'en', limit = 10, workers = 4)
    @token = token
    @locale = locale
    @limit = limit
    @workers = workers
    @styles = CSL::Style.ls
  end

  # Parses the given references and prints, for each one, the +limit+ styles
  # whose rendition is closest to the input string.
  #
  # @param references [String, Array<String>] one or more formatted references
  # @raise [RuntimeError] if the parse request does not answer with code 200
  def predict(references)
    parse(references).each do |reference, data|
      print "Computing distances..."
      distances = compute_distances(reference.strip, data)
      puts 'done'

      print 'Sorting distances...'
      distances.sort_by!(&:last)
      puts 'done'

      report distances
    end
  end

  private

  # Renders +data+ with every known style on +workers+ threads.
  #
  # Returns an array of [style id, rendition, distance to reference] triples
  # in no particular order.
  def compute_distances(reference, data)
    # A Queue hands out each id exactly once; a shared Array polled with
    # empty?/shift lets two threads race for the last element.
    pending = Queue.new
    styles.each { |id| pending << id }

    distances = []
    lock = Mutex.new

    threads = Array.new(workers) do
      Thread.new do
        renderer = CiteProc::Ruby::Renderer.new locale: locale, format: 'text'
        counter = 0

        while (id = next_style(pending))
          # The rendition is computed afresh for every id, so a failed or
          # skipped style can never inherit the previous style's text.
          string = render_with(renderer, id, data)
          entry = [id, string, distance(reference, string)]
          lock.synchronize { distances << entry }

          counter += 1
          print '.' if counter % 100 == 0
        end
      end
    end

    threads.each do |thread|
      begin
        thread.join
      rescue => e
        warn "Rendering thread crashed: #{e.message}"
      end
    end

    distances
  end

  # Takes the next style id off the queue without blocking; nil once the
  # queue has been drained.
  def next_style(queue)
    queue.pop(true)
  rescue ThreadError
    nil
  end

  # Renders +data+ as a bibliography entry in the style +id+.
  #
  # Returns an empty string when the style is missing, has no bibliography,
  # or fails to render (the failure is reported with a warning).
  def render_with(renderer, id, data)
    style = STYLES[id]
    return '' unless style && style.bibliography

    begin
      renderer.render(cite(data), style.bibliography) || ''
    rescue => e
      warn "Failed to render #{id}: #{e.message}"
      ''
    ensure
      # Drop the renderer's history after every attempt, successful or not.
      renderer.state.history.discard
    end
  end

  # Prints the +limit+ closest renditions from the sorted +distances+.
  def report(distances)
    puts "The #{limit} best matches are:"

    distances.take(limit).each do |id, rendition, d|
      print "#{id} "
      if d.zero?
        puts 'perfect match'
      else
        puts " (#{d}):\n#{rendition}"
      end
    end
  end

  # Posts the references to the parse endpoint and pairs each input string
  # with the corresponding entry of the JSON response body.
  #
  # NOTE(review): assumes the response is a JSON array in the same order as
  # the references that were sent -- confirm against the service.
  def parse(references)
    references = Array(references)
    print "Trying to parse #{references.length} reference(s) on anystyle.io..."

    response = post '/parse/references.citeproc',
      references: references

    fail response.message unless response.code == 200
    puts 'done'

    references.zip JSON.parse(response.body)
  end

  # Sends +options+ plus the access token as the body of a POST request.
  # The hash passed in is left untouched.
  def post(path, options = {})
    self.class.post path, body: options.merge(access_token: @token)
  end

  # Wraps the parsed reference data in a citation item, using the id found
  # in the data or 'ID' when there is none.
  def cite(data)
    CiteProc::CitationItem.new id: data['id'] || 'ID' do |c|
      c.data = CiteProc::Item.new data
    end
  end

  # Computes the Levenshtein distance of two strings
  # using the Wagner-Fischer algorithm.
  #
  # Only the previous and the current row of the distance matrix are kept,
  # which yields the same result as filling in the whole matrix.
  def distance(source, target)
    s = source.to_s.chars
    t = target.to_s.chars

    return t.length if s.empty?
    return s.length if t.empty?

    previous = (0..t.length).to_a

    s.each_with_index do |char, i|
      current = [i + 1]

      t.each_with_index do |other, j|
        current << if char == other
          previous[j] # no operation
        else
          [
            previous[j + 1], # deletion
            current[j],      # insertion
            previous[j]      # substitution
          ].min + 1
        end
      end

      previous = current
    end

    previous.last
  end
end
# Run predictions for the command line arguments or, when there are none,
# for every non-blank line read through gets.
predictor = Predictor.new(API_KEY)

if ARGV.empty?
  while (line = gets)
    next if line.strip.empty?

    predictor.predict(line)
  end
else
  predictor.predict(ARGV)
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment