-
-
Save lodestone/d43fa649726edfde2427 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby | |
require 'csl/styles' | |
require 'citeproc/ruby' | |
require 'httparty' | |
require 'thread' | |
# Access token for the anystyle.io API; it is passed to Predictor.new below.
API_KEY = '' # Add your key here!

# Without a key the script cannot do anything useful, so stop right away.
# `abort` prints the reason to stderr and exits with a non-zero status
# instead of ending silently as if the run had succeeded.
abort 'No API key configured: set API_KEY at the top of this script.' if API_KEY.empty?
# Memoizing style cache: the first lookup of an id loads the style through
# CSL::Style.load and stores it under that id; later lookups reuse the entry.
STYLES = Hash.new { |cache, id| cache[id] = CSL::Style.load(id) }

# Pre-load all styles in a background thread by touching every id that
# CSL::Style.ls reports, which triggers the cache's default block.
Thread.new { CSL::Style.ls.each { |id| STYLES[id] } }
# Guesses which CSL style a formatted reference string was written in.
#
# Each reference is sent to anystyle.io for parsing, the parsed data is
# re-rendered with the bibliography of every style id listed by
# CSL::Style.ls, and the renditions are ranked by their Levenshtein
# distance to the original string (smallest distance first).
class Predictor
  include HTTParty
  base_uri 'anystyle.io'

  attr_reader :locale, :styles, :limit, :workers

  # token   - access token sent as `access_token` with every API request
  # locale  - locale handed to the CiteProc renderer
  # limit   - number of best matches printed per reference
  # workers - number of rendering threads started per reference
  def initialize(token, locale = 'en', limit = 10, workers = 4)
    @token = token
    @locale = locale
    @limit = limit
    @workers = workers
    @styles = CSL::Style.ls
  end

  # Parses the given reference(s) and, for each one, prints the `limit`
  # styles whose rendition is closest to the original string.
  #
  # references - a single String or an Array of Strings
  #
  # Returns the Array of [reference, parsed data] pairs that was processed.
  # Raises RuntimeError when the parse request does not answer with HTTP 200.
  def predict(references)
    parse(references).each do |reference, data|
      report compute_distances(reference.strip, data)
    end
  end

  private

  # Renders `data` with every known style using `workers` threads and
  # returns an unsorted Array of [style id, rendition, distance] triples.
  def compute_distances(reference, data)
    print "Computing distances..."

    # A Queue (rather than a plain Array) lets workers claim ids atomically,
    # so no worker can observe a non-empty queue and then shift nil.
    queue = Queue.new
    styles.each { |id| queue << id }

    distances = []
    lock = Mutex.new

    threads = workers.times.map do
      Thread.new do
        renderer = CiteProc::Ruby::Renderer.new locale: locale, format: 'text'
        counter = 0

        while (id = next_style(queue))
          # A fresh rendition per style: a style that fails or has no
          # bibliography must never inherit the previous style's output.
          rendition = render(renderer, id, data)
          entry = [id, rendition, distance(reference, rendition)]
          lock.synchronize { distances << entry }

          counter += 1
          print '.' if (counter % 100).zero?
        end
      end
    end

    threads.each do |thread|
      begin
        thread.value
      rescue => e
        # A crashed worker only reduces parallelism; the others keep
        # draining the shared queue, so report it and carry on.
        warn "Rendering thread crashed: #{e.message}"
      end
    end

    puts 'done'
    distances
  end

  # Returns the next style id from the queue, or nil once it is drained.
  def next_style(queue)
    queue.pop(true)
  rescue ThreadError
    nil
  end

  # Renders `data` with the bibliography of style `id`.
  #
  # Returns the rendition, or '' when the style has no bibliography or
  # rendering fails (the failure is reported on stderr).
  def render(renderer, id, data)
    style = STYLES[id]
    return '' unless style && style.bibliography

    begin
      renderer.render(cite(data), style.bibliography) || ''
    rescue => e
      warn "Failed to render #{id}: #{e.message}"
      ''
    ensure
      # Drop the renderer's history after every attempt, successful or not.
      renderer.state.history.discard
    end
  end

  # Sorts the triples by distance and prints the `limit` best matches.
  def report(distances)
    print 'Sorting distances...'
    ranked = distances.sort_by(&:last)
    puts 'done'

    puts "The #{limit} best matches are:"
    ranked.take(limit).each do |id, rendition, d|
      print "#{id} "
      if d.zero?
        puts 'perfect match'
      else
        puts " (#{d}):\n#{rendition}"
      end
    end
  end

  # Posts the reference(s) to the anystyle.io parse endpoint and pairs each
  # input string with the corresponding entry of the decoded JSON response.
  # NOTE(review): assumes the response is a JSON array in input order - confirm.
  #
  # Raises RuntimeError with the response message unless the code is 200.
  def parse(references)
    references = Array(references)
    print "Trying to parse #{references.length} reference(s) on anystyle.io..."

    response = post '/parse/references.citeproc', references: references
    fail response.message unless response.code == 200

    puts 'done'
    references.zip JSON.parse(response.body)
  end

  # Issues a POST request with the access token added to the body.
  # Uses a non-mutating merge so the caller's options hash is left untouched.
  def post(path, options = {})
    self.class.post path, body: options.merge(access_token: @token)
  end

  # Wraps parsed reference data in a CiteProc::CitationItem whose id is
  # data['id'], falling back to 'ID' when the data carries none.
  def cite(data)
    CiteProc::CitationItem.new id: data['id'] || 'ID' do |c|
      c.data = CiteProc::Item.new data
    end
  end

  # Computes the Levenshtein distance of two strings
  # using the Wagner-Fischer algorithm.
  #
  # Only the previous and the current row of the distance matrix are kept,
  # which is all the recurrence needs.
  def distance(source, target)
    s = source.to_s.chars
    t = target.to_s.chars
    return t.length if s.empty?
    return s.length if t.empty?

    # Row 0: turning the empty prefix of s into the first j chars of t.
    previous = (0..t.length).to_a

    s.each_with_index do |s_char, i|
      # Column 0: deleting the first i + 1 chars of s.
      current = [i + 1]

      t.each_with_index do |t_char, j|
        current << if s_char == t_char
                     previous[j] # no operation
                   else
                     [
                       previous[j + 1], # deletion
                       current[j],      # insertion
                       previous[j]      # substitution
                     ].min + 1
                   end
      end

      previous = current
    end

    previous.last
  end
end
# Entry point: build one predictor and feed it references.
predictor = Predictor.new(API_KEY)

if ARGV.empty?
  # No arguments: read references line by line via Kernel#gets,
  # skipping lines that are blank after stripping whitespace.
  while (line = gets)
    next if line.strip.empty?

    predictor.predict(line)
  end
else
  # Arguments given: hand the whole argument list over in a single call.
  predictor.predict(ARGV)
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment