Created
July 13, 2016 12:29
-
-
Save anonymous/7ce9cb03b5bc6cfe6f96ec6c4940602e to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /bin/ruby
# Prints the most frequent instruction n-grams, one per line, as
# "<count> <insn> -> <insn> -> ...", most frequent first.
#
# argv[0] - n-gram length n (default 2). Values that do not parse to an
#           integer >= 1 fall back to the default, because each_cons rejects
#           a non-positive size.
# argv[1] - number of rows to print (default 20). Negative values fall back
#           to the default, because first rejects a negative count.
#
# Returns the Array of [n-gram, count] pairs that were printed.
def main(argv)
  n = int_arg(argv[0], 2, 1)
  m = int_arg(argv[1], 20, 0)

  # sort_by has to see every pair anyway, so no lazy enumerator is needed.
  n_gram(n)
    .sort_by { |_insns, count| -count }
    .first(m)
    .each { |insns, count| printf("%7d %s\n", count, insns.join(' -> ')) }
end

# Converts a command-line argument to an Integer with to_i, returning
# +default+ when the argument is absent or the result is below +min+.
def int_arg(raw, default, min)
  value = (raw || default).to_i
  value >= min ? value : default
end
# Counts instruction n-grams over every sequence reachable from the files
# returned by all_sources.
#
# n - length of the n-gram (passed to each_cons).
#
# Returns a Hash mapping an Array of n instruction names to the number of
# times that run occurs; unseen keys read as 0.
def n_gram(n)
  gram = Hash.new(0)
  each_iseq_of(all_sources) do |iseq|
    each_sequence(iseq.to_a) do |body|
      # Only Array entries are instructions; other entries in the body are
      # skipped. The first element of each instruction is its name.
      names = body.select { |entry| entry.is_a?(Array) }.map { |insn| insn[0] }
      names.each_cons(n) { |run| gram[run] += 1 }
    end
  end
  gram
end
# Returns the entries of $LOADED_FEATURES whose name ends in ".rb".
# The pattern is anchored with \z so the match is against the end of the
# whole string rather than the end of any line inside it.
def all_sources
  $LOADED_FEATURES.grep(/\.rb\z/)
end
# Compiles each file with RubyVM::InstructionSequence.compile_file (passing
# false as the compile options) and yields the resulting instruction
# sequence.
#
# files - an Enumerable of file paths.
#
# A file that cannot be found is reported on STDERR and skipped. Only the
# compile step is guarded: an Errno::ENOENT raised by the caller's block
# propagates instead of being reported as a missing source file.
#
# Returns whatever files.each returns.
def each_iseq_of(files)
  files.each do |path|
    begin
      iseq = RubyVM::InstructionSequence.compile_file(path, false)
    rescue Errno::ENOENT => e
      STDERR.printf "%s\n", e.message
      next
    end
    yield iseq
  end
end
# Yields the instruction list of a serialized instruction sequence and of
# every sequence nested inside it, children before their parent.
#
# a - the Array form of an instruction sequence (as produced by
#     RubyVM::InstructionSequence#to_a), or nil. The instruction list is
#     read from slot 13 of that Array.
#
# Nested sequences are found by inspecting every operand of every
# instruction rather than fixed operand positions per opcode, so the walk
# does not depend on where a particular Ruby version places the child
# sequence of putiseq / once / defineclass / send / invokesuper (or of
# opcodes that did not exist when this was written).
#
# Returns nil without yielding when a is nil or has no instruction list.
def each_sequence(a, &block)
  body = a && a[13]
  return unless body

  body.each do |insn|
    next unless insn.is_a?(Array)

    # insn[0] is the opcode name; the remaining elements are operands.
    insn.drop(1).each do |operand|
      each_sequence(operand, &block) if serialized_iseq?(operand)
    end
  end
  yield body
end

# True when obj looks like the Array form of an instruction sequence, i.e.
# an Array whose first element is the serialization format tag.
def serialized_iseq?(obj)
  obj.is_a?(Array) && obj[0] == 'YARVInstructionSequence/SimpleDataFormat'
end
# Script entry point: run the report with the command-line arguments.
main(ARGV)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
3-gram and 4-gram, also against redmine