Created
November 10, 2012 20:54
-
-
Save tmichel/4052444 to your computer and use it in GitHub Desktop.
A naive implementation of Cocke-Younger-Kasimi algorithm
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# A naive implementation of Cocke-Younger-Kasimi algorithm | |
# | |
# [Symbols are not treated the same] | |
# - :a => letter | |
# - :A => variable | |
# - :S => start variable, must be the first to define | |
module Cyk | |
# A result table. Looks something like this for a 4 letter word: | |
# | |
# |__| | |
# |__|__| | |
# |__|__|__| | |
# |__|__|__|__| | |
# | |
# indexing is base on 1, the lower left corner is 1,1 | |
# | |
class ResultTable | |
attr_reader :rows, :cols | |
def initialize(rows, cols) | |
@rows = rows | |
@cols = cols | |
@ary = Array.new(rows) { |i| Array.new(cols - i) } | |
end | |
def [](j, k) | |
@ary[j-1][k-1] | |
end | |
def []=(j, k, val) | |
@ary[j-1][k-1] = val | |
end | |
def pretty_print | |
@ary.reverse_each { |e| p e } | |
nil | |
end | |
def upper_left | |
raise 'Invalid number of elements in the upper left cell.' if @ary.last.size > 1 | |
@ary.last.flatten | |
end | |
end | |
# Cocke-Younger-Kasimi algorithm | |
# | |
# @param word [String] the given word to match against the grammar | |
# @param grammar [Hash] the grammar in CNF (Chomsky normal form), | |
# the :S is the starting point, :eps is the empty word | |
# | |
# A grammar looks like: | |
# grammar = | |
# { | |
# :S => [:a, [:A, :B]], | |
# :A => [:a], | |
# :B => [:b] | |
# } | |
# | |
# @return [Boolean] true if the word can be generated by the grammar, | |
# false otherwise | |
def self.cyk(word, grammar) | |
rows = cols = word.length | |
result = ResultTable.new rows, cols | |
# j: number of rows | |
# k: number of cols | |
(1..rows).each do |j| | |
(1..(cols-j+1)).each do |i| | |
if j == 1 | |
letter = word[i - 1].to_sym | |
result[j,i] = get_variables_for_letter letter, grammar | |
else | |
range = 1..(j-1) | |
variables = [] | |
range.each do |l| | |
cell1 = result[l, i] | |
cell2 = result[j-l, i+l] | |
variables << get_variables_for_rule(cell1, cell2, grammar) | |
end | |
result[j,i] = variables.flatten.uniq | |
end | |
end | |
end | |
result.pretty_print | |
# if the (1,k) cell contains S, the word can be generated by the grammar | |
result.upper_left.include? :S | |
end | |
private | |
# gets the the variables which generate the given letter | |
def self.get_variables_for_letter(letter, grammar) | |
variables = [] | |
grammar.each do |k, v| | |
variables << k unless v.select { |x| x == letter }.empty? | |
end | |
variables | |
end | |
# gets the variables that have rules from cell1.product(cell2) | |
def self.get_variables_for_rule(cell1, cell2, grammar) | |
rules = cell1.product cell2 | |
grammar.select { |k, v| !(v & rules).empty? }.keys | |
end | |
end | |
grammar = { | |
:S => [[:A, :B], [:B, :C]], | |
:A => [[:B, :A], :a], | |
:B => [[:C, :C], :b], | |
:C => [[:A, :B], :a] | |
} | |
puts Cyk.cyk("baaba", grammar) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment