Last active
August 12, 2016 06:33
-
-
Save Dmitra/b76741090e9046588375 to your computer and use it in GitHub Desktop.
Convert indented text to GraphML.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Reopens the core File class to add a whole-content reader.
class File
  # Returns everything from the current read position to the end of the file
  # as a single String; returns "" when the file is already at end of file.
  #
  # Delegates to IO#read instead of appending the file line by line, which
  # yields the same text without the intermediate per-line strings.
  #
  # @return [String] the remaining content of the file
  def to_string
    read
  end
end
# Directed graph of labelled nodes.
#
# A node is identified by its position in the label list; an edge is stored
# as a [source_index, target_index] pair. Graphs can be loaded from a
# ".graphml" file or from an indented ".txt" file and written out as GraphML.
class Graph
  # Leading whitespace that marks a ".txt" line as a child of the closest
  # preceding unindented line.
  INDENT = '  '.freeze

  # Opening part of the generated document. <key> declarations are children
  # of <graphml> and therefore come before <graph>.
  XML_HEADER = ([
    '<?xml version="1.0" encoding="UTF-8"?>',
    '<graphml xmlns="http://graphml.graphdrawing.org/xmlns">',
    '<key id="label" for="node" attr.name="label" attr.type="string"/>',
    '<graph edgedefault="directed">'
  ].join("\n") + "\n").freeze

  # Closing part of the generated document.
  XML_FOOTER = '</graph></graphml>'.freeze

  # Characters that must not appear raw in XML text, and their replacements.
  XML_ESCAPES = { '&' => '&amp;', '<' => '&lt;', '>' => '&gt;' }.freeze

  # Predefined XML entities mapped back to the characters they stand for.
  XML_UNESCAPES = {
    '&amp;' => '&', '&lt;' => '<', '&gt;' => '>', '&quot;' => '"', '&apos;' => "'"
  }.freeze

  attr_accessor :name

  # @param labels [Array<String>] initial node labels; the array is stored
  #   as given (not copied), so later #add calls append to it
  def initialize(labels = [])
    @name = 'graph'
    @edges = []
    @labels = labels
  end

  # Appends a node.
  #
  # @param label [String] label of the new node
  # @return [Array<String>] the label list
  def add(label)
    @labels << label
  end

  # Adds an edge between the first nodes carrying the given labels.
  # Nothing happens when either label is unknown or the edge already exists.
  #
  # @param source [String] label of the source node
  # @param target [String] label of the target node
  # @return [Array, nil] the edge list when an edge was added, otherwise nil
  def connect(source, target)
    add_edge(@labels.index(source), @labels.index(target))
  end

  # Adds an edge between two nodes given by their indices in the label list.
  # Nothing happens when either index does not name an existing node (nil
  # included) or the edge already exists.
  #
  # @param s [Integer, nil] index of the source node
  # @param t [Integer, nil] index of the target node
  # @return [Array, nil] the edge list when an edge was added, otherwise nil
  def connect_by_id(s, t)
    add_edge(s, t)
  end

  # Writes the node labels to a file, one per line.
  #
  # @param name [String] path of the file to (over)write
  # @return [File] the file that was written, already closed
  def to_file(name)
    # Block form closes (and therefore flushes) the handle before returning.
    File.open(name, 'w') { |out| out << @labels.join("\n") }
  end

  # Writes the graph as a GraphML document.
  #
  # @param name [String] path of the file to (over)write; defaults to the
  #   graph name followed by ".xml"
  # @return [File] the file that was written, already closed
  def to_xml(name = (@name + '.xml'))
    File.open(name, 'w') { |out| out << graphml_document }
  end

  # Loads a graph from a file and names it after the file's base name.
  #
  # @param file [String] path ending in ".graphml" or ".txt"
  # @return [Graph]
  # @raise [ArgumentError] when the extension is neither of the two
  def self.read(file)
    extension = File.extname(file)
    content = File.read(file)
    graph = case extension
            when '.graphml' then read_graphML(content)
            when '.txt' then read_ws(content) # whitespaced
            else
              raise ArgumentError, "unsupported graph file extension: #{extension.inspect}"
            end
    graph.name = File.basename(file, extension)
    graph
  end

  # Extracts the distinct keywords of a "{word}{word}..." file and writes
  # them to +output+, one per line. One-character keywords are dropped.
  #
  # @param file [String] path of the brace-delimited input
  # @param output [String] path of the file to (over)write
  # @return [File] the file that was written, already closed
  def self.extract_keywords(file, output = 'out')
    # Drop the last character, then the first one plus any whitespace that
    # follows it (the closing and opening brace).
    # NOTE(review): if the file ends with a newline, only that newline is
    # dropped and the final keyword keeps its "}" - confirm the input format.
    body = File.read(file).chop[1..-1].to_s.lstrip
    # Treat "}{" as the separator and reject one-symbol words.
    keywords = body.split(/\}\{/).uniq.reject { |word| word.size == 1 }
    File.open(output, 'w') { |out| out << keywords.join("\n") }
  end

  # Runs the external "mystem.exe" tool as `mystem.exe -l <file> <output>`.
  #
  # The command is passed as an argument vector, so no shell is involved and
  # paths containing spaces or shell metacharacters are handed over verbatim.
  #
  # @param file [String] path given to the tool as its input
  # @param output [String] path given to the tool as its output
  # @return [String] whatever the tool printed on standard output
  def self.normalize(file, output = 'out')
    IO.popen(['mystem.exe', '-l', file.to_s, output.to_s], &:read)
  end

  # The two parsers below are public on purpose: a bare `private` does not
  # apply to `def self.` methods, and callers may already invoke them.

  # Builds a graph from GraphML text using regular expressions (no XML
  # parser). Node ids come from <node id="...">, labels from the text in
  # front of </y:NodeLabel>, edges from <edge id source target>. An id of the
  # form "a::b" is additionally linked from the node with id "a".
  #
  # Labels containing "Folder" are ignored.
  # NOTE(review): presumably these are the extra labels written for the
  # collapsed state of group nodes - confirm against real input.
  #
  # Ids and labels are paired by position. Edges whose endpoints are not
  # known node ids are skipped.
  #
  # @param string [String] GraphML document text
  # @return [Graph]
  def self.read_graphML(string)
    # (?:\w|::) accepts the same ids as (\w+|::) without nested quantifiers.
    ids = string.scan(/<node id\="((?:\w|::)*)"/).map(&:first)
    labels = string.scan(/>(.*)<\/y:NodeLabel/)
                   .map(&:first)
                   .reject { |label| label.match(/Folder/) }
                   .map { |label| unescape_xml(label) }
    # [^"]+ keeps the id match inside its own attribute even when several
    # <edge> elements share a line.
    edges = string.scan(/<edge id="[^"]+" source="((?:\w|::)+)" target="((?:\w|::)+)"/)

    # First position of every id, mirroring Array#index.
    index_of = {}
    ids.each_with_index { |id, i| index_of[id] = i unless index_of.key?(id) }

    graph = Graph.new(labels)
    # Connect by position rather than by label so that nodes sharing a label
    # keep their own edges.
    edges.each { |source, target| graph.connect_by_id(index_of[source], index_of[target]) }
    ids.each_with_index do |id, i|
      group = id.split(/::/)
      next unless group.size > 1

      group.pop
      graph.connect_by_id(index_of[group.join('::')], i)
    end
    graph
  end

  # Builds a graph from indented text. Every line becomes a node. A line
  # starting with INDENT is stored without that prefix and linked from the
  # closest preceding unindented line; only this single level is recognised.
  # Prints one progress line per input line to standard output.
  #
  # @param string [String] newline-separated node labels
  # @return [Graph]
  def self.read_ws(string)
    graph = Graph.new
    parent = ''
    parent_index = nil
    string.split("\n").each_with_index do |node, index|
      if node[0, INDENT.size] == INDENT
        graph.add(node[INDENT.size..-1])
        # Skipped by connect_by_id when no parent line has been seen yet.
        graph.connect_by_id(parent_index, index)
        puts "#{parent} - #{index}"
      else
        graph.add(node)
        parent = node
        parent_index = index
        puts "#{parent} ->"
      end
    end
    graph
  end

  # Replaces the predefined XML entities in +text+ with their characters.
  # Numeric character references are left untouched.
  def self.unescape_xml(text)
    text.gsub(/&(?:amp|lt|gt|quot|apos);/, XML_UNESCAPES)
  end
  private_class_method :unescape_xml

  private

  # Records the edge [s, t] once, provided both ends are existing nodes.
  def add_edge(s, t)
    return unless valid_node_index?(s) && valid_node_index?(t)

    edge = [s, t]
    @edges << edge unless @edges.include?(edge)
  end

  # True when +index+ is a non-negative Integer inside the label list.
  def valid_node_index?(index)
    index.is_a?(Integer) && index >= 0 && index < @labels.size
  end

  # Renders the whole GraphML document as a String.
  def graphml_document
    nodes = @labels.each_with_index.map do |label, i|
      %(<node id="#{i}"><data key="label">#{escape_xml(label)}</data></node>\n)
    end
    edges = @edges.map { |s, t| %(<edge source="#{s}" target="#{t}"></edge>\n) }
    [XML_HEADER, *nodes, *edges, XML_FOOTER].join
  end

  # Escapes &, < and > so that a label is valid XML character data.
  def escape_xml(text)
    text.to_s.gsub(/[&<>]/, XML_ESCAPES)
  end
end
# Script entry point: load the graph described by input.txt and write it as
# GraphML to the default path (the graph's name followed by ".xml").
graph = Graph.read("input.txt")
graph.to_xml
#graph.to_file('out.graphml')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment