Skip to content

Instantly share code, notes, and snippets.

@CapsAdmin
Created September 28, 2018 08:48
Show Gist options
  • Save CapsAdmin/dcd45d5cfae649015968895d855c26c7 to your computer and use it in GitHub Desktop.
Save CapsAdmin/dcd45d5cfae649015968895d855c26c7 to your computer and use it in GitHub Desktop.
--- Write the elements of a sequence to a file, joined by single spaces.
-- The file is opened in "w" mode (created or truncated) and no trailing
-- newline is written. The text goes through the file handle itself, so the
-- process-wide default output (io.output) is left untouched.
-- Raises an error if the file cannot be opened or written.
-- @tparam table data sequence of values accepted by table.concat
-- @tparam string fname path of the file to write
function write_table(data, fname)
  -- Join first: if an element is invalid, table.concat fails before any
  -- file handle has been opened.
  local text = table.concat(data, " ")

  local file = assert(io.open(fname, "w"))
  local ok, err = file:write(text)
  -- Close before checking the write result so the handle is never leaked.
  file:close()
  assert(ok, err)
end
--- Split a string on a literal separator (plain search, not a Lua pattern).
-- When `max` is given, splitting stops as soon as `max` pieces have been
-- collected and the rest of the string is discarded. When `max` is omitted,
-- every piece is returned. A string that does not contain the separator
-- yields a one-element table holding the string itself.
-- NOTE(review): an empty separator is not handled specially; callers are
-- expected to pass a non-empty one.
-- @tparam string self the string to split
-- @tparam string separator literal separator text
-- @tparam[opt] number max stop after this many separator-delimited pieces
-- @treturn table sequence of pieces
local function split_string(self, separator, max)
  local pieces = {}
  local current_pos = 1

  -- A string of length n holds at most n separators, so #self bounds the loop.
  for i = 1, #self do
    local start_pos, end_pos = self:find(separator, current_pos, true)
    if not start_pos then break end

    pieces[i] = self:sub(current_pos, start_pos - 1)
    current_pos = end_pos + 1

    -- `max` is optional: comparing against nil would raise an error.
    if max and i >= max then
      return pieces
    end
  end

  if current_pos > 1 then
    -- Text after the last separator found (may be the empty string).
    pieces[#pieces + 1] = self:sub(current_pos)
  else
    -- The scan position never advanced: the result is the whole input.
    pieces[1] = self
  end
  return pieces
end
-- Collect the second tab-separated column of every line of the input file
-- (arg[1]) and write the kept values, space-separated, to "glove_corpus.txt",
-- printing the CPU time spent in each phase.

-- NOTE(review): nothing happens between these two os.clock() calls (io.lines
-- below reads the file lazily), so this always reports about 0.00. The line is
-- kept so the script's output stays the same.
local started = os.clock()
print(string.format("reading time %.2f\n", os.clock() - started))

-- `excluded` is not defined anywhere in this file. Use a global table of that
-- name if one was provided, otherwise exclude nothing instead of crashing on
-- the first line read.
local excluded = rawget(_G, "excluded") or {}

local tokens = {}
local count = 0
started = os.clock()
for line in io.lines(arg[1]) do
  local token = split_string(line, "\t", 2)[2]
  -- A line without a tab has no second column. Storing nil would leave a hole
  -- in `tokens` and break table.concat when the result is written.
  if token ~= nil and not excluded[token] then
    count = count + 1
    tokens[count] = token
  end
end
print(string.format("splitting time %.2f\n", os.clock() - started))

started = os.clock()
write_table(tokens, "glove_corpus.txt")
print(string.format("writing time %.2f\n", os.clock() - started))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment