Last active
April 28, 2020 14:52
-
-
Save nagadomi/76e18202cd59a423b8d8 to your computer and use it in GitHub Desktop.
csvigo example
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- csvigo usage example: load a small CSV in "raw" and "tidy" modes and turn
-- the result into torch Tensors.
--
-- Install the dependency first:
--   sudo luarocks install csvigo

-- torch.Tensor is used below, so load torch explicitly instead of relying on
-- another module (or the `th` launcher) to have defined the global.
require 'torch'
require 'csvigo'

--[[ test.csv
a,b,c,d
1,2,3,4
5,5,3,3
--]]

local CSV_PATH = "./test.csv"

-- mode = "raw" returns the file contents as-is: a table of rows, where each
-- row is a table of string cells. The header line is row 1.
local raw_rows = csvigo.load({path = CSV_PATH, verbose = false, mode = "raw"})
print("* mode = raw")
print(raw_rows)

-- Convert to a torch.Tensor.
print("* tensor")
table.remove(raw_rows, 1) -- remove header
local csv_tensor = torch.Tensor(raw_rows)
print(csv_tensor)

-- Try an arbitrary computation. mean(1) reduces over dimension 1 (the rows),
-- so the result holds one mean per column (a 1x4 tensor for test.csv).
print("* row mean")
print(csv_tensor:mean(1))

-- Accessing the cells without converting to a tensor (header already removed).
-- Cells are strings, hence the tonumber() before formatting with %f.
print("* table access")
for row_id = 1, #raw_rows do
  local row = raw_rows[row_id]
  for col_id = 1, #row do
    print(string.format("%d,%d = %f", row_id, col_id, tonumber(row[col_id])))
  end
end

-- mode = "tidy" returns a table keyed by header name; each entry is the list
-- of (string) values in that column.
local tidy_columns = csvigo.load({path = CSV_PATH, verbose = false, mode = "tidy"})
print("* mode = tidy")
print(tidy_columns)

-- Access by column name:
--print(tidy_columns["a"])
--print(tidy_columns["a"][1])
--print(tidy_columns["a"][2])

-- pairs() visits the columns in an unspecified order (see the sample output:
-- b, d, a, c).
for name, rec in pairs(tidy_columns) do
  print("* " .. name)
  print(torch.Tensor(rec))
end

-- mode = "query": its purpose is unclear to the author; not covered here.
--[[ output
* mode = raw
{
1 :
{
1 : "a"
2 : "b"
3 : "c"
4 : "d"
}
2 :
{
1 : "1"
2 : "2"
3 : "3"
4 : "4"
}
3 :
{
1 : "5"
2 : "5"
3 : "3"
4 : "3"
}
}
* tensor
1 2 3 4
5 5 3 3
[torch.DoubleTensor of dimension 2x4]
* row mean
3.0000 3.5000 3.0000 3.5000
[torch.DoubleTensor of dimension 1x4]
* table access
1,1 = 1.000000
1,2 = 2.000000
1,3 = 3.000000
1,4 = 4.000000
2,1 = 5.000000
2,2 = 5.000000
2,3 = 3.000000
2,4 = 3.000000
* mode = tidy
{
b :
{
1 : "2"
2 : "5"
}
d :
{
1 : "4"
2 : "3"
}
a :
{
1 : "1"
2 : "5"
}
c :
{
1 : "3"
2 : "3"
}
}
* b
2
5
[torch.DoubleTensor of dimension 2]
* d
4
3
[torch.DoubleTensor of dimension 2]
* a
1
5
[torch.DoubleTensor of dimension 2]
* c
3
3
[torch.DoubleTensor of dimension 2]
]]
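--[[ Important caveat
LuaJIT tables are limited in how much memory they can use: the author states they cannot allocate 4 GB or more (torch.Tensor has no such limit).
csvigo appears to read the entire file before returning, so for a huge CSV you will probably need to write your own code that reads the file line by line and parses each line as it goes.
]]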
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment