Last active
December 2, 2019 21:39
-
-
Save rugyoga/321af0192af374596641a5a1cc81a38e to your computer and use it in GitHub Desktop.
Trying to use streams to process large PGN files
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
defmodule Chess.PGN.Stream do
  @moduledoc """
  Parses PGN (Portable Game Notation) text, given as an enumerable of lines,
  into games of the form `%{headers: map, moves: string}`.

  A game is a run of tag-pair lines (`[Key "Value"]`) followed by every line
  up to the next tag-pair line; those following lines are joined into a single
  movetext string.
  """

  @typedoc "One parsed game: tag pairs keyed by tag name, plus the joined movetext."
  @type game :: %{headers: %{optional(String.t()) => String.t()}, moves: String.t()}

  @doc """
  Splits `stream` at the first element for which `f` returns a falsy value.

  Returns `{taken, rest}`: `taken` is the leading run of elements accepted by
  `f`, `rest` starts at the first rejected element. Both halves are lazy, and
  each one enumerates `stream` again from the start when it is consumed.
  """
  @spec split(Enumerable.t(), (term() -> as_boolean(term()))) ::
          {Enumerable.t(), Enumerable.t()}
  def split(stream, f), do: {Stream.take_while(stream, f), Stream.drop_while(stream, f)}

  @doc """
  Adds the tag pair found in `line` to `hash`.

  `line` is expected to look like `[Key "Value"]`. The value may be empty
  (`[Annotator ""]`). Lines that do not match leave `hash` unchanged.
  """
  @spec process_header(String.t(), map()) :: map()
  def process_header(line, hash) do
    # The key is a single whitespace-free token so that a value containing ` "`
    # cannot be absorbed into the key; `.*` (not `.+`) keeps empty values.
    case Regex.named_captures(~r/\[\s*(?<key>\S+)\s+"(?<value>.*)"\s*\]/, line) do
      %{"key" => key, "value" => value} -> Map.put(hash, key, value)
      nil -> hash
    end
  end

  @doc """
  Builds a map of tag name to tag value from an enumerable of header lines.

  When a tag name occurs more than once, the last occurrence wins.
  """
  @spec process_headers(Enumerable.t()) :: %{optional(String.t()) => String.t()}
  def process_headers(headers) do
    Enum.reduce(headers, %{}, fn line, tags ->
      line
      |> String.trim()
      |> process_header(tags)
    end)
  end

  @doc """
  Joins an enumerable of movetext lines into one string.

  Every line is trimmed, the lines are joined with single spaces, and the
  result is trimmed again so blank first/last lines leave no padding.
  """
  @spec process_moves(Enumerable.t()) :: String.t()
  def process_moves(moves) do
    moves
    |> Enum.map_join(" ", &String.trim/1)
    |> String.trim()
  end

  @doc """
  Parses the first game found in `stream`.

  Lines before the first header line are skipped. Returns `{[game], rest}`,
  where `rest` is a lazy enumerable starting at the next game's first header
  line, or `{:halt, stream}` when `stream` contains no header line at all.

  `rest` is built by stacking `Stream.drop_while/2` on `stream`, so consuming it
  re-enumerates `stream` from the start. To read every game of a large file use
  `parse_games/2`, which makes a single pass.
  """
  @spec parse_game(Enumerable.t()) :: {[game()], Enumerable.t()} | {:halt, Enumerable.t()}
  def parse_game(stream) do
    from_first_header = Stream.drop_while(stream, fn line -> not header_line?(line) end)
    {headers, after_headers} = split(from_first_header, &header_line?/1)

    # Materialise the headers once; this doubles as the "anything left?" check.
    case Enum.to_list(headers) do
      [] ->
        {:halt, stream}

      header_lines ->
        {moves, after_moves} = split(after_headers, fn line -> not header_line?(line) end)
        {[build_game(header_lines, moves)], after_moves}
    end
  end

  @doc """
  Parses every game in `stream` in a single pass and prepends them to `games`.

  Each parsed game is prepended wrapped in a one-element list (the shape
  `parse_game/1` emits), so the result lists the last game of the stream first.
  That shape is kept for existing callers; when `games` is `[]`,
  `result |> Enum.reverse() |> Enum.concat()` gives a flat list in stream order.

  Lines before the first header line are skipped.
  """
  @spec parse_games(Enumerable.t(), list()) :: list()
  def parse_games(stream, games) do
    stream
    |> Stream.chunk_while({[], []}, &collect_line/2, &flush_pending/1)
    |> Enum.reduce(games, fn game, acc -> [[game] | acc] end)
  end

  # True when `line` opens with a PGN tag pair. A leading BOM and whitespace are
  # ignored. `[%...]` is excluded because it is a command embedded in a movetext
  # comment (e.g. `[%clk 0:01:00]`) that may land at the start of a wrapped line.
  defp header_line?(line) do
    text =
      line
      |> String.trim_leading("\uFEFF")
      |> String.trim_leading()

    String.starts_with?(text, "[") and not String.starts_with?(text, "[%")
  end

  # Assembles the game map from header lines and movetext lines in file order.
  defp build_game(header_lines, move_lines) do
    %{headers: process_headers(header_lines), moves: process_moves(move_lines)}
  end

  # Chunking step for parse_games/2. The accumulator is
  # `{reversed_header_lines, reversed_move_lines}` of the game being collected.
  defp collect_line(line, {headers, moves}) do
    case {header_line?(line), headers, moves} do
      # Still inside the header section of the current game.
      {true, _, []} -> {:cont, {[line | headers], []}}
      # A header after movetext: emit the finished game and start the next one.
      {true, _, _} -> {:cont, emit(headers, moves), {[line], []}}
      # Text before the first header of the stream is not part of any game.
      {false, [], _} -> {:cont, {[], []}}
      {false, _, _} -> {:cont, {headers, [line | moves]}}
    end
  end

  # Emits the last game once the stream ends, if one was being collected.
  defp flush_pending({[], _moves}), do: {:cont, {[], []}}
  defp flush_pending({headers, moves}), do: {:cont, emit(headers, moves), {[], []}}

  # Restores file order of the accumulated lines before building the game.
  defp emit(reversed_headers, reversed_moves) do
    build_game(Enum.reverse(reversed_headers), Enum.reverse(reversed_moves))
  end
end
# Sample game in PGN format. It is not used below (hence the underscore); it is
# kept as a reference for the input the parser expects.
_input = """
[Event "Mechanics' Bedjanian TNM; G/2 d/5"]
[Site "San Francisco"]
[Date "2018.11.13"]
[Round "4.42"]
[White "Abdi, Mehron Edward"]
[Black "Chan, John"]
[Result "0-1"]
[ECO "C00"]
[WhiteElo "1142"]
[BlackElo "1546"]
[PlyCount "106"]
[EventDate "2018.??.??"]
[Source "ChessBase"]
[SourceDate "2007.09.04"]
1. e4 e6 2. Bc4 c6 3. Nf3 b5 4. Bb3 a5 5. d3 ({Both scoresheets say} 5. c3 {
Well, one says "Pc3"...}) 5... h6 6. O-O Bb7 7. a4 b4 8. c3 {Both scoresheets}
Na6 9. Re1 d6 10. d4 Ne7 11. d5 cxd5 12. exd5 e5 13. Bc4 Bc8 14. Qe2 Nc5 15.
Bd2 Qb6 16. Rc1 Ng6 17. Qe1 b3 18. Qe2 Be7 19. Na3 O-O 20. Be3 Kh8 21. Nb5 f5
22. Bxb3 f4 23. Bxc5 Qxc5 24. Bc4 Qb6 25. b4 Bf6 26. Qc2 Ne7 27. bxa5 Rxa5 28.
Na3 Qc7 29. Bb5 Nxd5 30. Nc4 Ra7 31. a5 Ne7 32. Rd1 d5 33. Nb6 Bf5 34. Qb3 Rxa5
35. Ba4 Rb8 36. Qa3 Bc2 37. Rd2 Bxa4 38. Nxa4 Qc4 39. Rda2 Qc7 40. Qc1 Rba8 41.
Qd1 Nf5 42. g4 fxg3 43. hxg3 Nd6 44. Ne1 e4 45. Nc2 Nc4 46. Nb4 Qb7 47. Rd2
Nxd2 48. Rc1 Nf3+ 49. Kg2 Rd8 50. c4 Qxb4 51. cxd5 Raxd5 52. Qxd5 Rxd5 53. Rc8+
Kh7 0-1
"""

path = Path.join([__DIR__, "..", "data", "Mechanics2018.PGN"])

# Stream the file line by line (the default for File.stream!/1). The parser
# classifies whole lines, so fixed-size byte chunks would not work, and the
# previous keyword list (`modes: [], lines_or_bytes: 2048`) is not a valid
# list of file modes.
stream = File.stream!(path)

games = Chess.PGN.Stream.parse_games(stream, [])
IO.inspect(games)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment