Created
September 23, 2016 14:43
-
-
Save eksperimental/98c2cdd01b868a947a69cb0f51fdc57c to your computer and use it in GitHub Desktop.
How to parse a text file with pattern matching and convert it into a nested list
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# https://elixirforum.com/t/newbie-needs-help-parsing-a-file/1762 | |
defmodule RecordFile do
  @moduledoc """
  Extracts delimited records from a line-oriented text file.

  A record starts on the line after a `~~BOM` marker and ends on the line
  before the next `~~` marker. Lines outside of a record are discarded.
  """

  # Marker lines must match exactly (no surrounding whitespace).
  @record_start "~~BOM"
  @record_end "~~"

  @doc """
  Reads `file` and returns its records as a list of lists of lines.

  Both `\\n` and `\\r\\n` line endings are accepted. Raises a `MatchError`
  if the file cannot be read.
  """
  @spec read(Path.t()) :: [[String.t()]]
  def read(file) do
    {:ok, data} = File.read(file)

    data
    # Split on CRLF as well as LF; otherwise a trailing "\r" would keep the
    # marker lines from ever matching on Windows-style files.
    |> String.split(["\r\n", "\n"])
    |> filter()
  end

  @doc """
  Groups the lines found between `~~BOM` and `~~` markers.

  Returns one inner list per record, in input order, with the marker lines
  themselves removed. A `~~` that does not close an open record is ignored,
  and a record still open at the end of the input is returned with whatever
  lines it has collected so far.

  ## Examples

      iex> RecordFile.filter(["skip", "~~BOM", "a", "b", "~~", "skip"])
      [["a", "b"]]

  """
  @spec filter([String.t()]) :: [[String.t()]]
  def filter(list), do: collect(list, [], nil)

  # collect(lines, records, current)
  #
  # `records` holds the finished records, newest first. `current` is `nil`
  # while outside a record, or the reversed list of lines gathered for the
  # record being read.

  # End of input outside a record: restore input order.
  defp collect([], records, nil), do: Enum.reverse(records)

  # End of input inside a record: keep the partial record rather than lose it.
  defp collect([], records, current),
    do: Enum.reverse([Enum.reverse(current) | records])

  # Start marker: open a record. A repeated start marker inside an open
  # record is dropped and the lines gathered so far are kept.
  defp collect([@record_start | rest], records, current),
    do: collect(rest, records, current || [])

  # Stray end marker outside a record: nothing to close.
  defp collect([@record_end | rest], records, nil),
    do: collect(rest, records, nil)

  # End marker: finish the current record.
  defp collect([@record_end | rest], records, current),
    do: collect(rest, [Enum.reverse(current) | records], nil)

  # Ordinary line outside a record: skip it.
  defp collect([_line | rest], records, nil),
    do: collect(rest, records, nil)

  # Ordinary line inside a record: prepend (reversed once the record closes).
  defp collect([line | rest], records, current),
    do: collect(rest, records, [line | current])
end
# records.txt (sample input; strip the leading "# " from each line below):
# rec1 | |
# rec2 | |
# ~~BOM | |
# PVDocMgmt1.dll -- DateTime = 20160906082015;md5=MD5=2e59ba41a50a5ff2ab530519281bc572 | |
# PVDocMgmt2.dll -- DateTime = 20160906082015;md5=MD5=2e59ba41a50a5ff2ab530519281bc572 | |
# ~~ | |
# rec3 | |
# rec4 | |
# ~~BOM | |
# PVDocMgmt3.dll -- DateTime = 20160906082015;md5=MD5=2e59ba41a50a5ff2ab530519281bc572 | |
# PVDocMgmt4.dll -- DateTime = 20160906082015;md5=MD5=2e59ba41a50a5ff2ab530519281bc572 | |
# ~~ | |
# rec5 | |
# rec6 | |
# Parse the sample file and print the resulting nested list.
"records.txt"
|> RecordFile.read()
|> IO.inspect()
# OUTPUT | |
# $ elixir record_file.exs | |
# [["PVDocMgmt1.dll -- DateTime = 20160906082015;md5=MD5=2e59ba41a50a5ff2ab530519281bc572", | |
# "PVDocMgmt2.dll -- DateTime = 20160906082015;md5=MD5=2e59ba41a50a5ff2ab530519281bc572"], | |
# ["PVDocMgmt3.dll -- DateTime = 20160906082015;md5=MD5=2e59ba41a50a5ff2ab530519281bc572", | |
# "PVDocMgmt4.dll -- DateTime = 20160906082015;md5=MD5=2e59ba41a50a5ff2ab530519281bc572"]] |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment