Last active
November 25, 2017 01:22
-
-
Save hipertracker/3dfdac5f109519173a470193a7653e93 to your computer and use it in GitHub Desktop.
Sigla parser in Elixir
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
defmodule Biblia.VersesParser do
  @moduledoc """
  Parses Bible reference queries (sigla) such as `"1Co 3:4-5; 7:8; 9:1nn"`.

  A query is a list of references separated by `;` or `|`. Each reference is
  an optional book siglum followed by a chapter and, optionally, `:` plus a
  comma-separated list of verses. A verse entry is a single number (`4`), a
  closed range (`4-5`) or an open-ended range (`4nn`, "verse 4 and
  following"). A reference without a book siglum reuses the book of the
  reference before it.

  Every parsed reference is a map with the keys `:bible`, `:book`, `:ref`,
  `:chapter` and `:verses`.
  """

  # Upper bound used when a selection is open-ended ("4nn") or covers a whole
  # chapter; chosen to be larger than any real verse number.
  @last_verse 9999

  @doc """
  Parses `query` once per entry of `bibles` and returns the fully resolved
  references (book, chapter and verse selection).

  `:verses` is a list whose elements are integers (single verses) or ranges.
  A reference without a verse part selects the whole chapter, expressed as
  `[1..9999]`. Empty verse entries (for example after a trailing comma) are
  ignored.

  Raises `MatchError` when a reference has no numeric chapter or when the
  first reference does not name a book.

  ## Examples

      iex> Biblia.VersesParser.extract_verses("1Co 3:4-5; 7:8", ["niv"])
      [
        %{bible: "niv", book: "1Co", chapter: 3, ref: "3:4-5", verses: [4..5]},
        %{bible: "niv", book: "1Co", chapter: 7, ref: "7:8", verses: [8]}
      ]

      iex> Biblia.VersesParser.extract_verses("", ["niv"])
      []

  """
  @spec extract_verses(String.t(), [term()]) :: [map()]
  def extract_verses(query, bibles) do
    query
    |> refs_with_chapter(bibles)
    |> Enum.map(fn item ->
      verses =
        case Regex.split(~r"\s*:\s*", item.ref) do
          [_chapter, verses_str] ->
            # `trim: true` drops empty entries so "4," does not yield verse 0.
            ~r"\s*,\s*"
            |> Regex.split(verses_str, trim: true)
            |> verses_range()

          _ ->
            [1..@last_verse]
        end

      Map.put(item, :verses, verses)
    end)
  end

  @doc """
  Splits a single reference token into book and remainder.

  Returns `[whole_match, book_or_number, rest]` when the token has a leading
  word followed by more text, otherwise returns the token unchanged (this is
  the case for a bare one-character chapter number such as `"4"`).

  ## Examples

      iex> Biblia.VersesParser.parse_siglum("1Co 3:4-5")
      ["1Co 3:4-5", "1Co", "3:4-5"]

      iex> Biblia.VersesParser.parse_siglum("4")
      "4"

  """
  @spec parse_siglum(String.t()) :: [String.t()] | String.t()
  def parse_siglum(token) do
    # Try the three-character siglum first, then any shorter leading word.
    # `Regex.run/2` returns nil when there is no match, so `||` falls through.
    Regex.run(~r/^([\d\w]\w\w)\s*(.+)/, token) ||
      Regex.run(~r/^(\d?\w+)\s*(.+)/, token) ||
      token
  end

  @doc """
  Converts reference tokens into `%{bible:, book:, ref:}` maps, prepending
  each one to `acc`. The result is therefore in reverse token order.

  Tokens that are blank after trimming are skipped. A token that starts with
  a chapter number instead of a book inherits the book of the most recently
  parsed entry; a `MatchError` is raised if there is no such entry.
  """
  @spec parse_sigla([String.t()], term(), [map()]) :: [map()]
  def parse_sigla([], _bible, acc), do: acc

  def parse_sigla([head | tail], bible, acc) do
    case String.trim(head) do
      # Produced by an empty query or a trailing/doubled separator.
      "" -> parse_sigla(tail, bible, acc)
      token -> parse_sigla(tail, bible, [siglum_entry(token, bible, acc) | acc])
    end
  end

  @doc """
  Splits `query` on `;` or `|` and parses the resulting tokens once for every
  entry in `bibles`. Results are grouped by bible, in query order.
  """
  @spec split_refs(String.t(), [term()]) :: [map()]
  def split_refs(query, bibles) do
    # The token list does not depend on the bible, so split only once.
    tokens = Regex.split(~r{\s*[;\|]\s*}, query)

    Enum.flat_map(bibles, fn bible ->
      tokens
      |> parse_sigla(bible, [])
      |> Enum.reverse()
    end)
  end

  @doc """
  Like `split_refs/2`, but adds the integer `:chapter` taken from the part of
  `:ref` before the first `:`.

  Raises `MatchError` when that part does not start with an integer.
  """
  @spec refs_with_chapter(String.t(), [term()]) :: [map()]
  def refs_with_chapter(query, bibles) do
    query
    |> split_refs(bibles)
    |> Enum.map(fn item ->
      [chapter_str | _verses] = Regex.split(~r"\s*:\s*", item.ref)
      {chapter, _rest} = Integer.parse(chapter_str)
      Map.put(item, :chapter, chapter)
    end)
  end

  @doc """
  Parses the leading integer of `s`, returning `0` when there is none.

  ## Examples

      iex> Biblia.VersesParser.to_i("12")
      12

      iex> Biblia.VersesParser.to_i("4nn")
      4

      iex> Biblia.VersesParser.to_i("abc")
      0

  """
  @spec to_i(String.t()) :: integer()
  def to_i(s) do
    case Integer.parse(s) do
      {num, _} -> num
      :error -> 0
    end
  end

  # Builds the entry for one non-blank, already trimmed token.
  defp siglum_entry(token, bible, acc) do
    case parse_siglum(token) do
      [_match, maybe_book, ref] ->
        case Integer.parse(maybe_book) do
          :error ->
            # Purely alphabetic siglum, e.g. "Gen".
            %{bible: bible, book: maybe_book, ref: ref}

          {_chapter, ""} ->
            # The leading word is a plain number, so the token is
            # "chapter:verses" and belongs to the previous book.
            %{bible: bible, book: previous_book(acc), ref: token}

          {book_number, book_text} ->
            # Numbered book, e.g. "1Co".
            %{bible: bible, book: "#{book_number}#{book_text}", ref: ref}
        end

      chapter ->
        # No book/remainder split was possible: a bare chapter number.
        %{bible: bible, book: previous_book(acc), ref: chapter}
    end
  end

  # Book of the most recently parsed entry; MatchError when there is none.
  defp previous_book(acc) do
    [previous | _rest] = acc
    Map.get(previous, :book)
  end

  # Maps each verse token to an integer or a range.
  defp verses_range(tokens), do: Enum.map(tokens, &verse_selector/1)

  defp verse_selector(token) do
    case String.split(token, "-") do
      [from_verse, to_verse] ->
        to_i(from_verse)..to_i(to_verse)

      [single] ->
        if String.contains?(single, "nn") do
          # "4nn" means verse 4 and everything after it.
          first = to_i(String.replace(single, "nn", ""))
          first..@last_verse
        else
          to_i(single)
        end
    end
  end
end
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
defmodule Biblia.VersesParserTest do
  @moduledoc false

  # The parser is called here only with literal arguments and its results are
  # compared by value, so the cases are run concurrently.
  use ExUnit.Case, async: true
  doctest Biblia.VersesParser

  alias Biblia.VersesParser

  # Every case queries a single translation.
  @bibles ["niv"]

  describe "extract_verses/2" do
    test "single verse: Gen 3:4" do
      expected = [
        %{bible: "niv", book: "Gen", chapter: 3, verses: [4], ref: "3:4"}
      ]

      assert VersesParser.extract_verses("Gen 3:4", @bibles) == expected
    end

    test "verse range in a numbered book: 1Co 3:4-5" do
      expected = [
        %{bible: "niv", book: "1Co", chapter: 3, verses: [4..5], ref: "3:4-5"}
      ]

      assert VersesParser.extract_verses("1Co 3:4-5", @bibles) == expected
    end

    test "following references inherit the book: 1Co 3:4-5; 7:8; 9:1nn" do
      expected = [
        %{bible: "niv", book: "1Co", chapter: 3, verses: [4..5], ref: "3:4-5"},
        %{bible: "niv", book: "1Co", chapter: 7, verses: [8], ref: "7:8"},
        %{bible: "niv", book: "1Co", chapter: 9, verses: [1..9999], ref: "9:1nn"}
      ]

      assert VersesParser.extract_verses("1Co 3:4-5; 7:8; 9:1nn", @bibles) == expected
    end

    test "whole chapter: 1Co 3" do
      expected = [
        %{bible: "niv", book: "1Co", chapter: 3, verses: [1..9999], ref: "3"}
      ]

      assert VersesParser.extract_verses("1Co 3", @bibles) == expected
    end

    test "two whole chapters: 1Co 3; 4" do
      expected = [
        %{bible: "niv", book: "1Co", chapter: 3, verses: [1..9999], ref: "3"},
        %{bible: "niv", book: "1Co", chapter: 4, verses: [1..9999], ref: "4"}
      ]

      assert VersesParser.extract_verses("1Co 3; 4", @bibles) == expected
    end

    test "whole chapter followed by a single verse: 1Co 3; 4:5" do
      expected = [
        %{bible: "niv", book: "1Co", chapter: 3, verses: [1..9999], ref: "3"},
        %{bible: "niv", book: "1Co", chapter: 4, verses: [5], ref: "4:5"}
      ]

      assert VersesParser.extract_verses("1Co 3; 4:5", @bibles) == expected
    end

    test "comma-separated verses: 1Co 3:4, 6" do
      expected = [
        %{bible: "niv", book: "1Co", chapter: 3, verses: [4, 6], ref: "3:4, 6"}
      ]

      assert VersesParser.extract_verses("1Co 3:4, 6", @bibles) == expected
    end

    test "open-ended range: 1Co 3:4nn" do
      expected = [
        %{bible: "niv", book: "1Co", chapter: 3, verses: [4..9999], ref: "3:4nn"}
      ]

      assert VersesParser.extract_verses("1Co 3:4nn", @bibles) == expected
    end
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment