Created
          February 4, 2021 19:25 
        
      - 
      
- 
        Save keynslug/a8618587af1368ad9116f54fc55f1268 to your computer and use it in GitHub Desktop. 
    email address parser
  
        
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | % Based on RFC5321 (Simple Mail Transfer Protocol) | |
| % https://tools.ietf.org/rfc/rfc5321.txt | |
| % + Added support for RFC6532 via RFC6531 (SMTP Extension for Internationalized Email) | |
| % https://tools.ietf.org/rfc/rfc6532.txt | |
| % https://tools.ietf.org/rfc/rfc6531.txt | |
| % Nonterminals: the grammar symbols that are defined by the rules further down. | |
| Nonterminals | |
| mailbox | |
| localpart | |
| domainpart | |
| domain | |
| addresslit | |
| addresslit_ipv6 | |
| addresslit_ipv4 | |
| ipv6addr | |
| ipv6full | |
| ipv6comp | |
| ipv6comp8 | |
| ipv6comp7 | |
| ipv6comp6 | |
| ipv6comp5 | |
| ipv6comp4 | |
| ipv6comp3 | |
| ipv6comp2 | |
| ipv6comp1 | |
| dotstring | |
| . | |
| % Terminals: the token categories the parser expects to receive as input. | |
| Terminals | |
| atext | |
| dtext | |
| qstring | |
| snum | |
| ipv6pre | |
| ipv6hex | |
| '@' | |
| '.' | |
| '[' ']' | |
| ':' '::' | |
| . | |
| % Parsing starts at `mailbox`; '$end' is declared as the end-of-input symbol. | |
| Rootsymbol mailbox. | |
| Endsymbol '$end'. | |
| % Mailbox specification | |
| % A mailbox is a local part and a domain part separated by '@'. | |
| % Its semantic value is the pair {LocalPart, DomainPart}. | |
| mailbox -> localpart '@' domainpart : {'$1', '$3'}. | |
| % Domain specification | |
| % The part after '@' is either a dot-separated domain name or a bracketed | |
| % address literal. The value is tagged {domain, Labels} or {address, Addr}. | |
| domainpart -> domain : {domain, '$1'}. | |
| domainpart -> addresslit : {address, '$1'}. | |
| % A domain is a proper list of label tokens in left-to-right order. The | |
| % single-label rules return a one-element list so that the recursive rules | |
| % never build an improper list such as [A | B]. | |
| % The tokenizer is context-free: an all-digit label of up to three characters | |
| % is delivered as `snum` and a label of up to four hex digits as `ipv6hex`, | |
| % so those token categories are accepted as labels alongside `dtext`. | |
| domain -> dtext '.' domain : ['$1' | '$3']. | |
| domain -> snum '.' domain : ['$1' | '$3']. | |
| domain -> ipv6hex '.' domain : ['$1' | '$3']. | |
| domain -> dtext : ['$1']. | |
| domain -> snum : ['$1']. | |
| domain -> ipv6hex : ['$1']. | |
| % An address literal is enclosed in square brackets; the brackets are dropped. | |
| addresslit -> '[' addresslit_ipv4 ']' : '$2'. | |
| addresslit -> '[' addresslit_ipv6 ']' : '$2'. | |
| % addresslit -> '[' addresslit_general ']' | |
| % Local part specification | |
| % The part before '@' is either a dot-string or a single quoted-string token. | |
| % A dot-string yields a list of tokens; a quoted string yields the bare | |
| % `qstring` token. | |
| localpart -> dotstring : '$1'. | |
| localpart -> qstring : '$1'. | |
| % A dot-string is a non-empty, '.'-separated sequence of atoms, returned as | |
| % a proper list of tokens in left-to-right order. | |
| % The tokenizer is context-free: an atom such as "123" is delivered as | |
| % `snum` and one such as "ABCD" as `ipv6hex`, so every token category that | |
| % can spell a valid atom is accepted here, not only `atext` and `dtext`. | |
| dotstring -> atext '.' dotstring : ['$1' | '$3']. | |
| dotstring -> dtext '.' dotstring : ['$1' | '$3']. | |
| dotstring -> snum '.' dotstring : ['$1' | '$3']. | |
| dotstring -> ipv6hex '.' dotstring : ['$1' | '$3']. | |
| dotstring -> atext : ['$1']. | |
| dotstring -> dtext : ['$1']. | |
| dotstring -> snum : ['$1']. | |
| dotstring -> ipv6hex : ['$1']. | |
| % Internationalized Email Headers | |
| % UTF8-non-ascii = UTF8-2 / UTF8-3 / UTF8-4 | |
| % UTF-8 Byte Sequences (https://tools.ietf.org/rfc/rfc3629.txt) | |
| % UTF8-2 = %xC2-DF UTF8-tail | |
| % UTF8-3 = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) / | |
| %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail ) | |
| % UTF8-4 = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) / | |
| %xF4 %x80-8F 2( UTF8-tail ) | |
| % UTF8-tail = %x80-BF | |
| % SMTP Address Literals | |
| % dcontent = %d33-90 / % Printable US-ASCII | |
| %d94-126 % excl. "[", "\", "]" | |
| % IPv4 address literal: four `snum` tokens separated by '.', returned as a | |
| % 4-tuple of those tokens. | |
| addresslit_ipv4 -> snum '.' snum '.' snum '.' snum : {'$1', '$3', '$5', '$7'}. | |
| % IPv6 address literal: the `ipv6pre` tag token is dropped and only the | |
| % value of the address that follows it is kept. | |
| addresslit_ipv6 -> ipv6pre ipv6addr : '$2'. | |
| ipv6addr -> ipv6full : '$1'. | |
| ipv6addr -> ipv6comp : '$1'. | |
| % ipv6addr -> ipv6v4full | |
| % ipv6addr -> ipv6v4comp | |
| % Full form: eight `ipv6hex` tokens separated by ':', returned as an 8-tuple. | |
| ipv6full -> ipv6hex ':' ipv6hex ':' ipv6hex ':' ipv6hex ':' ipv6hex ':' ipv6hex ':' ipv6hex ':' ipv6hex : {'$1', '$3', '$5', '$7', '$9', '$11', '$13', '$15'}. | |
| % Compressed form: ipv6comp8 down to ipv6comp1 form a countdown chain. Each | |
| % level consumes either "ipv6hex ':'" or '::' and continues with the next | |
| % lower level. The value is the list of `ipv6hex` tokens encountered; '::' | |
| % contributes nothing to that list, so its position is not recorded in the | |
| % result. | |
| % NOTE(review): '::' is permitted at every level of the chain, so input | |
| % containing more than one '::' is accepted by these rules -- confirm | |
| % whether that is intended. | |
| ipv6comp -> ipv6comp8 : '$1'. | |
| ipv6comp8 -> ipv6hex ':' ipv6comp7 : ['$1' | '$3']. | |
| ipv6comp8 -> '::' ipv6comp7 : '$2'. | |
| ipv6comp7 -> ipv6hex ':' ipv6comp6 : ['$1' | '$3']. | |
| ipv6comp7 -> '::' ipv6comp6 : '$2'. | |
| ipv6comp6 -> ipv6hex ':' ipv6comp5 : ['$1' | '$3']. | |
| ipv6comp6 -> '::' ipv6comp5 : '$2'. | |
| ipv6comp5 -> ipv6hex ':' ipv6comp4 : ['$1' | '$3']. | |
| ipv6comp5 -> '::' ipv6comp4 : '$2'. | |
| ipv6comp4 -> ipv6hex ':' ipv6comp3 : ['$1' | '$3']. | |
| ipv6comp4 -> '::' ipv6comp3 : '$2'. | |
| ipv6comp3 -> ipv6hex ':' ipv6comp2 : ['$1' | '$3']. | |
| ipv6comp3 -> '::' ipv6comp2 : '$2'. | |
| ipv6comp2 -> ipv6hex ':' ipv6comp1 : ['$1' | '$3']. | |
| ipv6comp2 -> '::' ipv6comp1 : '$2'. | |
| % Bottom of the chain: a final pair of groups, a '::' followed by one | |
| % group, or a trailing '::' on its own (empty list). | |
| ipv6comp1 -> ipv6hex ':' ipv6hex : ['$1', '$3']. | |
| ipv6comp1 -> '::' ipv6hex : ['$2']. | |
| ipv6comp1 -> '::' : []. | |
| % IPv6v4-full = IPv6-hex 5(":" IPv6-hex) ":" IPv4-address-literal | |
| % General-address-literal = Standardized-tag ":" 1*dcontent | |
| % Standardized-tag = Ldh-str | |
| % IPv6v4-comp = [IPv6-hex *3(":" IPv6-hex)] "::" | |
| % [IPv6-hex *3(":" IPv6-hex) ":"] | |
| % IPv4-address-literal | 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | Definitions. | |
| % Character-class macros used by the tokenizer rules. | |
| DIGIT = [0-9] | |
| ALPHA = [a-zA-Z] | |
| % NOTE(review): only upper-case hex digits are recognised. Widening this to | |
| % [A-Fa-f] would make short lower-case words such as "cafe" or "bad" lex as | |
| % ipv6hex instead of dtext, so it is deliberately left unchanged here. | |
| HEXDIG = ({DIGIT}|[A-F]) | |
| LET = ({ALPHA}|{DIGIT}) | |
| LDH = ({ALPHA}|{DIGIT}|-) | |
| ATEXT = ({ALPHA}|{DIGIT}|[!#$%&'*+\-/=?^_`{|}~]) | |
| % In leex regular expressions "\d" is the escape for DEL (127), not a | |
| % decimal character-code prefix, so code points are written as \xHH. | |
| % quoted-pairSMTP = "\" followed by one printable US-ASCII character | |
| % (codes 32-126). | |
| QPAIR = \\[\x20-\x7E] | |
| % qtextSMTP = codes 32-33, 35-91 and 93-126: printable US-ASCII without | |
| % the double quote (34) and the backslash (92). | |
| QTEXT = [\x20-\x21\x23-\x5B\x5D-\x7E] | |
| IPV6PRE = IPv6: | |
| Rules. | |
| % When several rules match the same longest prefix, the rule listed first | |
| % wins, so the order of the rules below is significant. | |
| % Punctuation: the token category is the matched text itself as an atom. | |
| (::|[@:.\[\]]) : | |
| {token, {list_to_atom(TokenChars), TokenLine, TokenChars}}. | |
| % One to three decimal digits, used for IPv4 address-literal octets. | |
| % NOTE(review): this also admits 256-299; range checking is left to the | |
| % consumer of the parse result. | |
| [12]?{DIGIT}?{DIGIT} : | |
| {token, {snum, TokenLine, TokenChars}}. | |
| % One to four hex digits, used for IPv6 address-literal groups. | |
| {HEXDIG}?{HEXDIG}?{HEXDIG}?{HEXDIG} : | |
| {token, {ipv6hex, TokenLine, TokenChars}}. | |
| % sub-domain = Let-dig [Ldh-str]: starts and ends with a letter or digit, | |
| % hyphens only in between. A single letter or digit is a valid label. | |
| {LET}({LDH}*{LET})? : | |
| {token, {dtext, TokenLine, TokenChars}}. | |
| % Atom = 1*atext | |
| {ATEXT}+ : | |
| {token, {atext, TokenLine, TokenChars}}. | |
| % Tag that introduces an IPv6 address literal. | |
| {IPV6PRE} : | |
| {token, {ipv6pre, TokenLine, TokenChars}}. | |
| % Quoted-string = DQUOTE *QcontentSMTP DQUOTE: any number of quoted pairs | |
| % or qtext characters between the quotes, including none. TokenChars keeps | |
| % the surrounding quotes. | |
| "({QPAIR}|{QTEXT})*" : | |
| {token, {qstring, TokenLine, TokenChars}}. | |
| Erlang code. | 
  
    Sign up for free
    to join this conversation on GitHub.
    Already have an account?
    Sign in to comment