Created
August 26, 2016 22:27
-
-
Save tassoevan/243fc2cb7714ee2ed9cf7cd3a9fabf5e to your computer and use it in GitHub Desktop.
Validating Email Addresses with a Regex? Do yourself a favor and don’t http://blog.onyxbits.de/validating-email-addresses-with-a-regex-do-yourself-a-favor-and-dont-391/
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.util.ArrayList; | |
/**
 * Validates e-mail addresses with a small hand-written state machine instead of a regular
 * expression.
 *
 * <p>The accepted grammar is deliberately narrower than the full RFC 5322 address syntax:
 *
 * <ul>
 *   <li>local part: one or more dot-separated runs of ASCII letters, digits, {@code _},
 *       {@code -} and {@code +}; it may not start or end with a dot or contain two
 *       consecutive dots;
 *   <li>domain: at least two dot-separated labels, each starting and ending with an ASCII
 *       letter or digit and containing only letters, digits and hyphens in between;
 *   <li>top-level domain: ASCII letters only, at least two characters long;
 *   <li>local part at most 64 characters, whole address at most 254 characters.
 * </ul>
 */
public class EmailValidator {

  /** Maximum length of the local part (RFC 5321). */
  private static final int MAX_LOCAL_LENGTH = 64;

  /** Maximum length of the complete address (RFC 5321). */
  private static final int MAX_ADDRESS_LENGTH = 254;

  /** Minimum number of letters required in the top-level domain. */
  private static final int MIN_TLD_LENGTH = 2;

  /** Minimum number of domain labels, i.e. at least a second-level domain is required. */
  private static final int MIN_DOMAIN_LABELS = 2;

  /** Synthetic end-of-input marker fed to the state machine; never allowed inside the input. */
  private static final char END = '\0';

  // States of the scanner.
  private static final int ERROR = -1;
  private static final int LOCAL_START = 0; // expecting the first character of the local part
  private static final int LOCAL_TEXT = 1; // inside a run of local-part characters
  private static final int LOCAL_DOT = 2; // just consumed a '.' in the local part
  private static final int LABEL_START = 3; // expecting the first character of a domain label
  private static final int LABEL_TEXT = 4; // inside a domain label, last character was alphanumeric
  private static final int LABEL_HYPHEN = 5; // inside a domain label, last character was '-'
  private static final int ACCEPT = 6; // end of input reached in a valid position

  /**
   * Checks whether the given characters form an acceptable e-mail address.
   *
   * @param input the candidate address; may be {@code null}
   * @return {@code true} if {@code input} matches the grammar described on this class;
   *     {@code false} otherwise, including for {@code null}, empty input and input that
   *     contains a NUL character
   */
  public static boolean isValid(final char[] input) {
    if (input == null || input.length > MAX_ADDRESS_LENGTH) {
      return false;
    }

    int state = LOCAL_START;
    int localLength = 0;
    int labelCount = 0;

    // Iterate one position past the end so the END marker is fed to the machine exactly once.
    for (int index = 0; index <= input.length && state != ERROR; index++) {
      final char ch;
      if (index == input.length) {
        ch = END;
      } else {
        ch = input[index];
        if (ch == END) {
          // The terminator itself must not be part of the input.
          return false;
        }
      }

      switch (state) {
        case LOCAL_START:
        case LOCAL_DOT:
          // A run of local-part characters must begin here (no leading or doubled dots).
          state = isAtext(ch) ? LOCAL_TEXT : ERROR;
          break;

        case LOCAL_TEXT:
          if (ch == '.') {
            state = LOCAL_DOT;
          } else if (ch == '@') {
            // The local part is everything before the '@'.
            localLength = index;
            state = LABEL_START;
          } else if (!isAtext(ch)) {
            state = ERROR;
          }
          break;

        case LABEL_START:
          // Every label, including those after a '.', must start with a letter or digit.
          state = isAlnum(ch) ? LABEL_TEXT : ERROR;
          break;

        case LABEL_TEXT:
          if (ch == '-') {
            state = LABEL_HYPHEN;
          } else if (ch == '.') {
            labelCount++;
            // Go back to LABEL_START (not LABEL_HYPHEN) so the next label cannot begin
            // with '-'.
            state = LABEL_START;
          } else if (ch == END) {
            labelCount++;
            state = ACCEPT;
          } else if (!isAlnum(ch)) {
            state = ERROR;
          }
          break;

        case LABEL_HYPHEN:
          // Hyphens may repeat, but the label has to continue with a letter or digit.
          if (isAlnum(ch)) {
            state = LABEL_TEXT;
          } else if (ch != '-') {
            state = ERROR;
          }
          break;

        default:
          // ACCEPT is only entered on the final iteration, so nothing is left to consume.
          break;
      }
    }

    if (state != ACCEPT) {
      return false;
    }
    if (labelCount < MIN_DOMAIN_LABELS) {
      return false;
    }
    if (localLength > MAX_LOCAL_LENGTH) {
      return false;
    }
    return hasAlphabeticTld(input);
  }

  /** Returns whether the text after the last '.' is letters only and long enough. */
  private static boolean hasAlphabeticTld(final char[] input) {
    int index = input.length - 1;
    while (index > 0 && input[index] != '.') {
      if (!isLetter(input[index])) {
        return false;
      }
      index--;
    }
    return input.length - 1 - index >= MIN_TLD_LENGTH;
  }

  /** Returns whether {@code ch} is an ASCII letter. */
  private static boolean isLetter(final char ch) {
    return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z');
  }

  /** Returns whether {@code ch} is an ASCII letter or digit. */
  private static boolean isAlnum(final char ch) {
    return isLetter(ch) || (ch >= '0' && ch <= '9');
  }

  /** Returns whether {@code ch} is allowed inside the local part (besides the separator dot). */
  private static boolean isAtext(final char ch) {
    return isAlnum(ch) || ch == '_' || ch == '-' || ch == '+';
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment