Skip to content

Instantly share code, notes, and snippets.

@imranbaloch
Last active February 25, 2022 00:33
Show Gist options
  • Save imranbaloch/4c048d1e0bb615cbe784b73f898957fe to your computer and use it in GitHub Desktop.
Save imranbaloch/4c048d1e0bb615cbe784b73f898957fe to your computer and use it in GitHub Desktop.
// A C# port of MRZ parser at https://github.com/DoubangoTelecom/ultimateMRZ-SDK/blob/71e507ffc33fb5a24a2fa127c72a41f3755f31e0/samples/c%2B%2B/mrz_parser.h#L1
using System;
using System.Collections.Generic;
using System.Globalization;
using System.Linq;
using System.Text.RegularExpressions;
namespace ImranB.Parsers
{
public class MrzData
{
public MrzData()
{
Format = MrzFormat.Unknown;
}
public MrzFormat Format { get; set; }
public string DocumentType { get; internal set; }
public string Country { get; internal set; }
public string Hash { get; internal set; }
public string DocumentNumber { get; internal set; }
public string OptionalData1 { get; internal set; }
public DateTime BirthDate { get; internal set; }
public string Sex { get; internal set; }
public DateTime ExpiryDate { get; internal set; }
public string Nationality { get; internal set; }
public string OptionalData2 { get; internal set; }
public string SurName { get; internal set; }
public string GivenName { get; internal set; }
public string PersonalNumber { get; internal set; }
public string OptionalData { get; internal set; }
}
public enum MrzFormat
{
Unknown,
MachineReadableOfficialTravelDocuments1,
MachineReadableOfficialTravelDocuments2,
MachineReadableOfficialTravelDocuments3,
MachineReadableVisasA,
MachineReadableVisasB,
}
public static class MrzParser
{
private const string RegexAnyChar = "[A-Z0-9<]";
private const string Regex09 = "[0-9]";
private const string Regex09AngleBracket = "[0-9<]";
private const string RegexAZ = "[A-Z]";
private const string RegexSex = "[M|F|X|<]";
private static readonly Func<int, string> RegexAnyCharGroupMatch = (int n) => $"({RegexAnyChar}{{{n}}})";
private static readonly Func<int, string> Regex09GroupMatch = (int n) => $"({Regex09}{{{n}}})";
private static readonly Func<int, string> Regex09AngleBracketGroupMatch = (int n) => $"({Regex09AngleBracket}{{{n}}})";
private static readonly Func<int, string> RegexAZGroupMatch = (int n) => $"({RegexAZ}{{{n}}})";
private static readonly Func<int, string> RegexSexGroupMatch = (int n) => $"({RegexSex}{{{n}}})";
private static readonly string Td1Line0Regex = $"([A|C|I][A-Z0-9<]{{1}}){RegexAZGroupMatch(3)}{RegexAnyCharGroupMatch(9)}{Regex09GroupMatch(1)}{RegexAnyCharGroupMatch(15)}";
private static readonly string Td1Line1Regex = $"{Regex09GroupMatch(6)}{Regex09GroupMatch(1)}{RegexSexGroupMatch(1)}{Regex09GroupMatch(6)}{Regex09GroupMatch(1)}{RegexAZGroupMatch(3)}{RegexAnyCharGroupMatch(11)}{Regex09GroupMatch(1)}";
private static readonly string Td1Line2Regex = $"{RegexAnyCharGroupMatch(30)}";
private static readonly string Td2Line0Regex = $"([A|C|I][A-Z0-9<]{{1}}){RegexAZGroupMatch(3)}{RegexAnyCharGroupMatch(31)}";
private static readonly string Td2Line1Regex = $"{RegexAnyCharGroupMatch(9)}{Regex09GroupMatch(1)}{RegexAZGroupMatch(3)}{Regex09GroupMatch(6)}{Regex09GroupMatch(1)}{RegexSexGroupMatch(1)}{Regex09GroupMatch(6)}{Regex09GroupMatch(1)}{RegexAnyCharGroupMatch(7)}{Regex09GroupMatch(1)}";
private static readonly string Td3Line0Regex = $"(P[A-Z0-9<]{{1}}){RegexAZGroupMatch(3)}{RegexAnyCharGroupMatch(39)}";
private static readonly string Td3Line1Regex = $"{RegexAnyCharGroupMatch(9)}{Regex09GroupMatch(1)}{RegexAZGroupMatch(3)}{Regex09GroupMatch(6)}{Regex09GroupMatch(1)}{RegexSexGroupMatch(1)}{Regex09GroupMatch(6)}{Regex09GroupMatch(1)}{RegexAnyCharGroupMatch(14)}{Regex09AngleBracketGroupMatch(1)}{Regex09GroupMatch(1)}";
private static readonly string MrvALine0Regex = $"(V[A-Z0-9<]{{1}}){RegexAZGroupMatch(3)}{RegexAnyCharGroupMatch(39)}";
private static readonly string MrvALine1Regex = $"{RegexAnyCharGroupMatch(9)}{Regex09GroupMatch(1)}{RegexAZGroupMatch(3)}{Regex09GroupMatch(6)}{Regex09GroupMatch(1)}{RegexSexGroupMatch(1)}{Regex09GroupMatch(6)}{Regex09GroupMatch(1)}{RegexAnyCharGroupMatch(16)}";
private static readonly string MrvBLine0Regex = $"(V[A-Z0-9<]{{1}}){RegexAZGroupMatch(3)}{RegexAnyCharGroupMatch(31)}";
private static readonly string MrvBLine1Regex = $"{RegexAnyCharGroupMatch(9)}{Regex09GroupMatch(1)}{RegexAZGroupMatch(3)}{Regex09GroupMatch(6)}{Regex09GroupMatch(1)}{RegexSexGroupMatch(1)}{Regex09GroupMatch(6)}{Regex09GroupMatch(1)}{RegexAnyCharGroupMatch(8)}";
public static MrzData Parse(List<string> lines)
{
var mrzData = new MrzData();
var isSuccessfullyProcessed = false;
try
{
var numberOfLines = lines.Count;
if (numberOfLines != 2 && numberOfLines != 3)
{
Logger.Log($"{numberOfLines} not a valid number of lines. Expecting 2 or 3 lines");
return null;
}
var firstLine = lines[0];
var firstLineLength = firstLine.Length;
if (lines.Any(l => l.Length != firstLineLength))
{
Logger.Log($"All lines must have same length:");
return null;
}
var format = MrzFormat.Unknown;
if (numberOfLines == 3 && firstLineLength == 30)
{
format = MrzFormat.MachineReadableOfficialTravelDocuments1;
}
else
{
if (firstLineLength == 44 && numberOfLines == 2)
{
format = firstLine[0] == 'P' ? MrzFormat.MachineReadableOfficialTravelDocuments3 : MrzFormat.MachineReadableVisasA;
}
else
{
if (firstLineLength == 36 && numberOfLines == 2)
{
format = firstLine[0] == 'V' ? MrzFormat.MachineReadableVisasB : MrzFormat.MachineReadableOfficialTravelDocuments2;
}
else
{
format = MrzFormat.Unknown;
}
}
}
mrzData.Format = format;
if (format == MrzFormat.Unknown)
{
Logger.Log("Invalid type");
return null;
}
switch (format)
{
case MrzFormat.MachineReadableOfficialTravelDocuments1:
isSuccessfullyProcessed = ProcessTravelDocument1(lines, ref mrzData);
break;
case MrzFormat.MachineReadableOfficialTravelDocuments2:
isSuccessfullyProcessed = ProcessTravelDocuments2(lines, ref mrzData);
break;
case MrzFormat.MachineReadableOfficialTravelDocuments3:
isSuccessfullyProcessed = ProcessTravelDocuments3(lines, ref mrzData);
break;
case MrzFormat.MachineReadableVisasA:
isSuccessfullyProcessed = ProcessMachineReadableVisasA(lines, ref mrzData);
break;
case MrzFormat.MachineReadableVisasB:
isSuccessfullyProcessed = ProcessMachineReadableVisasB(lines, ref mrzData);
break;
default:
break;
}
}
catch (Exception ex)
{
Logger.Log(ex);
return null;
}
return isSuccessfullyProcessed ? mrzData : null;
}
private static bool ProcessTravelDocument1(List<string> lines, ref MrzData mrzData)
{
var isSuccessfullyProcessed = ProcessTravelDocument1Line0(lines[0], ref mrzData);
if (!isSuccessfullyProcessed)
{
return false;
}
isSuccessfullyProcessed = ProcessTravelDocument1Line1(lines[1], ref mrzData);
if (!isSuccessfullyProcessed)
{
return false;
}
isSuccessfullyProcessed = ProcessTravelDocument1Line2(lines[2], ref mrzData);
if (!isSuccessfullyProcessed)
{
return false;
}
return true;
}
private static bool ProcessTravelDocument1Line0(string line, ref MrzData mrzData)
{
var match = Regex.Match(line, Td1Line0Regex);
if (!match.Success || match.Groups.Count != 6)
{
Logger.Log("Invalid MRZ TD1, Line 0.");
return false;
}
var groups = match.Groups;
mrzData.DocumentType = groups[1].Value;
mrzData.Country = groups[2].Value;
mrzData.DocumentNumber = groups[3].Value;
//mrzData.Hash = groups[4].Value;
mrzData.OptionalData1 = groups[5].Value;
return true;
}
private static bool ProcessTravelDocument1Line1(string line, ref MrzData mrzData)
{
var match = Regex.Match(line, Td1Line1Regex);
if (!match.Success || match.Groups.Count != 9)
{
Logger.Log("Invalid MRZ TD1, Line 1.");
return false;
}
var groups = match.Groups;
mrzData.BirthDate = DateTime.ParseExact(groups[1].Value, "yyMMdd", CultureInfo.InvariantCulture);
//mrzData.Hash = groups[2].Value;
mrzData.Sex = groups[3].Value;
mrzData.ExpiryDate = DateTime.ParseExact(groups[4].Value, "yyMMdd", CultureInfo.InvariantCulture);
//mrzData.Hash = groups[5].Value;
mrzData.OptionalData2 = groups[7].Value;
//mrzData.FinalHash = groups[8].Value;
return true;
}
private static bool ProcessTravelDocument1Line2(string line, ref MrzData mrzData)
{
var match = Regex.Match(line, Td1Line2Regex);
if (!match.Success || match.Groups.Count != 2)
{
Logger.Log("Invalid MRZ TD1, Line 2.");
return false;
}
var groups = match.Groups;
return ProcessNames(groups[1].Value, ref mrzData);
}
private static bool ProcessTravelDocuments2(List<string> lines, ref MrzData mrzData)
{
var isSuccessfullyProcessed = ProcessTravelDocument2Line0(lines[0], ref mrzData);
if (!isSuccessfullyProcessed)
{
return false;
}
isSuccessfullyProcessed = ProcessTravelDocument2Line1(lines[1], ref mrzData);
if (!isSuccessfullyProcessed)
{
return false;
}
return true;
}
private static bool ProcessTravelDocument2Line0(string line, ref MrzData mrzData)
{
var match = Regex.Match(line, Td2Line0Regex);
if (!match.Success || match.Groups.Count != 4)
{
Logger.Log("Invalid MRZ TD2, Line 0.");
return false;
}
var groups = match.Groups;
mrzData.DocumentType = groups[1].Value;
mrzData.Country = groups[2].Value;
return ProcessNames(groups[3].Value, ref mrzData);
}
private static bool ProcessTravelDocument2Line1(string line, ref MrzData mrzData)
{
var match = Regex.Match(line, Td2Line1Regex);
if (!match.Success || match.Groups.Count != 11)
{
Logger.Log("Invalid MRZ TD2, Line 1.");
return false;
}
var groups = match.Groups;
mrzData.DocumentNumber = groups[1].Value;
//mrzData.Hash = groups[2].Value;
mrzData.Nationality = groups[3].Value;
mrzData.BirthDate = DateTime.ParseExact(groups[4].Value, "yyMMdd", CultureInfo.InvariantCulture);
//mrzData.Hash = groups[5].Value;
mrzData.Sex = groups[6].Value;
mrzData.ExpiryDate = DateTime.ParseExact(groups[7].Value, "yyMMdd", CultureInfo.InvariantCulture);
//mrzData.Hash = groups[8].Value;
mrzData.OptionalData2 = groups[9].Value;
//mrzData.FinalHash = groups[10].Value;
return true;
}
private static bool ProcessTravelDocuments3(List<string> lines, ref MrzData mrzData)
{
var isSuccessfullyProcessed = ProcessTravelDocument3Line0(lines[0], ref mrzData);
if (!isSuccessfullyProcessed)
{
return false;
}
isSuccessfullyProcessed = ProcessTravelDocument3Line1(lines[1], ref mrzData);
if (!isSuccessfullyProcessed)
{
return false;
}
return true;
}
private static bool ProcessTravelDocument3Line0(string line, ref MrzData mrzData)
{
var match = Regex.Match(line, Td3Line0Regex);
if (!match.Success || match.Groups.Count != 4)
{
Logger.Log("Invalid MRZ TD3, Line 0.");
return false;
}
var groups = match.Groups;
mrzData.DocumentType = groups[1].Value;
mrzData.Country = groups[2].Value;
return ProcessNames(groups[3].Value, ref mrzData);
}
private static bool ProcessTravelDocument3Line1(string line, ref MrzData mrzData)
{
var match = Regex.Match(line, Td3Line1Regex);
if (!match.Success || match.Groups.Count != 12)
{
Logger.Log("Invalid MRZ TD3, Line 1.");
return false;
}
var groups = match.Groups;
mrzData.DocumentNumber = groups[1].Value;
//mrzData.Hash = groups[2].Value;
mrzData.Nationality = groups[3].Value;
mrzData.BirthDate = DateTime.ParseExact(groups[4].Value, "yyMMdd", CultureInfo.InvariantCulture);
//mrzData.Hash = groups[5].Value;
mrzData.Sex = groups[6].Value;
mrzData.ExpiryDate = DateTime.ParseExact(groups[7].Value, "yyMMdd", CultureInfo.InvariantCulture);
//mrzData.Hash = groups[8].Value;
mrzData.PersonalNumber = groups[9].Value;
//mrzData.Hash = groups[10].Value;
//mrzData.FinalHash = groups[11].Value;
return true;
}
private static bool ProcessMachineReadableVisasA(List<string> lines, ref MrzData mrzData)
{
var isSuccessfullyProcessed = ProcessMachineReadableVisasALine0(lines[0], ref mrzData);
if (!isSuccessfullyProcessed)
{
return false;
}
isSuccessfullyProcessed = ProcessMachineReadableVisasALine1(lines[1], ref mrzData);
if (!isSuccessfullyProcessed)
{
return false;
}
return true;
}
private static bool ProcessMachineReadableVisasALine0(string line, ref MrzData mrzData)
{
var match = Regex.Match(line, MrvALine0Regex);
if (!match.Success || match.Groups.Count != 4)
{
Logger.Log("Invalid MRZ MRVA, Line 0.");
return false;
}
var groups = match.Groups;
mrzData.DocumentType = groups[1].Value;
mrzData.Country = groups[2].Value;
return ProcessNames(groups[3].Value, ref mrzData);
}
private static bool ProcessMachineReadableVisasALine1(string line, ref MrzData mrzData)
{
var match = Regex.Match(line, MrvALine1Regex);
if (!match.Success || match.Groups.Count != 10)
{
Logger.Log("Invalid MRZ MRVA, Line 1.");
return false;
}
var groups = match.Groups;
mrzData.DocumentNumber = groups[1].Value;
//mrzData.Hash = groups[2].Value;
mrzData.Nationality = groups[3].Value;
mrzData.BirthDate = DateTime.ParseExact(groups[4].Value, "yyMMdd", CultureInfo.InvariantCulture);
//mrzData.Hash = groups[5].Value;
mrzData.Sex = groups[6].Value;
mrzData.ExpiryDate = DateTime.ParseExact(groups[7].Value, "yyMMdd", CultureInfo.InvariantCulture);
//mrzData.Hash = groups[8].Value;
mrzData.OptionalData = groups[9].Value;
return true;
}
private static bool ProcessMachineReadableVisasB(List<string> lines, ref MrzData mrzData)
{
var isSuccessfullyProcessed = ProcessMachineReadableVisasBLine0(lines[0], ref mrzData);
if (!isSuccessfullyProcessed)
{
return false;
}
isSuccessfullyProcessed = ProcessMachineReadableVisasBLine1(lines[1], ref mrzData);
if (!isSuccessfullyProcessed)
{
return false;
}
return true;
}
private static bool ProcessMachineReadableVisasBLine0(string line, ref MrzData mrzData)
{
var match = Regex.Match(line, MrvBLine0Regex);
if (!match.Success || match.Groups.Count != 4)
{
Logger.Log("Invalid MRZ MRVB, Line 0.");
return false;
}
var groups = match.Groups;
mrzData.DocumentType = groups[1].Value;
mrzData.Country = groups[2].Value;
return ProcessNames(groups[3].Value, ref mrzData);
}
private static bool ProcessMachineReadableVisasBLine1(string line, ref MrzData mrzData)
{
var match = Regex.Match(line, MrvBLine1Regex);
if (!match.Success || match.Groups.Count != 10)
{
Logger.Log("Invalid MRZ MRVB, Line 1.");
return false;
}
var groups = match.Groups;
mrzData.DocumentNumber = groups[1].Value;
//mrzData.Hash = groups[2].Value;
mrzData.Nationality = groups[3].Value;
mrzData.BirthDate = DateTime.ParseExact(groups[4].Value, "yyMMdd", CultureInfo.InvariantCulture);
//mrzData.Hash = groups[5].Value;
mrzData.Sex = groups[6].Value;
mrzData.ExpiryDate = DateTime.ParseExact(groups[7].Value, "yyMMdd", CultureInfo.InvariantCulture);
//mrzData.Hash = groups[8].Value;
mrzData.OptionalData = groups[9].Value;
return true;
}
private static bool ProcessNames(string nameStr, ref MrzData mrzData)
{
if (string.IsNullOrWhiteSpace(nameStr))
{
return true;
}
var names = nameStr.Replace("<", " ").Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
if (names.Any())
{
mrzData.SurName = names[0];
mrzData.GivenName = string.Join(" ", names.Skip(1));
}
return true;
}
}
public static class Logger
{
public static void Log(string m)
{
Console.WriteLine(m);
}
public static void Log(Exception m)
{
Console.WriteLine(m);
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment