Last active
September 29, 2017 19:39
-
-
Save mathieu-benoit/013990525902f9e991cc55ea31c1e3f7 to your computer and use it in GitHub Desktop.
Function - MICR Code Reader
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using Microsoft.Azure.WebJobs; | |
using Microsoft.Azure.WebJobs.Extensions.Http; | |
using Microsoft.Azure.WebJobs.Host; | |
using System; | |
using System.Drawing; | |
using System.IO; | |
using System.Net; | |
using System.Net.Http; | |
using System.Text.RegularExpressions; | |
using System.Threading.Tasks; | |
using Tesseract; | |
namespace MicrCodeReader | |
{ | |
/// <summary>
/// HTTP-triggered Azure Function that runs Tesseract OCR (with the "micr" trained data)
/// over an image posted in the request body and extracts the transit, institution and
/// account numbers of a Canadian cheque from the recognized MICR line.
///
/// Some resources which have helped for this implementation:
/// - https://kimsereyblog.blogspot.ca/2016/05/extract-text-from-images-in-f-ocring.html
/// - https://github.com/charlesw/tesseract
/// - https://github.com/wealthsimple/cheque-ocr
/// - https://www.hanselman.com/blog/WhatIsServerlessComputingExploringAzureFunctions.aspx
/// </summary>
public static class MicrCodeReader
{
    // Name of the Tesseract trained-data set to load from the data path.
    private const string LanguageCode = "micr";

    // Characters Tesseract is allowed to emit. The letters a-d presumably stand for the
    // four MICR control symbols in the "micr" trained data - confirm against that data set.
    private const string MicrCharacters = "0123456789abcd";

    // Expected layout of the recognized line once whitespace has been removed:
    // <digits> "ca" <4-5 digit transit> "d" <3 digit institution> "a" <account: digits and c/d symbols>.
    private const string CanadianChequePattern = @"[0-9]+ca(?<transit>[0-9]{4,5})d(?<institution>[0-9]{3})a(?<account>[dc0-9]+)";

    // Regex instances are immutable and thread-safe, so build them once instead of per request.
    private static readonly Regex CanadianChequeRegex = new Regex(CanadianChequePattern);
    private static readonly Regex NonDigitRegex = new Regex(@"\D");

    /// <summary>
    /// Reads the posted image, OCRs it and returns the cheque numbers found on it.
    /// </summary>
    /// <param name="request">HTTP POST request whose body is the raw bytes of the cheque image.</param>
    /// <param name="log">Function trace writer, used to report rejected request bodies.</param>
    /// <returns>
    /// 200 with <c>{ meanConfidence, numbers: { transit, institution, account } }</c> on success;
    /// 400 (<c>NO_IMAGE_PROVIDED</c> / <c>INVALID_IMAGE</c>) when the body is missing, empty or not a decodable image;
    /// 500 (<c>NO_PAGE_ON_IMAGE_FOUND</c> / <c>NO_TEXT_BLOCKS_FOUND</c> / <c>NO_CHEQUE_NUMBERS_FOUND</c>) when OCR yields nothing usable.
    /// </returns>
    [FunctionName("MicrCodeReader")]
    public static async Task<HttpResponseMessage> Run([HttpTrigger(AuthorizationLevel.Anonymous, "post", Route = null)]HttpRequestMessage request, TraceWriter log)
    {
        if (request.Content == null)
        {
            return request.CreateErrorResponse(HttpStatusCode.BadRequest, "NO_IMAGE_PROVIDED");
        }

        // GDI+ reads lazily from the stream a Bitmap was created from, so the buffer
        // must stay open for as long as the bitmap is in use.
        using (var memoryStream = new MemoryStream())
        {
            using (var imageStream = await request.Content.ReadAsStreamAsync())
            {
                await imageStream.CopyToAsync(memoryStream);
            }

            if (memoryStream.Length == 0)
            {
                return request.CreateErrorResponse(HttpStatusCode.BadRequest, "NO_IMAGE_PROVIDED");
            }

            memoryStream.Position = 0;

            Bitmap bitmap;
            try
            {
                bitmap = new Bitmap(memoryStream);
            }
            catch (ArgumentException ex)
            {
                // Bitmap(Stream) throws ArgumentException when the bytes are not a supported image format.
                log.Warning("Request body could not be decoded as an image: " + ex.Message);
                return request.CreateErrorResponse(HttpStatusCode.BadRequest, "INVALID_IMAGE");
            }

            using (bitmap)
            {
                return ExtractChequeNumbers(request, bitmap);
            }
        }
    }

    /// <summary>
    /// Runs the OCR engine over <paramref name="bitmap"/> and builds the HTTP response
    /// describing the cheque numbers found (or the reason none were found).
    /// </summary>
    private static HttpResponseMessage ExtractChequeNumbers(HttpRequestMessage request, Bitmap bitmap)
    {
        //Used when hosted on Azure Function on Azure, not locally (ex: "d:\home\site\wwwroot\bin").
        TesseractEnviornment.CustomSearchPath = Environment.GetEnvironmentVariable("TesseractEnviornmentCustomSearchPath");
        var dataPath = Environment.GetEnvironmentVariable("TesseractEnviornmentDataPath")/* "d:\home\site\wwwroot" */ ?? @"./tessdata";

        using (var engine = new TesseractEngine(dataPath, LanguageCode, EngineMode.Default))
        {
            engine.SetVariable("tessedit_char_whitelist", MicrCharacters);

            // For a local or debug scenario the pix can instead be loaded with Pix.LoadFromFile("./sample_cibc.jpg").
            using (var pix = PixConverter.ToPix(bitmap))
            using (var page = engine.Process(pix))
            {
                if (page == null)
                {
                    return request.CreateErrorResponse(HttpStatusCode.InternalServerError, "NO_PAGE_ON_IMAGE_FOUND");
                }

                var meanConfidence = page.GetMeanConfidence();
                var pageText = page.GetText();
                if (string.IsNullOrEmpty(pageText))
                {
                    return request.CreateErrorResponse(HttpStatusCode.InternalServerError, "NO_TEXT_BLOCKS_FOUND");
                }

                // Collapse the recognized text into a single token stream before matching.
                pageText = pageText.Replace("\n", string.Empty).Replace(" ", string.Empty);

                var chequeMatch = CanadianChequeRegex.Match(pageText);
                if (!chequeMatch.Success)
                {
                    return request.CreateErrorResponse(HttpStatusCode.InternalServerError, "NO_CHEQUE_NUMBERS_FOUND");
                }

                return request.CreateResponse(HttpStatusCode.OK, new
                {
                    meanConfidence,
                    numbers = new
                    {
                        transit = RemoveNonNumericSymbols(chequeMatch.Groups["transit"].Value),
                        institution = RemoveNonNumericSymbols(chequeMatch.Groups["institution"].Value),
                        account = RemoveNonNumericSymbols(chequeMatch.Groups["account"].Value)
                    }
                });
            }
        }
    }

    /// <summary>
    /// Strips every non-digit character from <paramref name="text"/>
    /// (the account group may still contain the c/d symbol letters).
    /// </summary>
    /// <returns>The digits of <paramref name="text"/>; null or empty input is returned unchanged.</returns>
    private static string RemoveNonNumericSymbols(string text)
    {
        return string.IsNullOrEmpty(text) ? text : NonDigitRegex.Replace(text, string.Empty);
    }
}
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment