Skip to content

Instantly share code, notes, and snippets.

@mathieu-benoit
Last active September 29, 2017 19:39
Show Gist options
  • Save mathieu-benoit/013990525902f9e991cc55ea31c1e3f7 to your computer and use it in GitHub Desktop.
Save mathieu-benoit/013990525902f9e991cc55ea31c1e3f7 to your computer and use it in GitHub Desktop.
Function - MICR Code Reader
using Microsoft.Azure.WebJobs;
using Microsoft.Azure.WebJobs.Extensions.Http;
using Microsoft.Azure.WebJobs.Host;
using System;
using System.Drawing;
using System.IO;
using System.Net;
using System.Net.Http;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using Tesseract;
namespace MicrCodeReader
{
/// <summary>
/// HTTP-triggered Azure Function that OCRs the MICR line of a cheque image
/// (sent as the raw POST body) and returns the Canadian transit, institution
/// and account numbers it finds, together with Tesseract's mean confidence.
/// Some resources which have helped for this implementation:
/// - https://kimsereyblog.blogspot.ca/2016/05/extract-text-from-images-in-f-ocring.html
/// - https://github.com/charlesw/tesseract
/// - https://github.com/wealthsimple/cheque-ocr
/// - https://www.hanselman.com/blog/WhatIsServerlessComputingExploringAzureFunctions.aspx
/// </summary>
public static class MicrCodeReader
{
    private const string LANGUAGE_CODE = "micr";
    private const string MICR_CHARACTERS = "0123456789abcd";
    private const string CANADIAN_CHEQUE_REGEX = @"[0-9]+ca(?<transit>[0-9]{4,5})d(?<institution>[0-9]{3})a(?<account>[dc0-9]+)";

    // Regex instances are immutable and safe to share, so build them once
    // instead of on every request.
    private static readonly Regex ChequeRegex = new Regex(CANADIAN_CHEQUE_REGEX, RegexOptions.Compiled);
    private static readonly Regex NonDigitRegex = new Regex(@"\D", RegexOptions.Compiled);

    /// <summary>
    /// Function entry point: reads the posted image, runs OCR on it and returns the cheque numbers.
    /// </summary>
    /// <param name="request">The incoming POST request whose body is the cheque image.</param>
    /// <param name="log">Function trace writer (currently unused).</param>
    /// <returns>
    /// 200 with <c>{ meanConfidence, numbers: { transit, institution, account } }</c> on success;
    /// 400 <c>INVALID_IMAGE</c> when the body is missing or is not a decodable image;
    /// 500 with <c>NO_PAGE_ON_IMAGE_FOUND</c>, <c>NO_TEXT_BLOCKS_FOUND</c> or
    /// <c>NO_CHEQUE_NUMBERS_FOUND</c> when OCR does not yield cheque numbers.
    /// </returns>
    [FunctionName("MicrCodeReader")]
    public static async Task<HttpResponseMessage> Run([HttpTrigger(AuthorizationLevel.Anonymous, "post", Route = null)]HttpRequestMessage request, TraceWriter log)
    {
        // The MemoryStream must outlive the Bitmap: GDI+ keeps reading from the
        // stream it was constructed on, so both are disposed only after OCR is done.
        using (var memoryStream = new MemoryStream())
        {
            if (request.Content != null)
            {
                using (var imageStream = await request.Content.ReadAsStreamAsync())
                {
                    await imageStream.CopyToAsync(memoryStream);
                }
            }
            memoryStream.Position = 0;

            Bitmap bitmap;
            try
            {
                bitmap = new Bitmap(memoryStream);
            }
            catch (ArgumentException)
            {
                // Bitmap(Stream) throws ArgumentException when the stream holds no image data.
                return request.CreateErrorResponse(HttpStatusCode.BadRequest, "INVALID_IMAGE");
            }

            using (bitmap)
            {
                return ReadChequeNumbers(request, bitmap);
            }
        }
    }

    /// <summary>
    /// Runs Tesseract with the MICR language data over <paramref name="bitmap"/> and
    /// builds the HTTP response describing the outcome.
    /// </summary>
    /// <param name="request">Request used to create the response message.</param>
    /// <param name="bitmap">The decoded cheque image; the caller keeps ownership.</param>
    /// <returns>The success or error response for this image.</returns>
    private static HttpResponseMessage ReadChequeNumbers(HttpRequestMessage request, Bitmap bitmap)
    {
        //Used when hosted on Azure Function on Azure, not locally (ex: "d:\home\site\wwwroot\bin").
        TesseractEnviornment.CustomSearchPath = Environment.GetEnvironmentVariable("TesseractEnviornmentCustomSearchPath");
        // ex. on Azure: "d:\home\site\wwwroot"; falls back to the local tessdata folder.
        var dataPath = Environment.GetEnvironmentVariable("TesseractEnviornmentDataPath") ?? @"./tessdata";

        using (var engine = new TesseractEngine(dataPath, LANGUAGE_CODE, EngineMode.Default))
        {
            // Restrict recognition to the characters the MICR training data emits.
            engine.SetVariable("tessedit_char_whitelist", MICR_CHARACTERS);

            // For a local or debug scenario, Pix.LoadFromFile("./sample_cibc.jpg") can be used instead.
            using (var pix = PixConverter.ToPix(bitmap))
            using (var page = engine.Process(pix))
            {
                if (page == null)
                {
                    return request.CreateErrorResponse(HttpStatusCode.InternalServerError, "NO_PAGE_ON_IMAGE_FOUND");
                }

                var meanConfidence = page.GetMeanConfidence();
                var pageText = page.GetText();
                if (string.IsNullOrEmpty(pageText))
                {
                    return request.CreateErrorResponse(HttpStatusCode.InternalServerError, "NO_TEXT_BLOCKS_FOUND");
                }

                // Collapse the OCR output to one unbroken line so the pattern can span it.
                pageText = pageText.Replace("\n", string.Empty).Replace(" ", string.Empty);
                var chequeMatch = ChequeRegex.Match(pageText);
                if (string.IsNullOrEmpty(chequeMatch.Groups["transit"].Value))
                {
                    return request.CreateErrorResponse(HttpStatusCode.InternalServerError, "NO_CHEQUE_NUMBERS_FOUND");
                }

                return request.CreateResponse(HttpStatusCode.OK, new
                {
                    meanConfidence = meanConfidence,
                    numbers = new
                    {
                        transit = RemoveNonNumericSymbols(chequeMatch.Groups["transit"].Value),
                        institution = RemoveNonNumericSymbols(chequeMatch.Groups["institution"].Value),
                        account = RemoveNonNumericSymbols(chequeMatch.Groups["account"].Value)
                    }
                });
            }
        }
    }

    /// <summary>
    /// Strips every non-digit character from <paramref name="text"/>.
    /// </summary>
    /// <param name="text">The captured MICR fragment; may be null or empty.</param>
    /// <returns>The digits of <paramref name="text"/>, or the input unchanged when it is null or empty.</returns>
    private static string RemoveNonNumericSymbols(string text)
    {
        if (string.IsNullOrEmpty(text))
        {
            return text;
        }

        return NonDigitRegex.Replace(text, "");
    }
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment