Created
September 29, 2016 21:45
-
-
Save andygjp/582c9e74c1f09fbe356640caa0c87a1b to your computer and use it in GitHub Desktop.
Opens the specified files, extracts the email addresses they contain, and writes each address with its occurrence count to results.txt
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
namespace ExtractEmails
{
    using System;
    using System.Collections.Concurrent;
    using System.IO;
    using System.Linq;
    using System.Text.RegularExpressions;

    /// <summary>
    /// Console tool that scans the files named on the command line for email
    /// addresses and writes every distinct address, together with the number
    /// of times it was seen, to <c>results.txt</c> in the current directory.
    /// </summary>
    public class Program
    {
        /// <summary>
        /// Shared email matcher. The pattern only lists lower-case letters, so it
        /// must be applied case-insensitively; otherwise an address such as
        /// <c>Bob@example.com</c> is recorded truncated as <c>ob@example.com</c>.
        /// Built once because <see cref="Regex"/> matching methods are safe to
        /// call concurrently from several threads.
        /// </summary>
        private static readonly Regex EmailRegex = new Regex(
            "[a-z0-9!#$%&\'*+/=?^_`{|}~-]+(?:\\.[a-z0-9!#$%&\'*+/=?^_`{|}~-]+)*@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?",
            RegexOptions.IgnoreCase | RegexOptions.CultureInvariant | RegexOptions.Compiled);

        /// <summary>
        /// Occurrence count per address, keyed by the address exactly as it was
        /// written in the input (spellings that differ only in case are separate keys).
        /// </summary>
        private static readonly ConcurrentDictionary<string, int> emails = new ConcurrentDictionary<string, int>();

        /// <summary>
        /// Processes every existing file in <paramref name="args"/> in parallel and
        /// then writes the collected counts to <c>results.txt</c>, one
        /// <c>address, count</c> pair per line.
        /// </summary>
        /// <param name="args">Paths of the files to scan; paths that do not exist are skipped.</param>
        public static void Main(string[] args)
        {
            args.AsParallel()
                .Select(x => new FileInfo(x))
                .Where(x => x.Exists)
                .ForAll(CountEmailsInFile);

            using (var writer = File.CreateText("results.txt"))
            {
                foreach (var e in emails)
                {
                    writer.WriteLine(e.Key + ", " + e.Value);
                }
            }
        }

        /// <summary>
        /// Reads <paramref name="file"/> line by line and records every email
        /// address found on each line.
        /// </summary>
        /// <param name="file">The file to scan.</param>
        private static void CountEmailsInFile(FileInfo file)
        {
            using (var reader = file.OpenText())
            {
                string line;
                while ((line = reader.ReadLine()) != null)
                {
                    AddIfMatched(EmailRegex.Match(line));
                }
            }
        }

        /// <summary>
        /// Records <paramref name="match"/> and every match that follows it in the
        /// same input string. Does nothing when the match was unsuccessful.
        /// </summary>
        /// <param name="match">The first match to record.</param>
        private static void AddIfMatched(Match match)
        {
            // Iterate rather than recurse so that a line holding a very large
            // number of addresses cannot exhaust the stack.
            for (var current = match; current.Success; current = current.NextMatch())
            {
                emails.AddOrUpdate(current.Value, 1, (key, count) => count + 1);
            }
        }
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment