Skip to content

Instantly share code, notes, and snippets.

View vmandic's full-sized avatar
🤠
chillin'

Vedran Mandić vmandic

🤠
chillin'
View GitHub Profile
@vmandic
vmandic / HzzoMedsDownloadDto.cs
Created October 13, 2018 18:38
meds-processor, part 1 of 4, snippet 8
using System;
using System.Linq;
namespace MedsProcessor.Common.Models
{
public class HzzoMedsDownloadDto
{
private readonly string _rootLocation;
public HzzoMedsDownloadDto(string href, string validFrom, string rootLocation)
@vmandic
vmandic / HzzoHtmlScraper.cs
Last active October 16, 2018 17:24
meds-processor, part 1 of 4, snippet 9
/// <summary>
/// Runs <c>ParseHtmlDocument</c> over every supplied document and merges the
/// results into a single set.
/// </summary>
/// <param name="docs">The documents to parse.</param>
/// <returns>
/// A set containing the union of the items parsed from each document; the set
/// is empty when <paramref name="docs"/> has no elements.
/// </returns>
ISet<HzzoMedsDownloadDto> ParseHtmlDocuments(IDocument[] docs) =>
    // Populate one set in a single pass: the previous fold allocated a fresh
    // HashSet per document and re-hashed every item accumulated so far.
    new HashSet<HzzoMedsDownloadDto>(docs.SelectMany(doc => ParseHtmlDocument(doc)));
static ISet<HzzoMedsDownloadDto> ParseMedsLiElements(IEnumerable<IElement> elems) =>
elems.Aggregate(new HashSet<HzzoMedsDownloadDto>(), (medsList, li) =>
{
var href = li.QuerySelector("a").GetAttribute("href");
@vmandic
vmandic / AppController.cs
Created October 13, 2018 18:51
meds-processor, part 1 of 4, snippet 10
public async Task<ActionResult> Index([FromServices] HzzoHtmlScraper scraper)
{
var startTime = DateTime.Now;
// TODO: implement scraper and parser logic
var meds = await scraper.Run();
var totalTime = startTime - DateTime.Now;
return Ok(
$"Done! Handler duration: {totalTime.Duration()}" +
@vmandic
vmandic / AppPathsInfo.cs
Created October 16, 2018 18:59
meds-processor, p2, s1
using System;
namespace MedsProcessor.Common.Models
{
public sealed class AppPathsInfo
{
public AppPathsInfo(string appRootPath)
{
if (appRootPath == null)
throw new ArgumentNullException(nameof(appRootPath));
@vmandic
vmandic / Startup.cs
Last active October 19, 2018 18:46
meds-processor, p2, s2
public void ConfigureServices(IServiceCollection services)
{
services.AddAngleSharp();
// adding the AppPathsInfo singleton instance carrying the ContentRootPath
// provided by the IHostingEnvironment injected instance
services.AddSingleton(
s => new AppPathsInfo(s.GetService<IHostingEnvironment>().ContentRootPath));
services.AddSingleton<HzzoHtmlScraper>();
@vmandic
vmandic / HzzoExcelDownloader.cs
Last active October 20, 2018 09:02
meds-processor, p2, s3
using System.Collections.Generic;
using System.IO;
using System.Net.Http;
using System.Threading.Tasks;
using MedsProcessor.Common.Models;
using static MedsProcessor.Common.Constants;
namespace MedsProcessor.Downloader
{
public class HzzoExcelDownloader
@vmandic
vmandic / Startup.cs
Created October 18, 2018 19:03
meds-processor, p2, s4
public void ConfigureServices(IServiceCollection services)
{
// we need the http client factory:
services.AddHttpClient();
services.AddAngleSharp();
services.AddSingleton(
s => new AppPathsInfo(s.GetService<IHostingEnvironment>().ContentRootPath));
services.AddSingleton<HzzoHtmlScraper>();
@vmandic
vmandic / HzzoExcelDownloader.cs
Last active October 19, 2018 19:16
meds-processor, p2, s5
public async Task<ISet<HzzoMedsDownloadDto>> Run(ISet<HzzoMedsDownloadDto> meds)
{
// NOTE: throttle requests in parallel
var parallelismDegree = 5;
var waitBetweenRequestsMs = 500;
var savingItems = new List<Task>();
var notDownloadedDocs = meds.Where(x => !x.IsAlreadyDownloaded).ToList();
for (int i = 0; i < notDownloadedDocs.Count; i += parallelismDegree)
@vmandic
vmandic / HzzoExcelDownloader.cs
Created October 19, 2018 19:19
meds-processor, p2, s6
/// <summary>
/// Requests the stream located at <c>doc.Href</c> through <c>_httpCli</c> and
/// stores it on the document's <c>DocumentStream</c> property.
/// </summary>
/// <param name="doc">The download descriptor to fetch; it is mutated in place.</param>
/// <returns>The same <paramref name="doc"/> instance, now carrying the fetched stream.</returns>
async Task<HzzoMedsDownloadDto> DownloadExcel(HzzoMedsDownloadDto doc)
{
    var fetchedStream = await _httpCli.GetStreamAsync(doc.Href);
    doc.DocumentStream = fetchedStream;

    return doc;
}
static Task SaveExcel(HzzoMedsDownloadDto doc) =>
Task.Factory.StartNew(() =>
{
using(var fileStream = File.Create(doc.FilePath, BUFFER_SIZE, FileOptions.Asynchronous))
@vmandic
vmandic / HzzoHtmlScraper.cs
Last active October 19, 2018 19:32
meds-processor, p2, s7
readonly AppPathsInfo _appPathsInfo;
/// <summary>
/// Creates the scraper and stores its two collaborators in the instance fields.
/// </summary>
/// <param name="browsingContext">Browsing context kept in <c>_browsingContext</c>.</param>
/// <param name="appPathsInfo">Application path information kept in <c>_appPathsInfo</c>.</param>
public HzzoHtmlScraper(IBrowsingContext browsingContext, AppPathsInfo appPathsInfo)
{
    _appPathsInfo = appPathsInfo;
    _browsingContext = browsingContext;
}