This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
public async Task<ISet<HzzoMedsDownloadDto>> Run(ISet<HzzoMedsDownloadDto> meds) | |
{ | |
await Task.WhenAll( | |
// NOTE: due to excel docs designed in different ways, we do this separation of work | |
StartLongRunning(() => ParsePrimaryListsStartingWith2014_02(meds)), | |
StartLongRunning(() => ParseSupplementaryListsStartingWith2014_02(meds)), | |
StartLongRunning(() => ParsePrimaryListsUpTo2014_01(meds)), | |
StartLongRunning(() => ParseSupplementaryListsUpTo2014_01(meds)) | |
); |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System.Collections.Generic; | |
using System.Threading.Tasks; | |
using MedsProcessor.Common.Models; | |
namespace MedsProcessor.Parser | |
{ | |
public class HzzoExcelParser | |
{ | |
public Task<ISet<HzzoMedsDownloadDto>> Run(ISet<HzzoMedsDownloadDto> meds) | |
{ |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
public async Task<ActionResult> Index( | |
[FromServices] HzzoHtmlScraper scraper, | |
[FromServices] HzzoExcelDownloader downloader) | |
{ | |
var startTime = DateTime.Now; | |
var meds = await downloader.Run(await scraper.Run()); | |
var totalTime = startTime - DateTime.Now; | |
return Ok( | |
$"Done! Handler duration: {totalTime.Duration()}" + |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
public class HzzoMedsDownloadDto | |
{ | |
private readonly string _rootLocation; | |
/// <summary>
/// Creates a download descriptor from the raw values supplied by the caller.
/// </summary>
/// <param name="href">Value assigned as-is to <c>Href</c>.</param>
/// <param name="validFrom">
/// Date text converted with <see cref="DateTime.Parse(string)"/> (current-culture
/// rules) and assigned to <c>ValidFrom</c>; text that cannot be parsed makes the
/// constructor throw.
/// </param>
/// <param name="rootLocation">Value kept in the private <c>_rootLocation</c> field.</param>
public HzzoMedsDownloadDto(string href, string validFrom, string rootLocation)
{
    // Assignment order is kept as in the original; the member definitions are outside this view.
    Href = href;
    ValidFrom = DateTime.Parse(validFrom);
    _rootLocation = rootLocation;
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
ISet<HzzoMedsDownloadDto> ParseMedsLiElements(IEnumerable<IElement> elems) => | |
elems.Aggregate(new HashSet<HzzoMedsDownloadDto>(), (medsList, li) => | |
{ | |
var href = li.QuerySelector("a").GetAttribute("href"); | |
// NOTE: this domain is not available, links don't work :-( | |
if (!href.Contains("cdn.hzzo.hr")) | |
{ | |
var dtParts = li.TextContent.TrimEnd().Split(' ').LastOrDefault().Split('.'); | |
var downloadDto = new HzzoMedsDownloadDto( |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
readonly AppPathsInfo _appPathsInfo; | |
/// <summary>
/// Stores the injected collaborators in the scraper's private fields.
/// </summary>
/// <param name="browsingContext">Browsing context kept in <c>_browsingContext</c>.</param>
/// <param name="appPathsInfo">Application path information kept in <c>_appPathsInfo</c>.</param>
public HzzoHtmlScraper(IBrowsingContext browsingContext, AppPathsInfo appPathsInfo) =>
    (_browsingContext, _appPathsInfo) = (browsingContext, appPathsInfo);
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// <summary>
/// Requests <c>doc.Href</c> through <c>_httpCli.GetStreamAsync</c> and attaches the
/// resulting stream to the descriptor.
/// </summary>
/// <param name="doc">Descriptor whose <c>Href</c> is fetched; it is mutated in place.</param>
/// <returns>The same <paramref name="doc"/> instance, with <c>DocumentStream</c> set.</returns>
async Task<HzzoMedsDownloadDto> DownloadExcel(HzzoMedsDownloadDto doc)
{
    var responseStream = await _httpCli.GetStreamAsync(doc.Href);

    // NOTE(review): the stream is not disposed here; presumably the consumer of
    // DocumentStream is responsible for that — confirm against the saving code.
    doc.DocumentStream = responseStream;
    return doc;
}
static Task SaveExcel(HzzoMedsDownloadDto doc) => | |
Task.Factory.StartNew(() => | |
{ | |
using(var fileStream = File.Create(doc.FilePath, BUFFER_SIZE, FileOptions.Asynchronous)) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
public async Task<ISet<HzzoMedsDownloadDto>> Run(ISet<HzzoMedsDownloadDto> meds) | |
{ | |
// NOTE: throttle requests in parallel | |
var parallelismDegree = 5; | |
var waitBetweenRequestsMs = 500; | |
var savingItems = new List<Task>(); | |
var notDownloadedDocs = meds.Where(x => !x.IsAlreadyDownloaded).ToList(); | |
for (int i = 0; i < notDownloadedDocs.Count; i += parallelismDegree) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
public void ConfigureServices(IServiceCollection services) | |
{ | |
// we need the http client factory: | |
services.AddHttpClient(); | |
services.AddAngleSharp(); | |
services.AddSingleton( | |
s => new AppPathsInfo(s.GetService<IHostingEnvironment>().ContentRootPath)); | |
services.AddSingleton<HzzoHtmlScraper>(); |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System.Collections.Generic; | |
using System.IO; | |
using System.Net.Http; | |
using System.Threading.Tasks; | |
using MedsProcessor.Common.Models; | |
using static MedsProcessor.Common.Constants; | |
namespace MedsProcessor.Downloader | |
{ | |
public class HzzoExcelDownloader |