This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
ISet<HzzoMedsDownloadDto> ParseMedsLiElements(IEnumerable<IElement> elems) => | |
elems.Aggregate(new HashSet<HzzoMedsDownloadDto>(), (medsList, li) => | |
{ | |
var href = li.QuerySelector("a").GetAttribute("href"); | |
// NOTE: this domain is not available, links don't work :-( | |
if (!href.Contains("cdn.hzzo.hr")) | |
{ | |
var dtParts = li.TextContent.TrimEnd().Split(' ').LastOrDefault().Split('.'); | |
var downloadDto = new HzzoMedsDownloadDto( |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
public class HzzoMedsDownloadDto | |
{ | |
private readonly string _rootLocation; | |
/// <summary>
/// Creates a download descriptor from a link, its validity date and a root location.
/// </summary>
/// <param name="href">Value stored in <c>Href</c>.</param>
/// <param name="validFrom">Date text; parsed with <see cref="DateTime.Parse(string)"/> and stored in <c>ValidFrom</c>.</param>
/// <param name="rootLocation">Value kept in the private <c>_rootLocation</c> field.</param>
/// <exception cref="ArgumentNullException"><paramref name="validFrom"/> is null.</exception>
/// <exception cref="FormatException"><paramref name="validFrom"/> is not a recognizable date.</exception>
public HzzoMedsDownloadDto(string href, string validFrom, string rootLocation)
{
    Href = href;

    // NOTE(review): DateTime.Parse(string) interprets the text using the current
    // culture - confirm the format produced by the caller is culture-independent.
    var parsedValidFrom = DateTime.Parse(validFrom);
    ValidFrom = parsedValidFrom;

    _rootLocation = rootLocation;
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
public async Task<ActionResult> Index( | |
[FromServices] HzzoHtmlScraper scraper, | |
[FromServices] HzzoExcelDownloader downloader) | |
{ | |
var startTime = DateTime.Now; | |
var meds = await downloader.Run(await scraper.Run()); | |
var totalTime = startTime - DateTime.Now; | |
return Ok( | |
$"Done! Handler duration: {totalTime.Duration()}" + |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System.Collections.Generic; | |
using System.Threading.Tasks; | |
using MedsProcessor.Common.Models; | |
namespace MedsProcessor.Parser | |
{ | |
public class HzzoExcelParser | |
{ | |
public Task<ISet<HzzoMedsDownloadDto>> Run(ISet<HzzoMedsDownloadDto> meds) | |
{ |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
public async Task<ISet<HzzoMedsDownloadDto>> Run(ISet<HzzoMedsDownloadDto> meds) | |
{ | |
await Task.WhenAll( | |
// NOTE: due to excel docs designed in different ways, we do this separation of work | |
StartLongRunning(() => ParsePrimaryListsStartingWith2014_02(meds)), | |
StartLongRunning(() => ParseSupplementaryListsStartingWith2014_02(meds)), | |
StartLongRunning(() => ParsePrimaryListsUpTo2014_01(meds)), | |
StartLongRunning(() => ParseSupplementaryListsUpTo2014_01(meds)) | |
); |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Inclusive upper bound on ValidFrom used by ParseSupplementaryListsUpTo2014_01.
// NOTE(review): the literal is (year: 2014, month: 1, day: 3), i.e. 3 January 2014 -
// confirm this is the intended boundary given the "2014_01" / "2014_02" method names.
static readonly DateTime filterDtStartWith2014 = new DateTime(2014, 1, 3);

/// <summary>
/// Picks the downloads whose <c>ValidFrom</c> is on or before
/// <c>filterDtStartWith2014</c> and whose file name contains "dopunska" or "dll"
/// (compared after invariant lower-casing), and passes them to
/// <c>ParseHzzoExcelDocuments</c> as <c>DrugListType.Supplementary</c> with the
/// "starting with 2014" flag set to <c>false</c>.
/// </summary>
/// <param name="meds">Set of downloads to filter; the filter is evaluated lazily.</param>
void ParseSupplementaryListsUpTo2014_01(ISet<HzzoMedsDownloadDto> meds)
{
    var oldSupplementaryLists = meds.Where(download =>
    {
        // Date check first, so the file name is only inspected for entries in range.
        if (!(download.ValidFrom <= filterDtStartWith2014))
        {
            return false;
        }

        var lowerCaseName = download.FileName.ToLowerInvariant();
        return lowerCaseName.Contains("dopunska") || lowerCaseName.Contains("dll");
    });

    ParseHzzoExcelDocuments(oldSupplementaryLists, DrugListType.Supplementary, false);
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
public class DrugImportDto | |
{ | |
public int RowId { get; set; } | |
public DrugListType ListType { get; set; } | |
public DateTime ValidFrom { get; set; } | |
public string AtkCode { get; set; } | |
public DrugApplicationTypeLimitation ApplicationTypeLimitation { get; set; } | |
public string GenericName { get; set; } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
namespace MedsProcessor.Parser | |
{ | |
/// <summary>
/// Identifies which kind of drug list a parsed document belongs to.
/// </summary>
public enum DrugListType
{
    /// <summary>No list type assigned; this is the enum's default (zero) value.</summary>
    Undefined = 0,

    /// <summary>Primary drug list.</summary>
    Primary = 1,

    /// <summary>Supplementary drug list.</summary>
    Supplementary = 2
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
static ISheet OpenWorkbookSheetWithNpoi(FileStream stream, HzzoMedsDownloadDto med, HzzoMedsDownloadDto latestMed) | |
{ | |
ISheet drugListSheet = null; | |
try | |
{ | |
if (med.FileName.ToLowerInvariant().EndsWith(".xls")) | |
{ | |
var hssfWorkbook = new HSSFWorkbook(stream); | |
drugListSheet = hssfWorkbook.GetSheetAt(0); |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
void ParseHzzoExcelDocuments(IEnumerable<HzzoMedsDownloadDto> filteredMeds, DrugListType listType, bool isListStartingWith2014) | |
{ | |
HzzoMedsDownloadDto latestMed = null; | |
int latestRow = 0; | |
int latestCol = 0; | |
try | |
{ | |
Parallel.ForEach(filteredMeds, med => | |
{ |