Last active
June 22, 2020 11:49
-
-
Save Legends/cd561e7f0a9667c9e4600f925d327df7 to your computer and use it in GitHub Desktop.
Puppeteer Sharp - Scraping using C# and puppeteer
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// <summary>
/// Drives a headless Chromium instance through the web form at the configured URL:
/// fills in the income field, ticks the "married" checkbox, submits the form and
/// reads the two result values from the result page.
/// </summary>
/// <returns>
/// A <see cref="DtoEst"/> whose <c>EstBetrag</c> and <c>EstProzentsatz</c> hold the
/// parsed result values.
/// </returns>
/// <remarks>
/// Parsing is best-effort: if a scraped text cannot be parsed as a de-DE number,
/// the corresponding property is left at 0 (the result of a failed
/// <see cref="decimal.TryParse(string, NumberStyles, IFormatProvider, out decimal)"/>).
/// NOTE(review): <c>NettoUmsatz</c> is defined outside this method and is interpolated
/// verbatim into a JavaScript string literal — confirm it can never contain a quote.
/// </remarks>
private async Task<DtoEst> LoadXXXAsync()
{
    // Downloads Chromium to the local project; Puppeteer Sharp needs it to run.
    // The original author notes this takes about 2 minutes.
    await new BrowserFetcher().DownloadAsync(BrowserFetcher.DefaultRevision);

    var url = "https://www.xxxxxx.xhtml";

    // The element ids contain colons. getElementById accepts them as-is, but a CSS
    // selector needs the colon escaped (\3A):
    // https://stackoverflow.com/questions/45110893/select-elements-by-attributes-with-colon
    // https://mothereff.in/css-escapes
    string marriedCheckboxId = "bmf_form_ekst:ekst_pv:0";
    string umsatzInputId = "bmf_form_ekst:ekst_zve";
    string submitButtonSelector = @"#bmf_form_ekst\3A income_ekst";

    // Shared prefix of the two result cells; only the column index differs.
    string resultRowSelector = "#ui-id-4 > div.ekst_ergebnis > div:nth-child(6) > table > tbody > tr:nth-child(3)";

    // Dispose the browser as well as the page, otherwise the Chromium process
    // stays alive after this method returns or throws.
    using (var browser = await Puppeteer.LaunchAsync(new LaunchOptions { Headless = true }))
    using (var page = await browser.NewPageAsync())
    {
        await page.GoToAsync(url);

        // Wait for the income input to exist, then set its value.
        await page.WaitForExpressionAsync($"document.getElementById('{umsatzInputId}')!=null");
        await page.EvaluateExpressionAsync($"document.getElementById('{umsatzInputId}').value = '{NettoUmsatz}';");

        // Wait for the checkbox to exist, then tick it.
        await page.WaitForExpressionAsync($"document.getElementById('{marriedCheckboxId}')!=null");
        await page.EvaluateExpressionAsync($"document.getElementById('{marriedCheckboxId}').checked = true;");

        // Start waiting for the navigation BEFORE clicking: if the wait is only
        // registered after the click, a fast navigation can complete first and the
        // wait then runs into its timeout.
        var navigationTask = page.WaitForNavigationAsync();
        await page.ClickAsync(submitButtonSelector);
        await navigationTask;

        // Read the two result cells from the result page.
        var strValueEst = await page.EvaluateFunctionAsync<string>(
            $"()=>document.querySelector('{resultRowSelector} > td:nth-child(2) > strong').textContent");
        var strValueEstPercent = await page.EvaluateFunctionAsync<string>(
            $"()=>document.querySelector('{resultRowSelector} > td:nth-child(3) > strong').textContent");

        // The page renders numbers in German format, so parse with de-DE.
        // TrimEnd strips any trailing characters from the given set (unit suffix).
        // A failed parse leaves the value at 0 (best-effort, as before).
        var deDE = new CultureInfo("de-DE");
        var numberStyle = NumberStyles.AllowThousands | NumberStyles.AllowDecimalPoint;
        Decimal.TryParse(strValueEst.TrimEnd(" Euro".ToCharArray()), numberStyle, deDE, out var decValueEst);
        Decimal.TryParse(strValueEstPercent.TrimEnd(" %".ToCharArray()), numberStyle, deDE, out var decValueEstPercent);

        return new DtoEst
        {
            EstBetrag = decValueEst,
            EstProzentsatz = decValueEstPercent
        };
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment