For example, Alpine: the base image is only about 5MB.
# Install the zoneinfo database (Alpine ships without it); --no-cache
# fetches the index on the fly instead of storing it in the image layer.
RUN apk add --no-cache tzdata
# Default timezone for the container — assumes processes honor TZ
# (standard libc behavior).
ENV TZ America/New_York
#!/bin/bash
# Cleanly terminate the process whose PID is $1 without SIGKILL:
# attach with gdb, overwrite the two bytes at RIP with the x86-64
# `syscall` instruction (0f 05, written little-endian as 0x050f),
# load rax=231 (exit_group on x86-64) and rdi=0 (exit status),
# then continue — the target executes exit_group(0) itself.
# NOTE(review): x86-64 Linux only; requires ptrace permission on $1.
gdb -p "$1" -batch \
  -ex 'set {short}$rip = 0x050f' \
  -ex 'set $rax=231' \
  -ex 'set $rdi=0' \
  -ex 'cont'
year | ct | amt | kind
---|---|---|---
2011 | 0 | 0 | Filing Date
2011 | 2 | 10 | Transaction Date
2012 | 0 | 0 | Filing Date
2012 | 8 | 0.4 | Transaction Date
2013 | 9 | 0.4 | Filing Date
2013 | 24 | 35 | Transaction Date
2014 | 19 | 2.5 | Filing Date
2014 | 38 | 11 | Transaction Date
2015 | 60 | 54 | Filing Date
// XPath CheatSheet
// To test XPath in your Chrome Debugger: $x('/html/body')
// http://www.jittuu.com/2012/2/14/Testing-XPath-In-Chrome/
// 0. XPath Examples.
// More: http://xpath.alephzarro.com/content/cheatsheet.html
'//hr[@class="edge" and position()=1]' // every first hr of 'edge' class
#' Fetch one page of results.
#'
#' @param page_num 1-based page number to retrieve.
#' NOTE(review): the remainder of this function's body is truncated in
#' this snippet; only the politeness delay is visible here.
get_page <- function(page_num = 1) {
  # Be kind to the web site: it has no robots.txt, so use a conservative
  # default wait between requests. The scraper's desires are not greater
  # than the site owner's, and omitting a delay would abuse their
  # resources.
  Sys.sleep(5)
-- Gaps-and-islands fragment: the difference between the two ROW_NUMBER()
-- sequences is constant within each consecutive run of rows that share
-- mark_1 (per product/shop, ordered by doc_date), so it can serve as a
-- group key for the runs.
-- NOTE(review): the statement is truncated in this snippet.
;
WITH CTE AS
(
    SELECT
        *,
        ROW_NUMBER() OVER (PARTITION BY [product_id], shop_code
                           ORDER BY [doc_date])
      - ROW_NUMBER() OVER (PARTITION BY [product_id], shop_code, mark_1
library(rvest)
library(dplyr)

# Scrape the GeM (Government e-Marketplace) bid-results listing page.
pg <- read_html("https://bidplus.gem.gov.in/bidresultlists")

# Each bid result on the page is wrapped in a div with class "block".
blocks <- html_nodes(pg, ".block")

# Within each block, pick the "col-block" div that holds the
# item(s)/quantity information.
items_and_quantity <- html_nodes(
  blocks,
  xpath = ".//div[@class='col-block' and contains(., 'Item(s)')]"
)

# The item name is the <span> immediately following the "Item(s)"
# <strong> label.
items <- html_nodes(
  items_and_quantity,
  xpath = ".//strong[contains(., 'Item(s)')]/following-sibling::span"
) %>%
  html_text(trim = TRUE)
This is pretty much what the (not-great idiom in) the appropriately-acronymed BS4 `get_text()` does (https://github.com/wention/BeautifulSoup4/blob/03a2b3a9d1fc5877212d9d382a512663f24c887d/bs4/element.py#L846-L854). There are far better ways to get text than this, but I would not expect Python tooling to grok that.
library(httr)
library(rvest)
library(readxl)
library(tidyverse)

# Scrape the DOE OE-417 annual summary page for electric-disturbance data.
doe <- read_html("https://www.oe.netl.doe.gov/OE417_annual_summary.aspx")

# Local cache directory for downloaded spreadsheets; quiet if it already
# exists.
dir.create("~/Data/doe-cache-dir", showWarnings = FALSE)

# Collect every link whose text mentions "XLS" (the annual spreadsheets).
# NOTE(review): the rest of this pipeline is truncated in this snippet.
html_nodes(doe, xpath = ".//a[contains(., 'XLS')]") %>%
# Restart the gpg-agent launch agent in the GUI (Aqua) session:
# -w also flips the Disabled key so the change persists across logins.
launchctl unload -w -S Aqua /System/Library/LaunchAgents/gpg.agent.daemon.plist
launchctl load -w -S Aqua /System/Library/LaunchAgents/gpg.agent.daemon.plist