In general, AWS services can be accessed through:
- the AWS web console,
- API libraries in a programming language, such as boto3 for Python 3,
- the AWS command-line interface, i.e. awscli.

I opted for the API library, since it can be driven directly from Python code.
#!/usr/bin/python
# -*- coding: utf-8 -*-
'''Use gzip files between a Python application and S3 directly (Python 3).
Python 2 version - https://gist.github.com/a-hisame/f90815f4fae695ad3f16cb48a81ec06e
'''
import io
import gzip
import json
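# The gist body is cut off above; below is a minimal sketch of the idea
# (bucket and key are placeholders), not necessarily the gist's exact code.
import boto3

s3 = boto3.client('s3')

def upload_json_gz(bucket, key, obj):
    # Serialize to JSON and gzip entirely in memory, then upload to S3
    # without writing a temporary file.
    buf = io.BytesIO()
    with gzip.GzipFile(fileobj=buf, mode='wb') as gz:
        gz.write(json.dumps(obj).encode('utf-8'))
    buf.seek(0)
    s3.upload_fileobj(buf, bucket, key)

def download_json_gz(bucket, key):
    # Download into memory and decompress on the fly.
    buf = io.BytesIO()
    s3.download_fileobj(bucket, key, buf)
    buf.seek(0)
    with gzip.GzipFile(fileobj=buf, mode='rb') as gz:
        return json.loads(gz.read().decode('utf-8'))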
A separate note: driving Chrome through an authenticated proxy with Selenium. Chrome has no command-line flag for proxy credentials, so the trick is to package the proxy settings and the credentials into a small extension loaded at startup:

from selenium import webdriver
from selenium.webdriver.common.proxy import Proxy
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.chrome.options import Options
import zipfile
import os

def proxy_chrome(PROXY_HOST, PROXY_PORT, PROXY_USER, PROXY_PASS):
    manifest_json = """
    {
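        "version": "1.0.0",
        "manifest_version": 2,
        "name": "Chrome Proxy Auth",
        "permissions": ["proxy", "tabs", "unlimitedStorage", "storage",
                        "<all_urls>", "webRequest", "webRequestBlocking"],
        "background": {"scripts": ["background.js"]}
    }
    """
    # The original snippet is truncated right after the opening brace; the
    # rest is a reconstruction of the standard proxy-auth-extension recipe,
    # not necessarily the author's exact code.
    background_js = """
    var config = {
        mode: "fixed_servers",
        rules: {
            singleProxy: {scheme: "http", host: "%s", port: parseInt(%s)},
            bypassList: ["localhost"]
        }
    };
    chrome.proxy.settings.set({value: config, scope: "regular"}, function() {});
    chrome.webRequest.onAuthRequired.addListener(
        function(details) {
            return {authCredentials: {username: "%s", password: "%s"}};
        },
        {urls: ["<all_urls>"]},
        ["blocking"]
    );
    """ % (PROXY_HOST, PROXY_PORT, PROXY_USER, PROXY_PASS)
    # Pack manifest and background script into an extension zip and load it.
    pluginfile = 'proxy_auth_plugin.zip'
    with zipfile.ZipFile(pluginfile, 'w') as zp:
        zp.writestr("manifest.json", manifest_json)
        zp.writestr("background.js", background_js)
    options = Options()
    options.add_extension(pluginfile)
    return webdriver.Chrome(options=options)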
Avoid SELECT *: specify explicit column names instead (on a columnar store, the engine then only reads the columns you actually ask for).

How to search in all countries *but* the US (or any other, for that matter)?
LinkedIn country codes: https://developer.linkedin.com/docs/reference/country-codes#
LinkedIn faceted-search URL format: %5B"ca%3A0"%2C"au%3A0"%2C"es%3A0"%5D
Decoded URL: ["ca:0","au:0","es:0"]
=> Complete list for injection in the URL (remove the country you want to exclude):
["ae:0","ar:0","at:0","au:0","be:0","br:0","ca:0","ch:0","cl:0","cn:0","co:0","cz:0","de:0","dk:0","es:0","fi:0","fr:0","fx:0","gb:0","gr:0","hk:0","hr:0","hu:0","id:0","ie:0","il:0","in:0","is:0","it:0","jp:0","lb:0","lu:0","lv:0","ma:0","mc:0","mx:0","my:0","nl:0","no:0","nz:0","oo:0","pe:0","ph:0","pk:0","pl:0","pr:0","pt:0","py:0","qa:0","ro:0","ru:0","sa:0","se:0","sg:0","sk:0","th:0","tr:0","tw:0","ua:0","us:0","uy:0","ve:0","vn:0","yu:0","za:0"]
Extracting endpoint URLs from a site's JavaScript files, as a shell one-liner:

curl -L -k -s https://www.example.com | tac | sed "s#\\\/#\/#g" | egrep -o "src['\"]?\s*[=:]\s*['\"]?[^'\"]+.js[^'\"> ]*" | awk -F '//' '{if(length($2))print "https://"$2}' | sort -fu | xargs -I '%' sh -c "curl -k -s \"%\" | sed \"s/[;}\)>]/\n/g\" | grep -Po \"(['\\\"](https?:)?[/]{1,2}[^'\\\"> ]{5,})|(\.(get|post|ajax|load)\s*\(\s*['\\\"](https?:)?[/]{1,2}[^'\\\"> ]{5,})\"" | awk -F "['\"]" '{print $2}' | sort -fu
# using linkfinder
function ejs() {
  URL=$1;
  curl -Lks $URL | tac | sed "s#\\\/#\/#g" | egrep -o "src['\"]?\s*[=:]\s*['\"]?[^'\"]+.js[^'\"> ]*" | sed -r "s/^src['\"]?[=:]['\"]//g" | awk -v url=$URL '{if(length($1)) if($1 ~/^http/) print $1; else if($1 ~/^\/\//) print "https:"$1; else print url"/"$1}' | sort -fu | xargs -I '%' sh -c "echo \"\n##### %\";wget --no-check-certificate --quiet \"%\"; basename \"%\" | xargs -I \"#\" sh -c 'linkfinder.py -o cli -i #'"
}
# with file download (the new best one):
# but there is a bug if you don't provide a root URL
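For reference, the same extraction sketched in Python, which is easier to tweak and debug than the quoted one-liners (the regexes are deliberately simplified, and requests is assumed to be installed):

import re
import requests

SRC_RE = re.compile(r"""src\s*[=:]\s*['"]?([^'"\s>]+\.js[^'"\s>]*)""", re.I)
URL_RE = re.compile(r"""['"]((?:https?:)?//[^'"\s>]{5,})['"]""")

def ejs(root):
    # Fetch the page, collect its script URLs, then grep every script
    # for absolute or protocol-relative URLs.
    html = requests.get(root, timeout=10).text
    found = set()
    for src in SRC_RE.findall(html):
        if src.startswith("//"):
            src = "https:" + src
        elif not src.startswith("http"):
            src = root.rstrip("/") + "/" + src.lstrip("/")
        try:
            js = requests.get(src, timeout=10).text
        except requests.RequestException:
            continue
        found.update(URL_RE.findall(js))
    return sorted(found, key=str.lower)

print("\n".join(ejs("https://www.example.com")))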
# Read the S3 folder as a Glue dynamic frame, grouping small files
# within each partition
input_dyf = glueContext.create_dynamic_frame_from_options("s3", {
        "paths": [ inputPath ],
        "recurse": True,
        "groupFiles": "inPartition"
    },
    format = "parquet"
)
# Repartition as required
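# The snippet above assumes a Glue job context roughly like this
# (names such as inputPath and the bucket path are placeholders):
#   from awsglue.context import GlueContext
#   from pyspark.context import SparkContext
#   glueContext = GlueContext(SparkContext.getOrCreate())
#   inputPath = "s3://my-bucket/my-prefix/"
# Repartitioning coalesces the many small input files into a fixed number
# of partitions before writing; the count (here 32) is workload-dependent.
repartitioned_dyf = input_dyf.repartition(32)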
How to use cookies to capture URL parameters: https://jennamolby.com/how-to-use-cookies-to-capture-url-parameters/

let YOUR_DOMAIN = "YOUR_DOMAIN.TLD" // ex: scrapingbee.com

function getParameterByName(name) {
    name = name.replace(/[\[]/, "\\[").replace(/[\]]/, "\\]");
    var regex = new RegExp("[\\?&]" + name + "=([^&#]*)"),
        results = regex.exec(location.search);
    return results === null ? "" : decodeURIComponent(results[1].replace(/\+/g, " "));
}
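// The cookie-writing half of the snippet is missing above; this is a sketch
// of how the captured parameter could be persisted (cookie name and
// attributes are assumptions, following the linked article's approach):
function setCookie(name, value) {
    // Scope the cookie to the root domain so subdomains can read it too.
    document.cookie = name + "=" + encodeURIComponent(value) +
        "; domain=." + YOUR_DOMAIN + "; path=/";
}

// Example: persist the utm_source query parameter across page views.
setCookie("utm_source", getParameterByName("utm_source"));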
import geopandas as gpd
import pandas as pd
import fiona
import sys
import numpy as np
import folium
from folium.plugins import Search

# data from https://frap.fire.ca.gov/mapping/gis-data/
# create a list of layers within a file geodatabase
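# The listing call itself is cut off above; a minimal sketch, assuming the
# FRAP file geodatabase was downloaded locally (the .gdb name is a placeholder):
gdb_path = "fire_perimeters.gdb"
layers = fiona.listlayers(gdb_path)
print(layers)

# Any layer can then be loaded as a GeoDataFrame for mapping with folium:
gdf = gpd.read_file(gdb_path, layer=layers[0])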