To use custom endpoints with the latest Spark distribution, one needs to add an external package (hadoop-aws). Custom endpoints can then be configured as described in the docs.
bin/spark-shell --packages org.apache.hadoop:hadoop-aws:2.7.2
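For example, with the S3A connector the endpoint is an ordinary Hadoop configuration property, so inside the shell it can be set like this (the URL is a placeholder):

sc.hadoopConfiguration.set("fs.s3a.endpoint", "http://localhost:9000")

The same setting can also be passed on the command line as --conf spark.hadoop.fs.s3a.endpoint=http://localhost:9000.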
import { Table, Vector, Field, Utf8, Type, vectorFromArray } from 'apache-arrow';

/**
 * Cast all columns with complex data types in an Apache Arrow Table to strings
 * @param {Table} table - The Apache Arrow Table
 * @returns {Table} - A new Table with all complex data type columns cast to strings
 */
function castComplexColumnsToString(table: Table): Table {
  const schemaFields = table.schema.fields;
  const columns: Record<string, Vector> = {};
  schemaFields.forEach((field: Field, i: number) => {
    const vector = table.getChildAt(i)!;
    const isComplex = [Type.Struct, Type.List, Type.Map].includes(field.type.typeId);
    // JSON-stringify complex values (Arrow rows and nested vectors implement toJSON)
    columns[field.name] = isComplex
      ? vectorFromArray(Array.from(vector, (v) => (v == null ? null : JSON.stringify(v))), new Utf8())
      : vector;
  });
  return new Table(columns);
}
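A quick usage sketch (the column names and data are made up; tableFromArrays infers a list type for the nested arrays, which the function then flattens to strings):

import { tableFromArrays } from 'apache-arrow';

const table = tableFromArrays({ id: [1, 2], tags: [['a'], ['b', 'c']] });
const flattened = castComplexColumnsToString(table);
console.log(flattened.schema.fields.map((f) => f.type.toString()));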
#!/bin/bash
FUNCTION_NAME=$1

# Lambda@Edge writes its logs to a "/aws/lambda/us-east-1.<function>" log group
# in whichever region served the request, so every region has to be checked.
for region in $(aws --output text ec2 describe-regions | cut -f 4)
do
  echo "Checking $region"
  for loggroup in $(aws --output text logs describe-log-groups --log-group-name-prefix "/aws/lambda/us-east-1.$FUNCTION_NAME" --region $region --query 'logGroups[].logGroupName')
  do
    echo "Found '$loggroup' in region $region"
    for logstream in $(aws --output text logs describe-log-streams --log-group-name $loggroup --region $region --query 'logStreams[].logStreamName')
    do
      echo "  stream: $logstream"
    done
  done
done
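Saved as find-edge-logs.sh (both names below are made up), the script takes the Lambda function name as its only argument:

bash find-edge-logs.sh my-edge-function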
{
  "Type": "AWS::IAM::Role",
  "Properties": {
    "AssumeRolePolicyDocument": {
      "Version": "2012-10-17",
      "Statement": [{
        "Effect": "Allow",
        "Principal": {
          "Service": [
            "lambda.amazonaws.com",
            "edgelambda.amazonaws.com"
          ]
        },
        "Action": "sts:AssumeRole"
      }]
    }
  }
}
{
  "us-east-1": {
    "city": "Ashburn",
    "state": "Virginia",
    "country": "United States",
    "countryCode": "US",
    "latitude": 38.9445,
    "longitude": -77.4558029,
    "region": "North America",
    "iataCode": "IAD"
  }
}
#!/bin/bash
# List a Docker Hub namespace's repositories (user, name, description, stars, pulls) as CSV
curl -s "https://hub.docker.com/v2/repositories/$1/?page_size=1000" | jq -r '["user", "name", "description", "star_count", "pull_count"] as $fields | $fields, (.results[] | [.[$fields[]]]) | @csv'
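Invoked with a Docker Hub user or organization as the only argument (the script name is made up):

./hub-repos.sh google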
var phantom = require('phantom');
var async = require('async');

// Each entry is a [url, port] pair: a distinct port lets several
// phantom bridge instances run without colliding.
var pagesToCall = [
  ['http://www.google.com', 8000],
  ['http://www.allthingsd.com', 8001],
  ['http://www.wired.com', 8002],
  ['http://www.mashable.com', 8003],
  ['http://www.stackoverflow.com', 8004]
];
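The snippet ends before the loop that actually visits the pages. A minimal sketch of such a loop, assuming the legacy callback-style API of the phantom module in which create() accepts a { port: ... } option (newer releases are promise-based):

// Visit each page in series, one phantom instance per configured port
async.eachSeries(pagesToCall, function (pair, done) {
  var url = pair[0], port = pair[1];
  phantom.create({ port: port }, function (ph) {
    ph.createPage(function (page) {
      page.open(url, function (status) {
        console.log(url + ' -> ' + status);
        ph.exit();
        done();
      });
    });
  });
}, function () {
  console.log('done with all pages');
});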