I hereby claim:
- I am tilakpatidar on github.
- I am tilakpatidar (https://keybase.io/tilakpatidar) on keybase.
- I have a public key whose fingerprint is B366 0F6B 48D9 5E12 D7DC 1487 FF74 B160 3F1C 7463
To claim this, I am signing this object:
// Spark-shell session: load the `products` table from MySQL and an ORC file from the
// gobblin output directory, then left-outer-join the ORC rows against the table rows on `sha`.
// NOTE(review): the `scala>` prefixes and the trailing `| |` on these lines look like
// REPL/paste artifacts rather than Scala source — confirm and strip before running.
import spark.implicits._ | |
// NOTE(review): SaveMode is not used in the lines visible here; presumably needed by a later write.
import org.apache.spark.sql.SaveMode | |
// Read the `products` table over JDBC from the local `mopar_demo` MySQL database.
// NOTE(review): user and password are hard-coded in this snippet.
val products = spark.sqlContext.read.format("jdbc").option("driver", "com.mysql.jdbc.Driver").option("dbtable", "products").option("user", "gobblin").option("password", "gobblin").option("url", "jdbc:mysql://localhost/mopar_demo").load() | |
// Load a single ORC part file from the gobblin output path.
scala> val newProducts = spark.sqlContext.read.format("orc").load("/Users/tilak/gobblin/mopar-demo/output/org/apache/gobblin/copy/user/tilak/pricing.products_1521799535.csv/20180325023900_append/part.task_PullCsvFromS3_1521945534992_0_0.orc") | |
// Repartition the JDBC-backed DataFrame into 10 partitions before joining.
// (The val name is spelled `reparitionedProducts`; it is kept as-is because the join uses it.)
scala> val reparitionedProducts = products.repartition(10) | |
// Left outer join: keeps every row of newProducts (alias "np") and attaches the row of the
// repartitioned products (alias "op") whose `sha` is equal, when there is one.
val joined = newProducts.as("np").join(reparitionedProducts.as("op"), reparitionedProducts("sha") === newProducts("sha"), "left_outer") |
# coding=utf-8 | |
import findspark | |
from pandas.util.testing import assert_frame_equal | |
findspark.init() | |
import logging | |
import pytest |
I hereby claim:
To claim this, I am signing this object:
#!/usr/bin/env bash | |
#python | |
curl -X POST http://localhost:6066/v1/submissions/create --header "Content-Type:application/json;charset=UTF-8" --data '{ | |
"action":"CreateSubmissionRequest", | |
"appArgs":[ | |
"/Users/tilak/jobs/test_job.py" | |
], | |
"appResource":"file:/Users/tilak/jobs/test_job.py", | |
"clientSparkVersion":"2.3.3", |
#!/usr/bin/env bash | |
#https://www.datageekinme.com/setup/setting-up-my-mac-sqoop/ | |
# Installation on mac | |
# NOTE(review): the trailing `| |` on these lines looks like a paste artifact — strip before running.
# Install Sqoop with Homebrew.
brew install sqoop | |
# Create the directory that ACCUMULO_HOME is set to on the next line.
sudo mkdir /var/lib/accumulo | |
export ACCUMULO_HOME='/var/lib/accumulo' | |
# Version string of the Sqoop install; it matches the directory name used in SQOOP_HOME.
export SQOOP_VERSION=1.4.6_1 | |
# NOTE(review): SQOOP_HOME repeats 1.4.6_1 literally instead of using $SQOOP_VERSION;
# keep the two in sync if the version changes.
export SQOOP_HOME=/usr/local/Cellar/sqoop/1.4.6_1/libexec |
-- Create the test database.
create database test_db;
-- Build t_random with one row per value of generate_series(1,5000): the series value `s`
-- plus the md5 hash of a random number cast to text.
create table t_random as select s, md5(random()::text) from generate_Series(1,5000) s;
In [1]: df=spark.read.jdbc(url="jdbc:postgresql://localhost:5432/test_db", table="t_random", properties={"driver": "org.postgresql.Driver"}).repartition(10)