Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save ghostflare76/a774bd57a2fe8283296f87ba66ff1b88 to your computer and use it in GitHub Desktop.
Save ghostflare76/a774bd57a2fe8283296f87ba66ff1b88 to your computer and use it in GitHub Desktop.
Spark with OkHttp
%spark
import okhttp3.Call
import okhttp3.MediaType
import okhttp3.MultipartBody
import okhttp3.OkHttpClient
import okhttp3.Request
import okhttp3.RequestBody
import okhttp3.Response
// Base endpoint. The first page is requested without a `page` query parameter;
// pages 2 to 100 are requested as "<url>?page=<n>".
val url = "http://url"

// A single client instance is reused for every request.
val client = new OkHttpClient.Builder().build()

/** Performs a GET on `pageUrl` (sending the `Origin` header) and returns the response body as text.
  *
  * @param pageUrl absolute URL of the page to download
  * @return the raw response body
  * @throws java.io.IOException if the request fails or the server answers with a non-2xx status
  */
def fetchBody(pageUrl: String): String = {
  val request = new Request.Builder().url(pageUrl).addHeader("Origin", "https://url.com").build()
  val response = client.newCall(request).execute()
  try {
    // Fail fast instead of feeding an error page into the JSON reader.
    if (!response.isSuccessful()) {
      throw new java.io.IOException(s"GET $pageUrl failed with HTTP ${response.code()}")
    }
    response.body().string()
  } finally {
    // Releases the connection even when the status check above throws.
    response.close()
  }
}

// Page 1 uses the bare URL; the remaining pages carry an explicit page number.
val pageUrls = url +: (2 to 100).map(page => s"$url?page=$page")

// Every downloaded body is one JSON record. Parsing them in a single read infers one
// merged schema, rather than unioning 100 independently inferred DataFrames by position.
val df = spark.read.json(pageUrls.map(fetchBody).toDS)

// Flatten the `contents` array (explode names the element column `col`) and keep four fields.
val result = df.select(explode($"contents")).
  select($"col.test_1", $"col.test_2", $"col.test_3", $"col.test_4")

// Expose the flattened rows to SQL paragraphs as `price_tb`.
result.createOrReplaceTempView("price_tb")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment