Skip to content

Instantly share code, notes, and snippets.

@Hungsiro506
Created November 23, 2017 11:55
Show Gist options
  • Save Hungsiro506/d468fa58cf2a48fab75aac9f543f9f26 to your computer and use it in GitHub Desktop.
Save Hungsiro506/d468fa58cf2a48fab75aac9f543f9f26 to your computer and use it in GitHub Desktop.
scala> val df = spark.sqlContext.read.csv("/data/dns/cached_ip/*")
df: org.apache.spark.sql.DataFrame = [_c0: string]
scala> val cached = df
cached: org.apache.spark.sql.DataFrame = [_c0: string]
scala> val npic = spark.sqlContext.read.csv("/data/dns/npic_dns/*")
npic: org.apache.spark.sql.DataFrame = [_c0: string]
scala> val allo = spark.sqlContext.read.csv("/user/hungvd8/internet_user_profile_duration/Allocated-IPs2017-11-21.csv/*")
allo: org.apache.spark.sql.DataFrame = [_c0: string]
scala> val 2vs3 = npic.intersect(allo)
<console>:1: error: Invalid literal number
val 2vs3 = npic.intersect(allo)
    ^
scala> val _2vs3 = npic.intersect(allo)
_2vs3: org.apache.spark.sql.Dataset[org.apache.spark.sql.Row] = [_c0: string]
scala> _2vs3.count()
res0: Long = 1
scala> val _cvs3 = cached.intersect(allo)
_cvs3: org.apache.spark.sql.Dataset[org.apache.spark.sql.Row] = [_c0: string]
scala> val _1vs3 = _cvs3
_1vs3: org.apache.spark.sql.Dataset[org.apache.spark.sql.Row] = [_c0: string]
scala> val _union = _1vs3.unionAll(allo).except(_1vs3.intersect(allo))
warning: there was one deprecation warning; re-run with -deprecation for details
_union: org.apache.spark.sql.Dataset[org.apache.spark.sql.Row] = [_c0: string]
scala> _union.count
res1: Long = 1491
scala> _1vs3.count
res2: Long = 2854315
scala> allo.count
res3: Long = 2856005
scala> cached.count
res4: Long = 2951676
scala> _union.show(100)

Comments are disabled for this gist.