Created
November 23, 2017 11:55
-
-
Save Hungsiro506/d468fa58cf2a48fab75aac9f543f9f26 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
scala> val df = spark.sqlContext.read.csv("/data/dns/cached_ip/*")
df: org.apache.spark.sql.DataFrame = [_c0: string]
scala> val cached = df
cached: org.apache.spark.sql.DataFrame = [_c0: string]
scala> val npic = spark.sqlContext.read.csv("/data/dns/npic_dns/*")
npic: org.apache.spark.sql.DataFrame = [_c0: string]
scala> val allo = spark.sqlContext.read.csv("/user/hungvd8/internet_user_profile_duration/Allocated-IPs2017-11-21.csv/*")
allo: org.apache.spark.sql.DataFrame = [_c0: string]
scala> val 2vs3 = npic.intersect(allo)
<console>:1: error: Invalid literal number
val 2vs3 = npic.intersect(allo)
    ^
scala> val _2vs3 = npic.intersect(allo)
_2vs3: org.apache.spark.sql.Dataset[org.apache.spark.sql.Row] = [_c0: string]
scala> _2vs3.count()
res0: Long = 1
scala> val _cvs3 = cached.intersect(allo)
_cvs3: org.apache.spark.sql.Dataset[org.apache.spark.sql.Row] = [_c0: string]
scala> val _1vs3 = _cvs3
_1vs3: org.apache.spark.sql.Dataset[org.apache.spark.sql.Row] = [_c0: string]
scala> val _union = _1vs3.unionAll(allo).except(_1vs3.intersect(allo))
warning: there was one deprecation warning; re-run with -deprecation for details
_union: org.apache.spark.sql.Dataset[org.apache.spark.sql.Row] = [_c0: string]
scala> _union.count
res1: Long = 1491
scala> _1vs3.count
res2: Long = 2854315
scala> allo.count
res3: Long = 2856005
scala> cached.count
res4: Long = 2951676
scala> _union.show(100)
Comments are disabled for this gist.