Last active
July 25, 2016 21:09
-
-
Save holdenk/1d1fa5e5d234327a12f5bc1a84069591 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
scala> val df =spark.read.format("csv").option("header", "false").option("inferSchema", "true").load("/home/holden/Downloads/ex*.csv")
df: org.apache.spark.sql.DataFrame = [_c0: string, _c1: string ... 2125 more fields]
scala> df.collect()
16/07/25 12:53:40 WARN Utils: Truncated the string representation of a plan since it was too large. This behavior can be adjusted by setting 'spark.debug.maxToStringFields' in SparkEnv.conf.
res9: Array[org.apache.spark.sql.Row] = Array([Date,Lifetime Total Likes,Daily New Likes,Daily Unlikes,Daily Page Engaged Users,Weekly Page Engaged Users,28 Days Page Engaged Users,Daily Like Sources - On Your Page,Daily Total Reach,Weekly Total Reach,28 Days Total Reach,Daily Organic Reach,Weekly Organic Reach,28 Days Organic Reach,Daily Total Impressions,Weekly Total Impressions,28 Days Total Impressions,Daily Organic impressions,Weekly Organic impressions,28 Days Organic impressions,Daily Reach of page posts,Weekly Reach of page posts,28 Days Reach of page posts,Daily Organic Reach of Page posts,Weekly Organic Reach of Page posts,28 Days Organic Reach of Page posts,Daily Total Impressions of your posts,Weekly Total Impressions of your posts,28 Days Total Impressions of your posts,Dai... | |
scala> val df =spark.read.format("csv").option("header", "false").option("inferSchema", "false").load("/home/holden/Downloads/ex*.csv")
df: org.apache.spark.sql.DataFrame = [_c0: string, _c1: string ... 2125 more fields]
scala> val miniHeader = df.take(1)
miniHeader: Array[org.apache.spark.sql.Row] = Array([Date,Lifetime Total Likes,Daily New Likes,Daily Unlikes,Daily Page Engaged Users,Weekly Page Engaged Users,28 Days Page Engaged Users,Daily Like Sources - On Your Page,Daily Total Reach,Weekly Total Reach,28 Days Total Reach,Daily Organic Reach,Weekly Organic Reach,28 Days Organic Reach,Daily Total Impressions,Weekly Total Impressions,28 Days Total Impressions,Daily Organic impressions,Weekly Organic impressions,28 Days Organic impressions,Daily Reach of page posts,Weekly Reach of page posts,28 Days Reach of page posts,Daily Organic Reach of Page posts,Weekly Organic Reach of Page posts,28 Days Organic Reach of Page posts,Daily Total Impressions of your posts,Weekly Total Impressions of your posts,28 Days Total Impressions of your pos... | |
scala> miniHeader(0).toSeq
res11: Seq[Any] = WrappedArray(Date, Lifetime Total Likes, Daily New Likes, Daily Unlikes, Daily Page Engaged Users, Weekly Page Engaged Users, 28 Days Page Engaged Users, Daily Like Sources - On Your Page, Daily Total Reach, Weekly Total Reach, 28 Days Total Reach, Daily Organic Reach, Weekly Organic Reach, 28 Days Organic Reach, Daily Total Impressions, Weekly Total Impressions, 28 Days Total Impressions, Daily Organic impressions, Weekly Organic impressions, 28 Days Organic impressions, Daily Reach of page posts, Weekly Reach of page posts, 28 Days Reach of page posts, Daily Organic Reach of Page posts, Weekly Organic Reach of Page posts, 28 Days Organic Reach of Page posts, Daily Total Impressions of your posts, Weekly Total Impressions of your posts, 28 Days Total Impressions of yo... | |
scala> miniHeader(0).toSeq
res12: Seq[Any] = WrappedArray(Date, Lifetime Total Likes, Daily New Likes, Daily Unlikes, Daily Page Engaged Users, Weekly Page Engaged Users, 28 Days Page Engaged Users, Daily Like Sources - On Your Page, Daily Total Reach, Weekly Total Reach, 28 Days Total Reach, Daily Organic Reach, Weekly Organic Reach, 28 Days Organic Reach, Daily Total Impressions, Weekly Total Impressions, 28 Days Total Impressions, Daily Organic impressions, Weekly Organic impressions, 28 Days Organic impressions, Daily Reach of page posts, Weekly Reach of page posts, 28 Days Reach of page posts, Daily Organic Reach of Page posts, Weekly Organic Reach of Page posts, 28 Days Organic Reach of Page posts, Daily Total Impressions of your posts, Weekly Total Impressions of your posts, 28 Days Total Impressions of yo... | |
scala> val idx1 = res12.indexOf("Date")
idx1: Int = 0
scala> df.schema
res13: org.apache.spark.sql.types.StructType = StructType(StructField(_c0,StringType,true), StructField(_c1,StringType,true), StructField(_c2,StringType,true), StructField(_c3,StringType,true), StructField(_c4,StringType,true), StructField(_c5,StringType,true), StructField(_c6,StringType,true), StructField(_c7,StringType,true), StructField(_c8,StringType,true), StructField(_c9,StringType,true), StructField(_c10,StringType,true), StructField(_c11,StringType,true), StructField(_c12,StringType,true), StructField(_c13,StringType,true), StructField(_c14,StringType,true), StructField(_c15,StringType,true), StructField(_c16,StringType,true), StructField(_c17,StringType,true), StructField(_c18,StringType,true), StructField(_c19,StringType,true), StructField(_c20,StringType,true), StructField(_c... | |
scala> df.select("_c0").collect()
res14: Array[org.apache.spark.sql.Row] = Array([Date], [2/1/2012], [3/2/2012], [3/3/2012])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment