// Build a one-row sample dataset: a single Facebook profile flattened into
// "Key:Value; Key:Value; ..." text and stored in a string column named `profile`.
// `toDF` comes from the SparkSession implicits; spark-shell imports them
// automatically, elsewhere add `import spark.implicits._` first.
val facebookProfile = "ActivitiesDescription:703 likes, 0 talking about this, 4 were here; Category:; Email:joe@pvhvac.com; Hours:Mon-Fri: 8:00 am - 5:00 pm; Likes:703; Link:https://www.facebook.com/pvhvac; Location:165 W Wieuca Rd NE, Ste 310, Atlanta, Georgia; Name:PV Heating & Air; NumberOfPictures:0; NumberOfReviews:26; Phone:(404) 798-9672; ShortDescription:We specialize in residential a/c, heating, indoor air quality & home performance.; Url:http://www.pvhvac.com; Visitors:4"
val fbs = Seq(facebookProfile).toDF("profile")
scala> fbs.show(truncate = false)
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|profile |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|ActivitiesDescription:703 likes, 0 talking about this, 4 were here; Category:; Email:joe@pvhvac.com; Hours:Mon-Fri: 8:00 am - 5:00 pm; Likes:703; Link:https://www.facebook.com/pvhvac; Location:165 W Wieuca Rd NE, Ste 310, Atlanta, Georgia; Name:PV Heating & Air; NumberOfPictures:0; NumberOfReviews:26; Phone:(404) 798-9672; ShortDescription:We specialize in residential a/c, heating, indoor air quality & home performance.; Url:http://www.pvhvac.com; Visitors:4|
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
// Parse the `profile` column to pull out the ":NUMBER" that precedes " likes".
// The replacement string uses java.util.regex syntax: a capture group is
// referenced as `$1`, not `\1`. A backslash only escapes the next character,
// so """\1""" replaced the whole match with the literal "1".
// Note: rows that do not match the pattern are returned unchanged by
// regexp_replace; use regexp_extract if an empty string is preferred there.
val likes = fbs.withColumn("FaceBookLikes", regexp_replace($"profile", """.*(:\d+) likes.*""", """$1"""))
scala> likes.show
+--------------------+-------------+
| profile|FaceBookLikes|
+--------------------+-------------+
|ActivitiesDescrip...| :703|
+--------------------+-------------+
Created
December 16, 2017 12:14
-
-
Save jaceklaskowski/b39274d8802def9e5f7799d764b648ce to your computer and use it in GitHub Desktop.
Anatolyi - Facebook Profiles
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment