Skip to content

Instantly share code, notes, and snippets.

View fwbrasil's full-sized avatar

Flavio Brasil fwbrasil

  • Nubank
  • San Francisco Bay Area, California
View GitHub Profile
@fwbrasil
fwbrasil / gist:2db2b6ac2b86fe820442
Last active September 10, 2015 08:47
Monadic joins to SQL
t1.flatMap(a => t2.filter(b => b.s == a.s).map(b => b.s))
SELECT t2.s FROM t1, t2 WHERE t2.s = t1.s
t1.flatMap(a => t2.map(b => b.s).take(10))
SELECT x.s FROM t1, (SELECT * FROM t2 LIMIT 10) x
t1.flatMap(a => t2.filter(b => b.s == a.s).map(b => b.s).take(10))
package fix
import scalafix._
import scala.meta._
case class ReplaceSymbols(sctx: SemanticCtx) extends SemanticRewrite(sctx) {
def rewrite(ctx: RewriteCtx): Patch =
ctx.replaceSymbols(
"scala.concurrent.Await" -> "io.trane.future.scala.Await",
# Requirement: the project must be on Scala 2.12.3
# Step 1 - Add the scalafix sbt plugin
# (sbt loads plugin definitions from the build's project/ directory)
echo 'addSbtPlugin("ch.epfl.scala" % "sbt-scalafix" % "0.5.0-M4")' >> project/plugins.sbt
# Step 2 (MANUAL) - Add the traneio Scala Future dependency to the sbt build
# resolvers += Resolver.sonatypeRepo("snapshots")
# libraryDependencies += "io.trane" % "future-scala" % "0.2.3-SNAPSHOT"
# Step 3 - Run the scalafix rewrite
@fwbrasil
fwbrasil / AsyncMacro.java
Last active October 21, 2024 14:29
Java macro DSL
/**
*
* THIS IS JUST A DSL PROTOTYPE
*
* First attempt at a DSL for defining macro transformations in Java.
* There's a lot of noise because of the absence of pattern matching and tuples.
* This is a prototype of one of the if/else transformations in Monadless:
*
* case q"if($cond) $ifTrue else $ifFalse" =>
* (ifTrue, ifFalse) match {

Please help us validate Quill 2.0.0-SNAPSHOT so we can make the final release. Steps:

  1. Read the migration notes
  2. Add the snapshots repo to your sbt build: resolvers += Resolver.sonatypeRepo("snapshots")
  3. Update the Quill version to 2.0.0-SNAPSHOT
  4. Fix the compilation errors, feel free to ask questions on the gitter channel
  5. Let us know the results

This version has 0️⃣ (yes, ZERO!) known bugs

def topHashtags(tweets: Dataset[Tweet], n: Int): Dataset[(String, Long)] =
run { // produce a dataset from the Quill query
liftQuery(tweets) // transform the dataset into a Quill query
.concatMap(_.text.split(" ")) // split into words and unnest results
.filter(_.startsWith("#")) // filter hashtag words
.map(_.toLowerCase) // normalize hashtags
.groupBy(word => word) // group by each hashtag
.map { // map word list to its count
case (word, list) =>
(word, list.size)
// Hashtag count written with a mix of typed Dataset and untyped Dataframe steps:
// it lower-cases the "#"-prefixed words of the `text` column, counts each one,
// orders by that count descending and keeps at most `n` rows.
// NOTE(review): the chain shown here ends at `.limit(n)`; any conversion to the
// declared Dataset[(String, BigInt)] result is not visible in this excerpt - confirm.
def topHashtags(tweets: Dataset[Tweet], n: Int): Dataset[(String, BigInt)] =
tweets
.select($"text".as[String]) // select the text column (Dataframe)
.flatMap(_.split("\\s+")) // split it into words (Dataset)
.filter(_.startsWith("#")) // filter hashtag words (Dataset)
.map(_.toLowerCase) // normalize hashtags (Dataset)
.groupBy($"value") // group by each hashtag (Dataframe)
.agg(count("*") as "count") // aggregate the count (Dataframe)
.orderBy($"count" desc) // order (Dataframe)
.limit(n) // limit to top results (Dataframe)
/**
 * Counts hashtags using only untyped DataFrame operations.
 *
 * @param tweets DataFrame that must expose a `text` column
 * @param n      maximum number of rows to keep
 * @return a DataFrame with the columns `word` and `count`, ordered by `count`
 *         descending and limited to `n` rows
 */
def topHashtags(tweets: DataFrame, n: Int): DataFrame =
  tweets
    .select(explode(split($"text", "\\s+"))) // split it into words (one row per word, column `col`)
    .select(lower($"col") as "word") // normalize hashtags
    .filter("word like '#%'") // filter hashtag words
    .groupBy($"word") // group by each hashtag
    .agg(count("*") as "count") // aggregate the count
    // `.desc` is called as a regular method: the postfix form `$"count" desc`
    // only compiles cleanly when scala.language.postfixOps is enabled.
    .orderBy($"count".desc) // order
    .limit(n) // limit to top results
/**
 * Hashtag ranking implemented directly on the RDD API.
 *
 * @param tweets RDD of tweets; only the `text` field of each tweet is read
 * @param n      number of entries requested from `top`
 * @return the `(hashtag, count)` pairs selected by `top` when ordering on the count
 */
def topHashtags(tweets: RDD[Tweet], n: Int): Array[(String, BigInt)] = {
  // Lower-cased tokens that start with '#', taken from the whitespace-split tweet text.
  val hashtags = tweets
    .flatMap(tweet => tweet.text.split("\\s+"))
    .filter(token => token.startsWith("#"))
    .map(token => token.toLowerCase)

  // Pair each occurrence with a counter of one and sum the counters per hashtag.
  val occurrences = hashtags
    .map(tag => (tag, BigInt(1)))
    .reduceByKey(_ + _)

  // Rank the pairs by their counter (second tuple element).
  occurrences.top(n)(Ordering.by[(String, BigInt), BigInt](entry => entry._2))
}
-- Hashtag count query: explode the text into words, keep the ones starting
-- with '#', count them per lower-cased word and order by that count.
-- The derived table is aliased x10, so the outer SELECT and ORDER BY must
-- reference x10; the alias x2 is only in scope inside the derived table.
SELECT x10.*
FROM
  (SELECT LOWER (x2._1) _1,
          COUNT(*) _2
   FROM
     (SELECT explode(SPLIT(x1.text, ' ')) _1
      FROM (?) x1) x2
   WHERE x2._1 LIKE (concat('#', '%'))
   GROUP BY LOWER (x2._1)) x10
ORDER BY - (x10._2) ASC NULLS FIRST