Created
January 20, 2012 13:02
-
-
Save alaz/1647302 to your computer and use it in GitHub Desktop.
REPLable Subset play
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import scala.io._ | |
import collection.JavaConversions._ | |
import com.mongodb._ | |
import com.osinka.subset._ | |
import SmartValues._ | |
// We are going to keep the tweets in MongoDB, so open the target collection first.
val mongo = new Mongo("192.168.0.164")
val db = mongo.getDB("test")
val coll = db.getCollection("timeline")

// How do we avoid storing duplicates? Build a unique index on the tweet "id".
// Both arguments of ensureIndex are written with Subset's query DSL.
val id = "id".fieldOf[Long]
coll.ensureIndex(id.int === 1, "unique".fieldOf[Boolean] === true)
/** Fetches a user's recent Twitter timeline and returns every status as a DBObject.
  *
  * The JSON response is parsed with the MongoDB driver's `JSON.parse`, so the resulting
  * documents can be saved to a collection or matched with Subset extractors as they are.
  *
  * @param name  Twitter screen name whose timeline is requested
  * @param count number of statuses to ask for (defaults to 20)
  * @return the values of the parsed response, in the iteration order of its key set
  */
def tweets(name: String, count: Int = 20): Iterable[DBObject] = {
  import com.mongodb.util._

  // Encode the screen name so that characters such as '&' or ' ' cannot corrupt the query string.
  val screenName = java.net.URLEncoder.encode(name, "UTF-8")
  val url = "http://api.twitter.com/1/statuses/user_timeline.json?screen_name=%s&count=%d".format(screenName, count)

  val stream = Source.fromURL(url)
  // Read the whole body and always release the underlying stream, even if reading fails.
  val body = try stream.mkString finally stream.close()

  val timeline = JSON.parse(body).asInstanceOf[DBObject]
  // Go through a Seq: mapping the key Set directly would build a Set, which loses the
  // iteration order and collapses documents that compare equal.
  timeline.keySet.toSeq map { key => timeline.get(key).asInstanceOf[DBObject] }
}
// ok, let's save a few
for (doc <- tweets("planetscala", 50)) {
  val result = coll.save(doc)
  // a non-null error on the write result is reported as a failed save
  if (result.getError != null) println("Failed to save: " + result.getError)
}
// feel free to repeat!
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// we want to read dates as Joda DateTime values, but Twitter writes them in its own specific format
import org.joda.time.DateTime | |
// Implicit reader that turns Twitter's textual timestamps into Joda DateTime values.
implicit val myDateReader = {
  import java.util.Locale
  import org.joda.time.format.DateTimeFormat

  // Matches e.g. "Fri Jan 20 13:02:00 +0000 2012"; day and month names are parsed as English.
  val twitterFormat = DateTimeFormat.forPattern("EEE MMM dd HH:mm:ss Z yyyy") withLocale Locale.ENGLISH

  // The reader is only defined for String values.
  ValueReader[DateTime] { case raw: String => twitterFormat parseDateTime raw }
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Typed field descriptors; they are used as extractors in the pattern below.
val text = "text".fieldOf[String]
val createdAt = "created_at".fieldOf[DateTime]

// Pull both fields out of every stored document and print them.
coll.find.iterator.foreach { doc =>
  doc match {
    case text(t) ~ createdAt(dt) => println(t + " @ " + dt)
  }
}
// ok, fine, how about a User subdocument?
/** Field descriptors for the keys read from a tweet's "user" subdocument. */
object User {
  val name = "screen_name".fieldOf[String]
  val description = "description".fieldOf[String]
  val tweets = "statuses_count".fieldOf[Int]
}
val user = "user".fieldOf[DBObject]
val U = User.name ~ User.tweets

// U is an extractor that returns a tuple of "name" and "tweets" from DBObjects, so this should work:
coll.find.iterator.take(2).flatMap { doc => user.unapply(doc).toIterator }.foreach {
  case U(n, t) => println(n + " has " + t)
}

// since user is an extractor itself, the two can be nested:
coll.find.iterator.take(5).foreach {
  case user(U(n, t)) => println(n + " has " + t)
}

// or we can extract both subdocument and document fields at once
coll.find.iterator.take(5).foreach {
  case user(U(n, cnt)) ~ text(t) ~ createdAt(dt) =>
    println("%s [%d] @ %s: %s".format(n, cnt, dt, t))
}

// by the way, we are not obliged to query a MongoDB collection: we may directly deserialize
// DBObjects from any source, e.g. our stream
tweets("planetscala", 2).foreach {
  case text(t) ~ createdAt(dt) => println(t + " @ " + dt)
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// want User to return a useful object? | |
// :paste | |
/** A Twitter account as read from the "user" subdocument of a tweet. */
case class User(name: String, tweets: Int, description: String)
/** Field descriptors for the subdocument plus the implicit reader that assembles a User. */
object User {
  val name = "screen_name".fieldOf[String]
  val description = "description".fieldOf[String]
  val tweets = "statuses_count".fieldOf[Int]

  // Builds a User from a value in which all three fields can be extracted.
  implicit val userReader = ValueReader[User] {
    case name(n) ~ tweets(t) ~ description(d) => User(n, t, d)
  }
}
// :paste off | |
// "user" is now declared as a subset described by the User object and read as a User value
val user = "user".subset(User).of[User]

// then,
coll.find.iterator.take(5).foreach {
  case user(u) ~ text(t) ~ createdAt(dt) => println("%s @ %s: %s".format(u, dt, t))
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// just in case you wonder whether you can save something, too:
// a writer that hands a DateTime over as the result of its toDate call
implicit val dateTimeWriter = ValueWriter[DateTime](moment => moment.toDate)

val coll1 = db.getCollection("timeline1")

// build a new document from the extracted text and timestamp of every tweet and insert it
coll.find.iterator.foreach {
  case user(u) ~ text(t) ~ createdAt(dt) =>
    coll1.insert(text(t) ~ createdAt(dt))
}
// did you notice which fields the documents in the new collection have?
// how would you keep all the original fields?
coll1.remove(Query.empty)

coll.find.iterator.foreach {
  case dbo @ (text(_) ~ createdAt(_)) =>
    // the lens is rebuilt for each document, so every document gets its own fresh DateTime
    val lens = createdAt(new DateTime) ~ text("new timeline")
    coll1.save(lens(dbo))
}
// go and make sure that all the records have the current timestamp in the "created_at" field
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment