alaz · January 20, 2012 13:02
diff --git a/01_init.scala b/01_init.scala
 import scala.io._
 import collection.JavaConversions._
 import com.mongodb._
 import com.osinka.subset._
 import SmartValues._

 // we want to store it in MongoDB. ok..
 val mongo = new Mongo("192.168.0.164")
 val db = mongo.getDB("test")
 val coll = db.getCollection("timeline")

 // build a unique index on the "id" field so that we do not store duplicates
 val id = "id".fieldOf[Long]
 coll.ensureIndex(id.int === 1, "unique".fieldOf[Boolean] === true)

 // We want structured data and Twitter may give us some. We want them as DBObjects
 /** Downloads a user's timeline from Twitter's `user_timeline.json` endpoint.
   *
   * @param name  value substituted into the `screen_name` query parameter
   * @param count value substituted into the `count` query parameter (default 20)
   * @return one `DBObject` per key of the parsed JSON response
   */
 def tweets(name: String, count: Int = 20): Iterable[DBObject] = {
   import com.mongodb.util._
   val stream = Source.fromURL("http://api.twitter.com/1/statuses/user_timeline.json?screen_name=%s&count=%d".format(name, count))
   // the response is read eagerly, so the source can (and must) be closed right away
   val json = try stream.mkString finally stream.close()
   val timeline = JSON.parse( json ).asInstanceOf[DBObject]
   for {k <- timeline.keySet}
   yield timeline.get(k).asInstanceOf[DBObject]
 }

 // ok, let's save a few
 tweets("planetscala", 50) foreach { doc =>
   // a non-null error on the write result means the save did not go through
   Option(coll.save(doc).getError) foreach { err => println("Failed to save: "+err) }
 }

 // feel free to repeat!
diff --git a/02_customFieldReader.scala b/02_customFieldReader.scala
 // we want to see DateTime in Joda, but they have a specific format in Twitter
 import org.joda.time.DateTime
 implicit val myDateReader = {
   import java.util.Locale
   import org.joda.time.format.DateTimeFormat

   // English locale is set explicitly so that day and month names parse regardless of the JVM default
   val twitterFormat =
     DateTimeFormat.forPattern("EEE MMM dd HH:mm:ss Z yyyy").withLocale(Locale.ENGLISH)

   // only String values are handled; each one is parsed with the format above
   ValueReader[DateTime]({
     case raw: String => twitterFormat.parseDateTime(raw)
   })
 }
diff --git a/03_reading.scala b/03_reading.scala
 val text = "text".fieldOf[String]
 val createdAt = "created_at".fieldOf[DateTime]

 // print every stored document as "<text> @ <created_at>"
 coll.find.iterator foreach {
   case text(body) ~ createdAt(when) => println(body + " @ " + when)
 }

 // ok, fine, how about a User subdocument?
 /** Field definitions used to read the "user" subdocument. */
 object User {
   val name = "screen_name".fieldOf[String]
   val description = "description".fieldOf[String]
   val tweets = "statuses_count".fieldOf[Int]
 }

 val user = "user".fieldOf[DBObject]
 val U = User.name ~ User.tweets

 // U is an extractor that returns a tuple of "name" and "tweets" from a DBObject, so this should work:
 coll.find.iterator take 2 flatMap { doc => user.unapply(doc).toIterator } foreach {
   case U(screenName, statuses) => println(screenName+" has "+statuses)
 }

 // since user is an extractor itself, the two patterns nest:
 coll.find.iterator take 5 foreach {
   case user(U(screenName, statuses)) => println(screenName+" has "+statuses)
 }

 // or we can extract both subdocument and document fields at once
 coll.find.iterator take 5 foreach {
   case user(U(screenName, statuses)) ~ text(body) ~ createdAt(when) =>
     println("%s [%d] @ %s: %s".format(screenName, statuses, when, body))
 }

 // by the way, we are not obliged to query a MongoDB collection: DBObjects from any source can be deserialized, e.g. our stream
 tweets("planetscala", 2) foreach {
   case text(body) ~ createdAt(when) => println(body+" @ "+when)
 }
diff --git a/04_typedSubset.scala b/04_typedSubset.scala
 // want User to return a useful object?
 // :paste
 /** A user as read from the "user" subdocument: screen name, status count and description. */
 case class User(name: String, tweets: Int, description: String)

 /** Field definitions for the subdocument plus an implicit reader that assembles a [[User]] from them. */
 object User {
   val name = "screen_name".fieldOf[String]
   val description = "description".fieldOf[String]
   val tweets = "statuses_count".fieldOf[Int]

   // the reader is defined only for documents carrying all three fields
   implicit val userReader =
     ValueReader[User]({
       case name(screenName) ~ tweets(statuses) ~ description(bio) =>
         User(screenName, statuses, bio)
     })
 }
 // :paste off

 val user = "user".subset(User).of[User]

 // then the subdocument comes back as a User instance:
 coll.find.iterator take 5 foreach {
   case user(author) ~ text(body) ~ createdAt(when) =>
     println("%s @ %s: %s".format(author, when, body))
 }
diff --git a/05_saving.scala b/05_saving.scala
 // just in case you wonder if you can save something

 implicit val dateTimeWriter = ValueWriter[DateTime](_.toDate)

 val coll1 = db.getCollection("timeline1")
 coll.find.iterator foreach {
   // the user pattern still has to match, but only text and created_at are written out
   case user(_) ~ text(body) ~ createdAt(when) =>
     coll1.insert(text(body) ~ createdAt(when))
 }

 // did you notice which fields the documents in the new collection have?.. how would you keep all the original fields?

 coll1.remove(Query.empty)
 coll.find.iterator foreach { original =>
   original match {
     // the extracted values are not needed; the pattern only requires both fields to be present
     case text(_) ~ createdAt(_) =>
       // the lens is applied to the original document and the result is saved
       val lens = createdAt(new DateTime) ~ text("new timeline")
       coll1.save(lens(original))
   }
 }

 // go make sure all the records have the current timestamp in the "created_at" field
	import scala.io._
	import collection.JavaConversions._
	import com.mongodb._
	import com.osinka.subset._
	import SmartValues._

	// we want to store it in MongoDB. ok..
	val mongo = new Mongo("192.168.0.164")
	val db = mongo.getDB("test")
	val coll = db.getCollection("timeline")

	// build a unique index on the "id" field so that we do not store duplicates
	val id = "id".fieldOf[Long]
	coll.ensureIndex(id.int === 1, "unique".fieldOf[Boolean] === true)

	// We want structured data and Twitter may give us some. We want them as DBObjects
	/** Downloads a user's timeline from Twitter's `user_timeline.json` endpoint.
	  *
	  * @param name  value substituted into the `screen_name` query parameter
	  * @param count value substituted into the `count` query parameter (default 20)
	  * @return one `DBObject` per key of the parsed JSON response
	  */
	def tweets(name: String, count: Int = 20): Iterable[DBObject] = {
	  import com.mongodb.util._
	  val stream = Source.fromURL("http://api.twitter.com/1/statuses/user_timeline.json?screen_name=%s&count=%d".format(name, count))
	  // the response is read eagerly, so the source can (and must) be closed right away
	  val json = try stream.mkString finally stream.close()
	  val timeline = JSON.parse( json ).asInstanceOf[DBObject]
	  for {k <- timeline.keySet}
	  yield timeline.get(k).asInstanceOf[DBObject]
	}

	// ok, let's save a few
	tweets("planetscala", 50) foreach { doc =>
	  // a non-null error on the write result means the save did not go through
	  Option(coll.save(doc).getError) foreach { err => println("Failed to save: "+err) }
	}

	// feel free to repeat!
	// we want to see DateTime in Joda, but they have a specific format in Twitter
	import org.joda.time.DateTime
	implicit val myDateReader = {
	  import java.util.Locale
	  import org.joda.time.format.DateTimeFormat

	  // English locale is set explicitly so that day and month names parse regardless of the JVM default
	  val twitterFormat =
	    DateTimeFormat.forPattern("EEE MMM dd HH:mm:ss Z yyyy").withLocale(Locale.ENGLISH)

	  // only String values are handled; each one is parsed with the format above
	  ValueReader[DateTime]({
	    case raw: String => twitterFormat.parseDateTime(raw)
	  })
	}
	val text = "text".fieldOf[String]
	val createdAt = "created_at".fieldOf[DateTime]

	// print every stored document as "<text> @ <created_at>"
	coll.find.iterator foreach {
	  case text(body) ~ createdAt(when) => println(body + " @ " + when)
	}

	// ok, fine, how about a User subdocument?
	/** Field definitions used to read the "user" subdocument. */
	object User {
	  val name = "screen_name".fieldOf[String]
	  val description = "description".fieldOf[String]
	  val tweets = "statuses_count".fieldOf[Int]
	}

	val user = "user".fieldOf[DBObject]
	val U = User.name ~ User.tweets

	// U is an extractor that returns a tuple of "name" and "tweets" from a DBObject, so this should work:
	coll.find.iterator take 2 flatMap { doc => user.unapply(doc).toIterator } foreach {
	  case U(screenName, statuses) => println(screenName+" has "+statuses)
	}

	// since user is an extractor itself, the two patterns nest:
	coll.find.iterator take 5 foreach {
	  case user(U(screenName, statuses)) => println(screenName+" has "+statuses)
	}

	// or we can extract both subdocument and document fields at once
	coll.find.iterator take 5 foreach {
	  case user(U(screenName, statuses)) ~ text(body) ~ createdAt(when) =>
	    println("%s [%d] @ %s: %s".format(screenName, statuses, when, body))
	}

	// by the way, we are not obliged to query a MongoDB collection: DBObjects from any source can be deserialized, e.g. our stream
	tweets("planetscala", 2) foreach {
	  case text(body) ~ createdAt(when) => println(body+" @ "+when)
	}
	// want User to return a useful object?
	// :paste
	/** A user as read from the "user" subdocument: screen name, status count and description. */
	case class User(name: String, tweets: Int, description: String)

	/** Field definitions for the subdocument plus an implicit reader that assembles a [[User]] from them. */
	object User {
	  val name = "screen_name".fieldOf[String]
	  val description = "description".fieldOf[String]
	  val tweets = "statuses_count".fieldOf[Int]

	  // the reader is defined only for documents carrying all three fields
	  implicit val userReader =
	    ValueReader[User]({
	      case name(screenName) ~ tweets(statuses) ~ description(bio) =>
	        User(screenName, statuses, bio)
	    })
	}
	// :paste off

	val user = "user".subset(User).of[User]

	// then the subdocument comes back as a User instance:
	coll.find.iterator take 5 foreach {
	  case user(author) ~ text(body) ~ createdAt(when) =>
	    println("%s @ %s: %s".format(author, when, body))
	}
	// just in case you wonder if you can save something

	implicit val dateTimeWriter = ValueWriter[DateTime](_.toDate)

	val coll1 = db.getCollection("timeline1")
	coll.find.iterator foreach {
	  // the user pattern still has to match, but only text and created_at are written out
	  case user(_) ~ text(body) ~ createdAt(when) =>
	    coll1.insert(text(body) ~ createdAt(when))
	}

	// did you notice which fields the documents in the new collection have?.. how would you keep all the original fields?

	coll1.remove(Query.empty)
	coll.find.iterator foreach { original =>
	  original match {
	    // the extracted values are not needed; the pattern only requires both fields to be present
	    case text(_) ~ createdAt(_) =>
	      // the lens is applied to the original document and the result is saved
	      val lens = createdAt(new DateTime) ~ text("new timeline")
	      coll1.save(lens(original))
	  }
	}

	// go make sure all the records have the current timestamp in the "created_at" field