Created
March 22, 2010 19:24
-
-
Save iwarshak/340433 to your computer and use it in GitHub Desktop.
gists for mongo mapreduce post
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# A book document. Every attribute named in CONTEXTS is declared as an
# indexed Array key, and the list of those names is stored on each document
# in its +contexts+ key when the document is created.
class Book
  include MongoMapper::Document

  # Names of the array-valued keys declared below.
  CONTEXTS = ['authors', 'rating', 'keywords', 'genre'].freeze

  CONTEXTS.each do |context|
    key context, Array, :index => true
  end

  key :title, String
  key :contexts, Array

  before_create :set_contexts

  # before_create callback: records the context names on the document.
  # A copy is assigned so that changing one document's +contexts+ cannot
  # alter the shared constant.
  def set_contexts
    self.contexts = CONTEXTS.dup
  end
end
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'active_support' | |
require 'benchmark' | |
# Alphabet that random words are drawn from: a-z followed by A-Z.
O = (('a'..'z').to_a + ('A'..'Z').to_a).freeze

# Builds an array of random alphabetic words.
#
# number - how many words to generate (default 5).
# length - how many characters each word contains (default 5).
#
# Returns an Array of +number+ Strings, each exactly +length+ characters
# long and made up only of letters from O.
def generate_word_array(number = 5, length = 5)
  Array.new(number) do
    Array.new(length) { O[rand(O.length)] }.join
  end
end
# Pools of random values that the seeding loops sample from. Every entry is
# itself a one-element array, because generate_word_array returns an Array.
AUTHORS  = Array.new(20_000)  { generate_word_array(1) }
KEYWORDS = Array.new(1_000)   { generate_word_array(1) }
GENRE    = Array.new(1_000)   { generate_word_array(1) }
TITLES   = Array.new(100_000) { generate_word_array(1, 10) }
# With 100k records
Book.collection.remove # delete all existing records
100_000.times do
  # Each attribute is sampled from the pre-generated pools.
  attributes = {
    :title    => TITLES.rand,
    :authors  => AUTHORS.rand.flatten,
    :keywords => Array.new(10) { KEYWORDS.rand }.flatten,
    :rating   => [1, 2, 3, 4, 5].rand,
    :genre    => Array.new(2) { GENRE.rand }.flatten
  }
  Book.new(attributes).save
end
# irb(main):202:0> Benchmark.measure { Book.facet_search("authors" => {"$in" => AUTHORS.rand}) } | |
# => #<Benchmark::Tms:0x132c07a28 @cstime=0.0, @total=0.0, @cutime=0.0, @label="", @stime=0.0, @real=0.0468628406524658, @utime=0.0> | |
# | |
# Benchmark.measure { puts Book.facet_search('rating' => {"$in" => [3,4,5]}, 'keywords' => {'$in' => KEYWORDS.rand}) } | |
# => #<Benchmark::Tms:0x132d5af38 @cstime=0.0, @total=0.390000000000004, @cutime=0.0, @label="", @stime=0.0199999999999996, @real=3.84707593917847, @utime=0.370000000000005> | |
# With 1 Million records
Book.collection.remove
1_000_000.times do
  # Each attribute is sampled from the pre-generated pools.
  attributes = {
    :title    => TITLES.rand,
    :authors  => AUTHORS.rand.flatten,
    :keywords => Array.new(10) { KEYWORDS.rand }.flatten,
    :rating   => [1, 2, 3, 4, 5].rand,
    :genre    => Array.new(2) { GENRE.rand }.flatten
  }
  Book.new(attributes).save
end
# irb(main):233:0> Benchmark.measure { Book.facet_search("authors" => {"$in" => AUTHORS.rand}) } | |
# => #<Benchmark::Tms:0x132fc3c48 @cstime=0.0, @total=0.0400000000001981, @cutime=0.0, @label="", @stime=0.00999999999999801, @real=0.588751077651978, @utime=0.0300000000002001> | |
# | |
# irb(main):235:0> Benchmark.measure { puts Book.facet_search('rating' => {"$in" => [3,4,5]}, 'keywords' => {'$in' => KEYWORDS.rand}) } | |
# => #<Benchmark::Tms:0x132702ac8 @cstime=0.0, @total=0.689999999999856, @cutime=0.0, @label="", @stime=0.0300000000000011, @real=43.1640980243683, @utime=0.659999999999854> |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'rubygems' | |
require 'mongo_mapper' | |
# Connect MongoMapper to the server on localhost and select the 'books'
# database for all documents defined below.
MongoMapper.connection = Mongo::Connection.new('localhost')
MongoMapper.database   = 'books'
# A book document whose array-valued attributes (the "contexts") can be
# counted as facets with a single MongoDB map/reduce pass.
class Book
  include MongoMapper::Document

  # Names of the array-valued keys that are counted as facets.
  CONTEXTS = ['authors', 'rating', 'keywords', 'genre'].freeze

  CONTEXTS.each do |context|
    key context, Array, :index => true
  end

  key :title, String
  key :contexts, Array

  before_create :set_contexts

  # before_create callback: stores the facet key names on the document so
  # the map function can read them from this.contexts. A copy is assigned so
  # that changing one document's +contexts+ cannot alter the shared constant.
  def set_contexts
    self.contexts = CONTEXTS.dup
  end

  # Counts, per context, how often each tag occurs among matching books.
  #
  # query - Hash passed to map_reduce as its :query option (default {}).
  #
  # Returns a Hash of the form { context => { tag => count } }, e.g.
  #   {"genre" => {"fiction" => 1.0}, "authors" => {"John Grisham" => 1.0}}
  def self.facet_search(query = {})
    # Emits one { context: 1 } tuple for every tag in every context array.
    # `t` is declared with var so it does not leak into the global scope.
    map = <<-MAP
      function() {
        var that = this;
        this.contexts.forEach(function(context) {
          that[context].forEach(function(tag) {
            var t = {};
            t[context] = 1;
            emit(tag, t);
          });
        });
      }
    MAP

    # Sums the per-context counts emitted for one tag. The result has the
    # same { context: count } shape as each input tuple.
    reduce = <<-REDUCE
      function(tag, values) {
        var res = {};
        values.forEach(function(tuple) {
          for (var context in tuple) {
            if (res[context] === undefined) {
              res[context] = tuple[context];
            } else {
              res[context] += tuple[context];
            }
          }
        });
        return res;
      }
    REDUCE

    sort_facets(collection.map_reduce(map, reduce, :query => query))
  end

  # Regroups map/reduce output (keyed by tag) into a Hash keyed by context.
  #
  # t - the object returned by map_reduce; it must respond to +find+ and
  #     yield documents with '_id' (the tag) and 'value' ({ context => count }).
  #
  # Returns a Hash of the form { context => { tag => count } }.
  def self.sort_facets(t)
    contexts = {}
    t.find.each do |res|
      tag = res['_id']
      res['value'].each do |ctxt, count|
        contexts[ctxt] ||= {}
        contexts[ctxt][tag] ||= 0
        contexts[ctxt][tag] += count
      end
    end
    contexts
  end
  # A bare `private` has no effect on `def self.` methods, so the helper is
  # hidden explicitly.
  private_class_method :sort_facets
end
# Book.create(:title => 'Jurassic Park', :author => 'Michael Crichton', :authors => ['Michael Crichton'], :genre => ['fiction'], :keywords => ['velociraptor', 'clever girl'], :rating => [4]) | |
# Book.create(:title => 'Sphere', :author => 'Michael Crichton', :authors => ['Michael Crichton'], :genre => ['fiction'], :keywords => ['ocean'], :rating => [5]) | |
# Book.create(:title => 'The Firm', :author => 'John Grisham', :authors => ['John Grisham'], :genre => ['fiction'], :keywords => ['law', 'lawyer'], :rating => [4]) | |
# irb(main):237:0> Book.facet_search("authors" => {"$in" => ['John Grisham']}) | |
# => {"rating"=>{4.0=>1.0}, "genre"=>{"fiction"=>1.0}, "authors"=>{"John Grisham"=>1.0}, "keywords"=>{"law"=>1.0, "lawyer"=>1.0}} | |
# | |
# irb(main):241:0> Book.facet_search("authors" => {"$in" => ['Michael Crichton']}, :rating => {"$in" => [5]}) | |
# => {"rating"=>{5.0=>1.0}, "genre"=>{"fiction"=>1.0}, "authors"=>{"Michael Crichton"=>1.0}, "keywords"=>{"ocean"=>1.0}} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Sample documents used by the facet_search examples.
[
  {
    :title    => 'Jurassic Park',
    :author   => 'Michael Crichton',
    :authors  => ['Michael Crichton'],
    :genre    => ['fiction'],
    :keywords => ['velociraptor', 'clever girl'],
    :rating   => [4]
  },
  {
    :title    => 'Sphere',
    :author   => 'Michael Crichton',
    :authors  => ['Michael Crichton'],
    :genre    => ['fiction'],
    :keywords => ['ocean'],
    :rating   => [5]
  },
  {
    :title    => 'The Firm',
    :author   => 'John Grisham',
    :authors  => ['John Grisham'],
    :genre    => ['fiction'],
    :keywords => ['law', 'lawyer'],
    :rating   => [4]
  }
].each do |attributes|
  Book.create(attributes)
end
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# JavaScript map function: for every context listed on the document, emits
# one { context: 1 } tuple per tag found in that context's array.
# `t` is declared with var so it does not become an implicit global.
map = <<-MAP
  function() {
    var that = this;
    this.contexts.forEach(function(context) {
      that[context].forEach(function(tag) {
        print('emitting. tag: ' + tag + ', ' + context +' : 1');
        var t = {};
        t[context] = 1;
        emit(tag, t);
      });
    });
  }
MAP
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Michael Crichton, { authors : 1 } | |
4, { rating : 1 } | |
velociraptor, { keywords : 1 } | |
clever girl, { keywords : 1 } | |
fiction, { genre : 1 } | |
Michael Crichton, { authors : 1 } | |
5, { rating : 1 } | |
ocean, { keywords : 1 } | |
fiction, { genre : 1 } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
reduce('Michael Crichton', [ {authors : 1}, {authors : 1} ])
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# JavaScript reduce function: sums, per context, the counts of all tuples
# emitted for one tag and returns a single { context: total } object.
# `res` and the loop variable `context` are declared with var so they do not
# become implicit globals.
reduce = <<-REDUCE
  function(tag, values) {
    var res = {};
    print('tag: ' + tag + ' values: ' + tojson(values));
    values.forEach(function(tuple) {
      for (var context in tuple) {
        if (res[context] === undefined) {
          print(tag + ' is undefined for ' + context + ' setting to ' + tuple[context]);
          res[context] = tuple[context];
        } else {
          print(tag + ' is currently ' + res[context] + ' incrementing by ' + tuple[context]);
          res[context] += tuple[context];
        }
      }
    });
    print("returning tag: " + tag + " values: " + tojson(res));
    return res;
  }
REDUCE
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
5, values: { "rating" : 1 } | |
4, values: {"rating" : 1 } | |
Michael Crichton, values: { "authors" : 2 } | |
clever girl, values: { "keywords" : 1 } | |
fiction, values: { "genre" : 2 } | |
ocean, values: { "keywords" : 1 } | |
velociraptor, values: { "keywords" : 1 } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
rating | |
5 : 1 | |
4 : 1 | |
authors | |
Michael Crichton: 2 | |
keywords | |
clever girl : 1 | |
ocean : 1 | |
velociraptor : 1 | |
genre | |
fiction : 2 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment