Created
March 22, 2010 19:24
-
-
Save iwarshak/340433 to your computer and use it in GitHub Desktop.
gists for mongo mapreduce post
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# MongoMapper document for a book in the faceted-search example.
#
# Every name in CONTEXTS is declared as an indexed Array key, and each newly
# created record stores the list of context names in its own `contexts` key.
class Book
  include MongoMapper::Document

  # Names of the array-valued keys declared below.
  CONTEXTS = %w[authors rating keywords genre].freeze

  CONTEXTS.each do |context|
    key context, Array, :index => true
  end

  key :title, String
  key :contexts, Array

  before_create :set_contexts

  # before_create callback: records which context keys this book carries.
  # A copy is assigned so a record never holds the frozen constant itself.
  def set_contexts
    self.contexts = CONTEXTS.dup
  end
end
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| require 'active_support' | |
| require 'benchmark' | |
# Alphabet the random words are drawn from: a-z followed by A-Z.
O = (('a'..'z').to_a + ('A'..'Z').to_a).freeze

# Builds an array of random alphabetic words.
#
# @param number [Integer] how many words to generate
# @param length [Integer] number of characters in each word
# @return [Array<String>] `number` strings, each exactly `length` letters from O
def generate_word_array(number = 5, length = 5)
  Array.new(number) do
    # Exactly `length` picks; iterating 0..length would add one extra letter.
    Array.new(length) { O[rand(O.length)] }.join
  end
end
# Pools of random values that the generated books draw from. Every entry is a
# one-word array, because generate_word_array returns an array.
AUTHORS = Array.new(20_000) { generate_word_array(1) }
KEYWORDS = Array.new(1_000) { generate_word_array(1) }
GENRE = Array.new(1_000) { generate_word_array(1) }
TITLES = Array.new(100_000) { generate_word_array(1, 10) }

# Empties the books collection, then saves `count` randomly generated books.
#
# NOTE(review): Array#rand is not core Ruby; presumably it is the
# ActiveSupport extension loaded by the require above -- confirm against the
# installed ActiveSupport version.
def seed_books(count)
  Book.collection.remove # delete all existing records
  count.times do
    Book.new(
      :title    => TITLES.rand,
      :authors  => AUTHORS.rand.flatten,
      :keywords => Array.new(10) { KEYWORDS.rand }.flatten,
      :rating   => [1, 2, 3, 4, 5].rand,
      :genre    => Array.new(2) { GENRE.rand }.flatten
    ).save
  end
end

# With 100k records
seed_books(100_000)
# irb(main):202:0> Benchmark.measure { Book.facet_search("authors" => {"$in" => AUTHORS.rand}) }
# => #<Benchmark::Tms:0x132c07a28 @cstime=0.0, @total=0.0, @cutime=0.0, @label="", @stime=0.0, @real=0.0468628406524658, @utime=0.0>
#
# Benchmark.measure { puts Book.facet_search('rating' => {"$in" => [3,4,5]}, 'keywords' => {'$in' => KEYWORDS.rand}) }
# => #<Benchmark::Tms:0x132d5af38 @cstime=0.0, @total=0.390000000000004, @cutime=0.0, @label="", @stime=0.0199999999999996, @real=3.84707593917847, @utime=0.370000000000005>

# With 1 Million records
seed_books(1_000_000)
# irb(main):233:0> Benchmark.measure { Book.facet_search("authors" => {"$in" => AUTHORS.rand}) }
# => #<Benchmark::Tms:0x132fc3c48 @cstime=0.0, @total=0.0400000000001981, @cutime=0.0, @label="", @stime=0.00999999999999801, @real=0.588751077651978, @utime=0.0300000000002001>
#
# irb(main):235:0> Benchmark.measure { puts Book.facet_search('rating' => {"$in" => [3,4,5]}, 'keywords' => {'$in' => KEYWORDS.rand}) }
# => #<Benchmark::Tms:0x132702ac8 @cstime=0.0, @total=0.689999999999856, @cutime=0.0, @label="", @stime=0.0300000000000011, @real=43.1640980243683, @utime=0.659999999999854>
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| require 'rubygems' | |
| require 'mongo_mapper' | |
| MongoMapper.connection = Mongo::Connection.new('localhost') | |
| MongoMapper.database = 'books' | |
# MongoMapper document for a book, with a map/reduce based facet search.
#
# Every name in CONTEXTS is declared as an indexed Array key, and each newly
# created record stores the list of context names in its own `contexts` key,
# which the map function iterates over.
class Book
  include MongoMapper::Document

  # Names of the array-valued keys that act as facets.
  CONTEXTS = %w[authors rating keywords genre].freeze

  CONTEXTS.each do |context|
    key context, Array, :index => true
  end

  key :title, String
  key :contexts, Array

  before_create :set_contexts

  # before_create callback: records which context keys this book carries.
  # A copy is assigned so a record never holds the frozen constant itself.
  def set_contexts
    self.contexts = CONTEXTS.dup
  end

  # Counts, per context, how often each tag occurs among the books matching
  # `query`.
  #
  # The map function emits (tag, { context => 1 }) for every tag in every
  # context of a document; the reduce function sums those tuples per tag.
  #
  # NOTE(review): both JavaScript functions call print() for every emitted
  # tag and every reduce step; this looks like debug output -- confirm
  # whether it is still wanted before running against large collections.
  #
  # @param query [Hash] selector passed to map_reduce as :query
  # @return [Hash] { context => { tag => count } }, e.g.
  #   { "authors" => { "John Grisham" => 1.0 }, "genre" => { "fiction" => 1.0 } }
  def self.facet_search(query = {})
    map = <<-MAP
      function() {
        var that = this;
        this.contexts.forEach(function(context) {
          that[context].forEach(function(tag) {
            print('!!!!!emitting. tag: ' + tag + ', { ' + context +' : 1 }');
            var t = {};
            t[context] = 1;
            emit(tag, t);
          });
        });
      }
    MAP

    reduce = <<-REDUCE
      function(tag, values) {
        var res = {};
        print('!!tag: ' + tag + ' values: ' + tojson(values));
        values.forEach(function(tuple) {
          for (var context in tuple) {
            if (res[context] === undefined) {
              print(tag + ' is undefined for ' + context + ' setting to ' + tuple[context]);
              res[context] = tuple[context];
            } else {
              print(tag + ' is currently ' + res[context] + ' incrementing by ' + tuple[context]);
              res[context] += tuple[context];
            }
          }
        });
        print("returning tag: " + tag + " values: " + tojson(res));
        return res;
      }
    REDUCE

    sort_facets(collection.map_reduce(map, reduce, :query => query))
  end

  # Regroups map/reduce output, which is keyed by tag, into a hash keyed by
  # context.
  #
  # @param t the map_reduce result; it must respond to `find` and yield rows
  #   with an '_id' (the tag) and a 'value' hash of { context => count }
  # @return [Hash] { context => { tag => count } }
  def self.sort_facets(t)
    contexts = {}
    t.find.each do |res|
      tag = res['_id']
      res['value'].each do |ctxt, count|
        by_tag = (contexts[ctxt] ||= {})
        by_tag[tag] = (by_tag[tag] || 0) + count
      end
    end
    contexts
  end
  # A bare `private` does not affect `def self.` methods, so the helper is
  # hidden explicitly.
  private_class_method :sort_facets
end
| # Book.create(:title => 'Jurassic Park', :author => 'Michael Crichton', :authors => ['Michael Crichton'], :genre => ['fiction'], :keywords => ['velociraptor', 'clever girl'], :rating => [4]) | |
| # Book.create(:title => 'Sphere', :author => 'Michael Crichton', :authors => ['Michael Crichton'], :genre => ['fiction'], :keywords => ['ocean'], :rating => [5]) | |
| # Book.create(:title => 'The Firm', :author => 'John Grisham', :authors => ['John Grisham'], :genre => ['fiction'], :keywords => ['law', 'lawyer'], :rating => [4]) | |
| # irb(main):237:0> Book.facet_search("authors" => {"$in" => ['John Grisham']}) | |
| # => {"rating"=>{4.0=>1.0}, "genre"=>{"fiction"=>1.0}, "authors"=>{"John Grisham"=>1.0}, "keywords"=>{"law"=>1.0, "lawyer"=>1.0}} | |
| # | |
| # irb(main):241:0> Book.facet_search("authors" => {"$in" => ['Michael Crichton']}, :rating => {"$in" => [5]}) | |
| # => {"rating"=>{5.0=>1.0}, "genre"=>{"fiction"=>1.0}, "authors"=>{"Michael Crichton"=>1.0}, "keywords"=>{"ocean"=>1.0}} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Sample catalogue for the walkthrough: two Michael Crichton novels and one
# John Grisham novel, created in this order.
[
  { :title => 'Jurassic Park', :author => 'Michael Crichton', :authors => ['Michael Crichton'],
    :genre => ['fiction'], :keywords => ['velociraptor', 'clever girl'], :rating => [4] },
  { :title => 'Sphere', :author => 'Michael Crichton', :authors => ['Michael Crichton'],
    :genre => ['fiction'], :keywords => ['ocean'], :rating => [5] },
  { :title => 'The Firm', :author => 'John Grisham', :authors => ['John Grisham'],
    :genre => ['fiction'], :keywords => ['law', 'lawyer'], :rating => [4] }
].each do |attributes|
  Book.create(attributes)
end
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# JavaScript map function: for every context listed on the document, emits
# (tag, { context: 1 }) for each tag stored under that context.
# `t` is declared with `var` so it stays local to the callback instead of
# becoming an implicit global.
map = <<-MAP
  function() {
    var that = this;
    this.contexts.forEach(function(context) {
      that[context].forEach(function(tag) {
        print('emitting. tag: ' + tag + ', ' + context +' : 1');
        var t = {};
        t[context] = 1;
        emit(tag, t);
      });
    });
  }
MAP
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Michael Crichton, { authors : 1 } | |
| 4, { rating : 1 } | |
| velociraptor, { keywords : 1 } | |
| clever girl, { keywords : 1 } | |
| fiction, { genre : 1 } | |
| Michael Crichton, { authors : 1 } | |
| 5, { rating : 1 } | |
| ocean, { keywords : 1 } | |
| fiction, { genre : 1 } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| reduce('Michael Crichton', [ {authors : 1}, {authors : 1} ]) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# JavaScript reduce function: sums the { context: count } tuples emitted for
# one tag into a single { context: total } object.
# `res` and the loop variable `context` are declared with `var` so they stay
# local to the function instead of becoming implicit globals.
reduce = <<-REDUCE
  function(tag, values) {
    var res = {};
    print('tag: ' + tag + ' values: ' + tojson(values));
    values.forEach(function(tuple) {
      for (var context in tuple) {
        if (res[context] === undefined) {
          print(tag + ' is undefined for ' + context + ' setting to ' + tuple[context]);
          res[context] = tuple[context];
        } else {
          print(tag + ' is currently ' + res[context] + ' incrementing by ' + tuple[context]);
          res[context] += tuple[context];
        }
      }
    });
    print("returning tag: " + tag + " values: " + tojson(res));
    return res;
  }
REDUCE
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| 5, values: { "rating" : 1 } | |
| 4, values: {"rating" : 1 } | |
| Michael Crichton, values: { "authors" : 2 } | |
| clever girl, values: { "keywords" : 1 } | |
| fiction, values: { "genre" : 2 } | |
| ocean, values: { "keywords" : 1 } | |
| velociraptor, values: { "keywords" : 1 } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| rating | |
| 5 : 1 | |
| 4 : 1 | |
| authors | |
| Michael Crichton: 2 | |
| keywords | |
| clever girl : 1 | |
| ocean : 1 | |
| velociraptor : 1 | |
| genre | |
| fiction : 2 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment