Created
June 19, 2012 20:21
-
-
Save jbowles/2956309 to your computer and use it in GitHub Desktop.
Goldmine collections in MongoDb
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Use case for goldmine:
# the pivoting functionality of Goldmine#dig allows you to easily mine data
# from Mongo collections
require 'goldmine' | |
require 'mongo' | |
require 'yajl' | |
# Helpers for pulling documents out of a MongoDB collection on localhost and
# turning them into plain Ruby objects that Goldmine can pivot over.
class Digger
  # Convenience for connecting to a MongoDB database instance on localhost.
  #
  # @param [String] dbase name of the database
  # @param [String] coll_name name of the collection inside that database
  # @return the collection handle returned by the driver for +coll_name+
  def self.mongo_conn(dbase, coll_name)
    port = Mongo::Connection::DEFAULT_PORT
    Mongo::Connection.new('127.0.0.1', port).db(dbase)[coll_name]
  end

  # Get ALL documents from the collection and print how many there are.
  #
  # @param [String] dbase name of the database
  # @param [String] coll_name name of the collection
  # @return [Mongo::Cursor] unfiltered cursor over the collection
  def self.find_all(dbase, coll_name)
    collection = mongo_conn(dbase, coll_name)
    puts "got #{collection.find.count} records"
    collection.find
  end

  # Turn the result of Digger.find_all into an array by parsing, with Yajl,
  # the value stored under +document_name+ in every document.
  #
  # @note by default you'll get [Array<BSON::OrderedHash>], an Array of BSON
  #   Ordered Hashes; it's only after we parse the document that we get a more
  #   usable Hash
  # @param [Mongo::Cursor] mongo_cursor anything responding to +to_a+ that
  #   yields hash-like documents
  # @param [String] document_name key of the field holding the text to parse
  # @return [Array<Hash>] the parsed values, with nil results removed
  def self.collect_array(mongo_cursor, document_name)
    # to_s keeps the original lookup semantics (the key was interpolated into
    # a String), so a Symbol argument still works.
    field = document_name.to_s

    # NOTE(review): a document without +field+ hands nil to the parser;
    # confirm how Yajl reacts to that before relying on +compact+ to drop it.
    mongo_cursor.to_a.map { |doc| Yajl::Parser.parse(doc[field]) }.compact
  end

  # Now lets start digging!
  # See the examples below, I haven't defined them as methods yet
end
## @example
#require File.join(File.dirname(__FILE__), 'digger')
#digger = Digger
#cursor1 = digger.find_all('tamil','onovate')
#list1 = digger.collect_array(cursor1,'ono_stream')
#cursor2 = digger.find_all('tamil','two_onovate')
#list2 = digger.collect_array(cursor2,'ono_stream')
#
#
#follower_num = list1.dig("more than 1 follower"){|i| i['user']['followers_count'] > 1}
#bjw = list1.dig("bowleslingjw"){|i| i['user']['screen_name'] == 'bowleslingjw'}
#arrays = list1.dig("array"){|i| i["entities"]["hashtags"].is_a?(Array)}
#
#onovate = list1.dig("onovate"){|i| i["entities"]["hashtags"][0]["indices"]}
#list1[0]["entities"]["hashtags"][0]["text"]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Nice... I'm looking forward to seeing all the interesting ways that goldmine gets used.