Created
September 2, 2011 22:44
-
-
Save heuristicfencepost/1190136 to your computer and use it in GitHub Desktop.
Clojure and Ruby apps to populate mock user data into a Cassandra database and then retrieve it
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
; Populate data for a set of random users to a Cassandra instance.
;
; Users consist of the following set of data:
; - a username [String]
; - a user ID [integer]
; - a flag indicating whether the user is "active" [boolean]
; - a list of location IDs for each user [list of integer]
;
; User records are keyed by username rather than by user ID, mainly because at the moment
; we only support strings for key values. The Cassandra API exposes keys as byte arrays,
; so we could extend our Cassandra support to include other datatypes.
(use '[fencepost.avro]) | |
(use '[fencepost.cassandra]) | |
(import '(org.apache.commons.lang3 RandomStringUtils) | |
'(java.util Random) | |
) | |
; Utility function to combine our Avro lib with our Cassandra lib.
;
; Every field is encoded with encode_with_schema first; only then are the
; encoded values inserted, one column at a time ("userid", "active",
; "locationids"), under "employee" in the row keyed by username.
; Returns whatever the final insert returns.
(defn add_user [client username userid active locationids]
  (let [id-bytes        (encode_with_schema userid)
        active-bytes    (encode_with_schema active)
        locations-bytes (encode_with_schema locationids)
        ; All three writes share the client, row key and "employee" target.
        put-column      (fn [column value]
                          (insert client username "employee" column value))]
    (put-column "userid" id-bytes)
    (put-column "active" active-bytes)
    (put-column "locationids" locations-bytes)))
; Generate ten users with random field values, store each one via add_user
; and print a one-line summary of what was written.
(let [client (connect "localhost" 9160 "employees")]
  (doseq [_ (range 10)]
    (let [username    (RandomStringUtils/randomAlphanumeric 16)
          rng         (Random.)
          userid      (.nextInt rng 1000)
          active      (.nextBoolean rng)
          ; vec realizes the ten draws immediately, before anything is stored.
          locationids (vec (repeatedly 10 (fn [] (.nextInt rng 100))))]
      (add_user client username userid active locationids)
      (println (format "Added user %s: [%s %s %s]" username userid active locationids)))))
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
; Retrieve information from the Cassandra database about each of our employees
(use '[fencepost.avro]) | |
(use '[fencepost.cassandra]) | |
(defn evaluate_user
  "Gather information for the specified user and display a minimal report about them.

  slices   - the value handed to range_slices_columns to look up the user's columns
  username - the key of the user to report on

  Prints the report to *out* and returns nil."
  [slices username]
  ; Note that the code below says nothing about types. We specify the column names we
  ; wish to access but whatever Cassandra + Avro supplies for the value of that column
  ; is what we get.
  (let [user_data (range_slices_columns slices username)
        userid (decode_from_schema (user_data :userid))
        active (decode_from_schema (user_data :active))
        ; Decoded but not reported; the check that would use it is disabled below.
        locationids (decode_from_schema (user_data :locationids))]
    (println (format "Username: %s" username))
    (println (format "Userid: %s" userid))
    (println (if (> userid 0) "Userid is greater than zero" "Userid is not greater than zero"))
    (println (format "Active: %s" active))
    (println (if active "User is active" "User is not active"))
    ; Every user should have at least one location ID.
    ;
    ; Well, they would if we were able to successfully handle an Avro record.
    ;(assert (> (count locationids) 0))
    ))
; Fetch every row of "employee" whose key lies between "!" and "~", announce
; how many users were found, then print a report for each of them in order.
(let [client     (connect "localhost" 9160 "employees")
      key_slices (get_range_slices client "employee" "!" "~")
      usernames  (range_slices_keys key_slices)]
  (println (format "Found %d users" (count usernames)))
  (doseq [username usernames]
    (evaluate_user key_slices username)))
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'rubygems' | |
require 'avro' | |
require 'cassandra' | |
# Decode a value stored in the two-layer Avro envelope read by this script.
#
# The outer layer is read with a map-of-bytes schema. Its "schema" entry is
# parsed as the Avro schema of the payload, and its "data" entry is the payload
# itself, which is then read with that schema.
#
# @param bytes [String] binary Avro encoding of the envelope
# @return [Object] whatever the payload's datum reader yields
def evaluate_avro_data(bytes)
  # Define the meta-schema
  meta_schema = Avro::Schema.parse('{"type": "map", "values": "bytes"}')

  # Read the meta source and extract the contained data and schema
  meta_datum_reader = Avro::IO::DatumReader.new(meta_schema)
  meta_val = meta_datum_reader.read(Avro::IO::BinaryDecoder.new(StringIO.new(bytes)))

  # Build a new reader which can handle the indicated schema
  schema = Avro::Schema.parse(meta_val['schema'])
  datum_reader = Avro::IO::DatumReader.new(schema)

  # The decoded payload is the method's return value.
  datum_reader.read(Avro::IO::BinaryDecoder.new(StringIO.new(meta_val['data'])))
end
client = Cassandra.new('employees', '127.0.0.1:9160')

# Visit every :employee row whose key lies between "!" and "~", decode its
# columns with evaluate_avro_data and print a short report per user.
client.get_range(:employee, { :start_key => "!", :finish_key => "~" }).each do |k, v|
  userid = evaluate_avro_data(v["userid"])
  active = evaluate_avro_data(v["active"])
  # Decoded but not included in the report below.
  locationids = evaluate_avro_data(v["locationids"])

  puts "Username: #{k}, user ID: #{userid}, active: #{active}"
  puts "User ID #{(userid > 0) ? "is" : "is not"} greater than zero"
  puts "User #{active ? "is" : "is not"} active"

  # Ruby's much more flexible notion of truthiness makes the tests above somewhat less
  # compelling. For extra validation, fail loudly when the decoded user ID is not an
  # integer. Integer is used rather than Fixnum, which newer Rubies no longer define.
  raise TypeError, "Oops, it's not a number" unless userid.is_a?(Integer)
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment