Created
August 18, 2012 09:30
-
-
Save daemianmack/3385630 to your computer and use it in GitHub Desktop.
org chart scrape-builder from FM
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'rubygems'
require 'set'
require 'yaml'
require 'hpricot'
require 'mechanize'
# --- Credentials ------------------------------------------------------
# Account e-mail typed into the login form by find_targets.
# NOTE(review): this literal looks like a redaction placeholder rather
# than a real address -- confirm the intended value before running.
USER_ACCOUNT = "[email protected]"
# Password comes from the first command-line argument (nil when absent).
PASSWORD     = ARGV.first

# --- Crawl limits -----------------------------------------------------
# Pause handed to `sleep` between profile fetches in harvest.
SLEEP_INTERVAL = 0.5
# Cap on the number of profile links harvest will follow.
MAX_LINKS      = 200
###########
# Helpers #
###########
module Enumerable
  # Returns an Array holding the first element seen for each distinct
  # value of the block, in iteration order.
  #
  #   [1, 2, 3, 4].uniq_by { |n| n % 2 }  # => [1, 2]
  def uniq_by
    seen_keys = {}
    inject([]) do |kept, item|
      key = yield(item)
      unless seen_keys.key?(key)
        seen_keys[key] = true
        kept << item
      end
      kept
    end
  end
end
# Hash that can create missing entries on demand: `produce` returns the
# value stored under a key, first building a Person for it when the key
# is absent (a stand-in for default_proc on Ruby < 1.9).
class OmniscientHash < Hash
  # key - the lookup key, also passed to Person.new for a new entry.
  #
  # Returns the value stored under key.
  def produce(key)
    return self[key] if key?(key)

    self[key] = Person.new(key)
  end
end
# Thought it would be neat to visually weight number of descendant
# reports to get a sense of departmental distribution. End result
# not smooth curve as hoped -- report counts distributed too chunkily.
#
# Builds the global $heat_map: a list of [threshold, color] pairs in
# descending threshold order, ending with [0, "grey"]. OrgChart picks
# the first pair whose threshold is <= a person's descendant count.
#
# max - the largest descendant count in the chart (an Integer).
#
# Returns $heat_map. Also (re)assigns the global $colors.
def make_heat_map(max)
  $colors = %w(magenta hotpink maroon crimson red orange yellow greenyellow steelblue cornflowerblue)

  # With max <= 1 the range 1..max-1 is empty, and with max == 0 the step
  # would be zero (Range#step raises on that), so skip straight to the
  # grey-only map instead of crashing on a root with no reports.
  thresholds =
    if max > 1
      step = max / $colors.count.to_f
      # Ascending floats from 1 up to max-1, flipped so the largest
      # threshold lines up with the first (hottest) color, then floored.
      (1..max - 1).step(step).to_a.reverse.map { |x| x.floor }
    else
      []
    end

  $heat_map = thresholds.zip($colors)
  $heat_map.push([0, "grey"]) # With zero reports, you get a boring color.
end
# Shells out to turn graph.out into graph.svg: Graphviz `dot` writes
# temp.svg, then xsltproc applies the notugly stylesheet to it.
def render
  %x{dot -Tsvg graph.out > temp.svg}
  # The stylesheet comes from:
  #   git clone https://github.com/vidarh/diagram-tools
  %x{xsltproc diagram-tools/notugly.xsl temp.svg > graph.svg}
  # Cleanup is intentionally left disabled:
  #`rm graph.out temp.svg`
end
#########################################################################
# Screen-scrape the two worksimple pages that have direct links to each #
# FM employee. Stash reporting graph data in Person::PEOPLE.            #
#########################################################################
# Follows each distinct profile link returned by find_targets (deduped
# by href), looks up the person and their manager on that page, and
# records the relationship in Person::PEOPLE.
#
# At most MAX_LINKS links are followed; the previous `>` comparison let
# one extra link through before stopping. Sleeps SLEEP_INTERVAL between
# fetches.
#
# Returns the result of the final hard-coded reports_to call.
def harvest
  visited = 0
  find_targets.uniq_by { |link| link.href }.each do |link|
    if visited >= MAX_LINKS
      p 'Visited max links!'
      break
    end
    visited += 1

    this_guy, his_manager = search_for_people link
    # GOALLLLLLLLLL
    Person::PEOPLE.produce(this_guy).reports_to(his_manager)
    sleep SLEEP_INTERVAL
  end
  Person::PEOPLE.produce("Tom Davies").reports_to("Nate Perry-Thistle") # Tom's not in yet.
end
# Logs in to the WorkSimple site with USER_ACCOUNT / PASSWORD, opens the
# "Teams" listing and its "Next" page, and collects every link whose
# href contains "/users/", minus links whose text contains "Profile".
#
# Always returns an Array. The previous `reject!` returned nil whenever
# there was no "Profile" link to remove, which made harvest fail on nil.
# Links without an href are skipped instead of raising.
#
# Raises NoMethodError if the "Teams" or "Next" link is missing.
def find_targets
  agent = Mechanize.new
  page = agent.get 'https://federatedmedia.getworksimple.com/session/new'
  page = page.form_with :action => '/session' do |f|
    f.field_with(:name => "user_session[email]").value = USER_ACCOUNT
    f.field_with(:name => "user_session[password]").value = PASSWORD
    # NOTE(review): this appears to write the form's existing token back
    # unchanged; kept as in the original.
    f.field_with(:name => "authenticity_token").value = f.authenticity_token
  end.submit

  # href may be nil for anchors without a target, hence to_s.
  user_links = lambda do |listing|
    listing.links.select { |x| x.href.to_s.include?("/users/") }
  end

  page = page.links.find { |x| x.text == "Teams" }.click # Page 1 of listing.
  targets = user_links.call(page)
  page = page.links.find { |x| x.text.include?("Next") }.click # Page 2 of listing.
  targets.concat(user_links.call(page))

  targets.reject { |x| x.text.include?("Profile") } # Snip my account edit pages.
end
# Opens a profile link and reads the person's name and their manager's
# name from the resulting page.
#
# link - an object whose `click` yields a page exposing `content`.
#
# Prints "<name>'s manager is <manager>" and returns
# [this_guy, his_manager]; his_manager is nil when the reviewer text is
# empty.
def search_for_people(link)
  doc = Hpricot(link.click.content)

  # "\nThis Guy\n" => "This Guy" (runs of spaces collapsed).
  name_markup = doc.search("div.name").innerHTML
  this_guy = name_markup.split("\n").last.squeeze(" ")

  # "\n\nHis Manager\n" => "His Manager"
  reviewer_text = doc.search('div.reviewer a.user').text
  listed_manager = reviewer_text.split("\n").last

  # Workaround Amy Yeh's zombied entry ("/users/7894").
  his_manager = listed_manager == "Amy Yeh" ? "Charlie Speight" : listed_manager

  puts "#{this_guy}'s manager is #{his_manager}"
  [this_guy, his_manager]
end
#######################################################################
# Model for people, with each person holding a list of their reports. #
#######################################################################
# One person in the org chart.
#
# name    - display name as given to the constructor.
# pk      - name lowercased with spaces, apostrophes and hyphens turned
#           into underscores, used as the DOT node identifier.
# reports - Array of Person objects that report to this person.
class Person
  # Registry of every Person created, keyed by name.
  PEOPLE = OmniscientHash.new

  attr_accessor :name, :pk, :reports

  # Builds the person and registers it in PEOPLE under its name.
  def initialize(name)
    @name = name
    @pk = name.downcase.gsub(/[ '-]/, "_") # Punctuated names break DOT. :(
    @reports = []
    PEOPLE[name] = self
  end

  # Appends this person to the reports of the manager called `name`,
  # creating the manager's entry if needed. Does nothing (and returns
  # nil) when name is nil or false.
  def reports_to(name)
    return unless name

    PEOPLE.produce(name).reports << self
  end

  # Recurse over person's reports, return total top-down count: every
  # direct report counts as one, plus that report's own total.
  def self.count_sub_reports(person)
    person.reports.inject(0) do |total, report|
      total + 1 + count_sub_reports(report)
    end
  end
end
##########################################################################
# Generate a graphviz chart showing the reporting structure described in #
# Person::PEOPLE. The three basic DOT concerns -- points, edges and      #
# boxes -- fail to parse in our top-down rendering if intermixed,        #
# requiring that we sort by type.                                        #
##########################################################################
# Walks the reporting tree below a root Person and writes it to
# "graph.out" as a DOT digraph. Statements are gathered into three
# lists (points, edges, boxes) and written in that order.
class OrgChart
  # Suffixes appended to a person's pk when emitting box nodes: one box
  # under the bare pk plus four suffixed copies with the same label.
  # NOTE(review): the four copies have no edges attached and look like
  # leftover experimentation -- confirm whether they are still wanted.
  BOX_SUFFIXES = ["", "1", "2", "3", "4"].freeze

  # root_person - the Person at the top of the chart.
  #
  # Constructing the chart performs the whole walk and writes graph.out.
  def initialize(root_person)
    @seen = Set.new # pks of people already given boxes
    @points, @edges, @boxes = [], [], []
    node root_person
    walk_reports_of root_person
    emit_DOT
  end

  # Records the box statements for person unless already recorded.
  #
  # Returns false if the person was seen before, otherwise the (truthy)
  # list of box statements.
  def node(person)
    return false if @seen.member? person.pk

    @seen << person.pk
    color = apply_heat_map person
    shape = person.reports.empty? ? "box" : "oval"
    BOX_SUFFIXES.each do |suffix|
      @boxes << %Q{ p#{person.pk}#{suffix} [ shape = #{shape}, style="filled", fillcolor="#{color}", label="#{person.name}" ] }
    end
    @boxes
  end

  # Depth-first walk over person's reports, recording a point for the
  # manager and an edge to each report not yet seen.
  def walk_reports_of(person)
    person.reports.each do |r|
      # @seen holds pk strings, so membership must be tested with r.pk.
      # Testing the Person itself never matched, which let a repeated
      # report produce a duplicate edge.
      unless @seen.member? r.pk
        @points << " p#{person.pk} [ shape = point ]"
        @edges << " p#{person.pk} -> p#{r.pk}"
      end
      # node returns false for an already-seen person, which also stops
      # the recursion from looping on repeated entries.
      walk_reports_of r if node r
    end
  end

  # Returns the color of the first $heat_map entry whose threshold is
  # <= the person's descendant count (nil if none matches).
  def apply_heat_map(person)
    count = Person.count_sub_reports person
    _threshold, color = $heat_map.find { |threshold, _color| count >= threshold }
    color
  end

  # Writes the collected statements to graph.out, grouped by type.
  def emit_DOT
    File.open("graph.out", "w") do |f|
      f.write "digraph orgchart {"
      [@points, @edges, @boxes].each { |section| f.write(section.join("\n")) }
      f.write "}"
    end
  end
end
#######
# Go. #
#######
# Builds and renders the org chart rooted at "John Battelle" from the
# YAML snapshot in marshalled.txt.
#
# Returns whatever render returns.
def main
  # Live-scrape path, disabled in favour of the cached snapshot below:
  # harvest
  # make_heat_map Person.count_sub_reports(Person::PEOPLE["John Battelle"])
  # OrgChart.new Person::PEOPLE["John Battelle"]

  # The snapshot holds Person objects. Psych 4's YAML.load refuses
  # custom classes, so use unsafe_load where it exists and fall back to
  # load on older versions.
  # SECURITY: this deserializes arbitrary Ruby objects -- only ever
  # point it at a snapshot file you produced yourself.
  loader = YAML.respond_to?(:unsafe_load) ? :unsafe_load : :load
  # Block form closes the file handle, which the bare open() leaked.
  people = File.open("marshalled.txt") { |f| YAML.public_send(loader, f) }

  root = people["John Battelle"]
  make_heat_map Person.count_sub_reports(root)
  OrgChart.new root
  render
end
# Usage: ruby graph.rb [PASSWORD]
#
# Only run when this file is executed directly, so that loading it from
# another script or an irb session does not immediately kick off main.
main if __FILE__ == $PROGRAM_NAME
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment