Skip to content

Instantly share code, notes, and snippets.

@jspacker
Last active December 15, 2015 16:08
Show Gist options
  • Save jspacker/5286656 to your computer and use it in GitHub Desktop.
Save jspacker/5286656 to your computer and use it in GitHub Desktop.
twitter-pagerank controlscript: the pagerank loop
# PageRank iteration loop of the Pig control script (Jython / Python 2).
#
# Runs the iteration Pig script up to self.max_num_iterations times, feeding
# each pass the output of the previous one, and stops early once the
# aggregate rank change reported by a pass drops below convergence_threshold.
#
# NOTE(review): `num_nodes` and `convergence_threshold` are not defined in
# this fragment; they are expected to be set earlier in the enclosing scope.

# Compile the Pig script once; each pass below only re-binds its parameters.
iteration = Pig.compileFromFile(self.iteration_script)

for i in range(self.max_num_iterations):
    # Single parenthesized argument: prints identically under Python 2/Jython.
    print("Starting iteration step: %s" % str(i + 1))

    # Append the iteration number to the input/output stems.
    # The first pass reads self.preprocess_pageranks; every later pass reads
    # the pageranks path written by the previous pass (suffix i - 1).
    if i == 0:
        iteration_input = self.preprocess_pageranks
    else:
        iteration_input = self.iteration_pageranks_prefix + str(i - 1)
    iteration_pageranks_output = self.iteration_pageranks_prefix + str(i)
    iteration_rank_changes_output = self.iteration_rank_changes_prefix + str(i)

    iteration_bound = iteration.bind({
        "INPUT_PATH": iteration_input,
        "DAMPING_FACTOR": self.damping_factor,
        "NUM_NODES": num_nodes,
        "PAGERANKS_OUTPUT_PATH": iteration_pageranks_output,
        "AGG_RANK_CHANGE_OUTPUT_PATH": iteration_rank_changes_output
    })
    iteration_stats = iteration_bound.runSingle()

    # Fail loudly if the Pig job did not succeed, instead of letting the
    # result lookup below blow up with an unrelated-looking error.
    if not iteration_stats.isSuccessful():
        raise RuntimeError(
            "PageRank iteration step %s failed: %s"
            % (str(i + 1), iteration_stats.getErrorMessage())
        )

    # Read the first field of the first record stored for the
    # "aggregate_rank_change" alias and convert it to an integer.
    rank_change_records = iteration_stats.result("aggregate_rank_change").iterator()
    aggregate_rank_change = long(str(rank_change_records.next().get(0)))

    # If we're below the convergence threshold break out of the loop.
    if aggregate_rank_change < convergence_threshold:
        print("Sum of ordering-rank changes %d under convergence threshold %d. Stopping."
              % (aggregate_rank_change, convergence_threshold))
        break
    elif i == self.max_num_iterations - 1:
        # Last allowed pass and still not converged: the loop ends on its own.
        print("Sum of ordering-rank changes %d "
              "above convergence threshold %d but hit max number of iterations. "
              "Stopping."
              % (aggregate_rank_change, convergence_threshold))
    else:
        print("Sum of ordering-rank changes %d above convergence threshold %d. Continuing."
              % (aggregate_rank_change, convergence_threshold))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment