Skip to content

Instantly share code, notes, and snippets.

@cronin101
Last active August 29, 2015 13:57
Show Gist options
  • Select an option

  • Save cronin101/9410364 to your computer and use it in GitHub Desktop.

Select an option

Save cronin101/9410364 to your computer and use it in GitHub Desktop.
Sort-Merge Join, from ADBS Lecture 10
class MergeJoiner(object):
def __init__(self, input_a, input_b):
self.input_a, self.input_b = input_a, input_b
def joined(self):
# Using relation names from slides
R, S = self.input_a[:], self.input_b[:]
join = []
if not (R and S): return join
r_a, r_b = R.pop(0)
s_a, s_c = S.pop(0)
while True:
# Advance Outer relation whilst trailing Inner
while r_a < s_a:
try:
r_a, r_b = R.pop(0)
except IndexError:
return join
# Advance Inner relation whilst trailing Outer
while r_a > s_a:
try:
s_a, s_c = S.pop(0)
except IndexError:
return join
# Whilst keys equal:
# Increment Inner, adding resultant tuples to join but also storing as current group
group_vector = []
group_fst = None
while r_a == s_a:
group_fst = r_a
group_vector.append((s_a, s_c))
join.append((r_a, r_b, s_c))
try:
s_a, s_b = S.pop(0)
except IndexError:
break
# Advance Outer once Inner is no longer equal
try:
r_a, r_b = R.pop(0)
except IndexError:
return join
# If R has not changed despite the increment:
# Produce another set of resultant tuples for the past group.
while r_a == group_fst:
for s_a, s_c in group_vector:
join.append((r_a, r_b, s_c))
# Another increment of R, pre-check
try:
r_a, r_b = R.pop(0)
except IndexError:
return join
# Group can be released
group_vector = None
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment