Created
June 3, 2015 02:17
-
-
Save schmohlio/f3d6866b9b3174f1fb1a to your computer and use it in GitHub Desktop.
copy all missing servers based on instructions
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
'''
DataSync

Generates instructions for copying datasets to the data centers that are
missing a backup of them, based on the input data.

- Ensures that each data center ends up with a copy of every dataset.
- Every dataset is included in at least one data center.

Makes use of set operations.
'''
import fileinput | |
class DataSync():
    """Build copy instructions so every data center holds every dataset.

    Data centers are numbered ``1..num_centers``. After a log has been
    processed, ``dataset_locs`` maps each dataset id to the set of data
    center indices that already hold it, and ``instructions`` is a list of
    ``(dataset_id, from_center, to_center)`` 3-tuples.
    """

    MAX_LINE_N = 10000
    # may want to expand to other types of input,
    # i.e. a list that can be sorted in place

    def __init__(self, num_centers):
        """Set up empty state for ``num_centers`` data centers."""
        # could use this to assert that all inputs were read.
        self.num_centers = num_centers
        # set of datacenters, parameterized on 1..N
        self.datacenters = set(range(1, self.num_centers + 1))
        # hashmap of dataset with set of backup centers
        self.dataset_locs = {}
        # instructions to print. list of 3-tuples
        self.instructions = []

    @staticmethod
    def clean_line(line):
        """Parse a line of whitespace-separated integers into a list of ints.

        ``str.split()`` with no argument is used so that trailing newlines
        (including ``\\r\\n``), repeated spaces and blank lines do not
        produce empty tokens; a blank line yields ``[]``.

        Raises:
            ValueError: if a token is not a valid integer.
        """
        return [int(token) for token in line.split()]

    def _persist_dataset_locations(self, lines):
        """Record which data centers hold each dataset.

        ``lines`` is indexed by data center: the first line lists the dataset
        ids held by center 1, the second by center 2, and so on. Empty
        strings are dropped before numbering; a whitespace-only line still
        occupies an index but contributes no datasets.

        Returns:
            True, as a status flag.
        """
        rows = [self.clean_line(l) for l in lines if l]  # watch blanks
        for index, ds_list in enumerate(rows, 1):
            for ds_id in ds_list:
                self.dataset_locs.setdefault(ds_id, set()).add(index)
        return True  # status

    def create_instructions_from_log(self, lines):
        """Append a copy instruction for every (dataset, missing center) pair.

        Returns:
            self, so that calls can be chained.
        """
        self._persist_dataset_locations(lines)
        for ds_id, dc_set in self.dataset_locs.items():
            missing = self.datacenters - dc_set
            if not missing:
                continue
            # Any holder is a valid source; pick the lowest-numbered one and
            # emit targets in ascending order so the output is reproducible.
            from_id = min(dc_set)
            self.instructions.extend(
                (ds_id, from_id, to_dc) for to_dc in sorted(missing)
            )
        return self

    def show(self):
        """Print each instruction as ``"<dataset> <from> <to>"``, then ``done``."""
        template = "%d %d %d"
        for instruction in self.instructions:
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print(template % instruction)
        print('done')
def main():
    """Read the input, build the copy instructions and print them.

    Lines are read through ``fileinput.input()``. The first line is parsed
    as the number of data centers; every remaining line is handed to
    ``DataSync.create_instructions_from_log`` as the per-center log.

    Raises:
        IndexError: if the input is empty.
        ValueError: if the first line is not an integer.
    """
    input_lines = list(fileinput.input())
    num_centers = int(input_lines[0])
    log = input_lines[1:]
    DataSync(num_centers).create_instructions_from_log(log).show()


# Only run when executed as a script, so importing this module does not
# consume stdin or print anything.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment