Last active
April 10, 2017 21:17
-
-
Save cloudbank/0799e1bc980a45debd8801b863339837 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
*Given a large list, insert into a balanced BST, then find the distance between 2 nodes | |
*/ | |
//the BST tree is required by the question, n is unknown but could be large | |
//step 1: sort the data the best way we know for large data | |
//given a "list" and number N of items. | |
//Is this data in one list or not? We don't know. | |
public static BigDecimal NUM; | |
int numArrays = NUM / Integer.MAX_VALUE - 5; | |
//make that many arrays, chunk the "list" or stream of data. | |
//external sort comes to mind; then we have Stream API or Arrays.parallelSort: | |
private List<Integer> sortLargeList(List<Integer> list) { | |
// check for size of list wrt to maxsize of List | |
return list.parallelStream() // in parallel, not just concurrently! | |
.sorted().collect(Collectors.toList());// sort them | |
} | |
//step 2: | |
//"list" must be divided into numArrays subLists if it is not contained in one list. | |
//Obviously the data won't fit into one list if it is very large | |
//and then one must assume it is streaming from somewhere...and we can get the IO and put it into lists | |
//Although there is a merge function for balanced trees, we have to deal with what we have | |
//outside of just theory. So we could code insert and merge for red-black tree but that is very advanced. | |
//In in the end we want to find the distance between nodes in a tree, so TreeMap proves inadequate | |
//-- and has a limit to its size as well. So we can | |
//store memory adequate size trees across a load balancing algo for many machines...... | |
//Find how many trees can fit onto a machine and then we have ranges over machines for finding a node based on its original | |
//index | |
for (List l : List[] lists ) { <----sublists | |
Integer[] a = sortLargeList(list); <-- sort each one at max size | |
//step3: insert the sorted arrays into the tree...we will have many trees...if we have much data | |
addTree(insertToBST(a, 0, a.length)); //...<----stick the ith tree into the machine with that range over total#machines | |
} | |
//get the nodes from their tree | |
getInRange(node.index, node2.index)); <---- get the machine with its range and return the tree | |
//what if nodes are not in same tree? getInRange() must return an error message, else it returns the tree root | |
findDistance(root, node, node2); //use iterative bs, lca to get the distance; we could also use bfs | |
BSTNode insertToBST(Integer[] arr, int start, int end) { | |
/* Base Case */ | |
if (start > end) { | |
return null; | |
} | |
/* Get the middle element and make it root */ | |
int mid = (start + end) >>> 1; | |
BSTNode node = new BSTNode(arr[mid]); | |
/* | |
* Recursively construct the left subtree and make it left child of root | |
*/ | |
node.setLeft(insertToBST(arr, start, mid - 1)); | |
/* | |
* Recursively construct the right subtree and make it right child of | |
* root | |
*/ | |
node.setRight(insertToBST(arr, mid + 1, end)); | |
return node; | |
} | |
//step4: get the distance between nodes using iterative binary search, iterative lca. | |
private int findDistance(BSTNode root, BSTNode n1, BSTNode n2) { | |
int x = binarySearchDist(root, n1.getValue()).count; | |
int y = binarySearchDist(root, n2.getValue()).count; | |
int lcaDistance = lcaDist(root, n1, n2); | |
return (x + y) - 2 * lcaDistance; | |
} | |
private BSTNodePair binarySearchDist(BSTNode root, int key) { | |
int count = 0; | |
BSTNode curr = root; | |
while (curr != null) { | |
if (curr.getValue() > key) { | |
count++; | |
curr = curr.getLeft(); | |
} else if (curr.getValue() < key) { | |
count++; | |
curr = curr.getRight(); | |
} else { | |
return count-1; | |
} | |
} | |
return -1; | |
} | |
private int lcaDist(BSTNode root, BSTNode one, BSTNode two) { | |
// Check if one and two are in the root tree. | |
int count = 0; | |
while (root != null) { | |
if (root.getValue() < one.getValue() | |
&& root.getValue() < two.getValue()) { | |
root = root.getRight(); | |
count++; | |
} else if (root.getValue() > one.getValue() | |
&& root.getValue() > two.getValue()) { | |
root = root.getLeft(); | |
count++; | |
} else { | |
count++; | |
return count - 1; | |
} | |
} | |
return -1; | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment