Created
June 13, 2013 16:58
-
-
Save daemonfire300/5775402 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import datetime as dt | |
import random | |
def wikipedia_partition(list_set, left_end_index, right_end_index, pivot_index): | |
pivot_val = list_set[pivot_index] | |
list_set[pivot_index], list_set[right_end_index] = list_set[right_end_index], list_set[pivot_index] | |
temp_index = left_end_index | |
for i in range(left_end_index, right_end_index-1): | |
if pivot_val > list_set[i]: | |
list_set[temp_index], list_set[i] = list_set[i], list_set[temp_index] | |
temp_index += 1 | |
list_set[right_end_index], list_set[temp_index] = list_set[temp_index], list_set[right_end_index] | |
return (list_set, temp_index) | |
def wikipedia_select(list_set, left_end_index, right_end_index, k_mean): | |
pass | |
''' | |
partition into chunks of size 5 + maximum 1 of size len(list_set) mod 5 | |
''' | |
def partition(list_set): | |
result = [] | |
if(len(list_set) > 1): | |
i = 0 | |
temp_list_list_set = [] | |
while(i < len(list_set)): | |
j = i | |
temp_list_set = [] | |
while(j < (i+5) and j < len(list_set)): | |
temp_list_set.append(list_set[j]) | |
#print temp_list_set | |
j += 1 | |
temp_list_list_set.append(temp_list_set) | |
i += 5 | |
result = temp_list_list_set | |
return result | |
else: | |
return list_set | |
def select(B, k): | |
''' | |
if(k > len(B) - 1): | |
return "Geht nicht" | |
else: | |
''' | |
if(len(B) <= 5): | |
B.sort() | |
#print "B: % s" % B[k-1] | |
return B[k-1] | |
else: | |
p_list_set = partition(B) | |
#print p_list_set | |
median_list_set = [] | |
for lst in p_list_set: | |
lst.sort() | |
length = len(lst) | |
index = length/2 | |
#print "lst(sorted): %s index: %s value: %s" % (lst, index, lst[index]) | |
median_list_set.append(lst[index]) | |
#print "median_set: %s " % median_list_set | |
subset_one = [] | |
subset_two = [] | |
x = select(median_list_set, ((len(median_list_set)+1)/2)) | |
for h in B: | |
if(h <= x): | |
subset_one.append(h) | |
else: | |
subset_two.append(h) | |
if(k <= len(subset_one)): | |
return select(subset_one, k) | |
else: | |
return select(subset_two, (k-len(subset_one))) | |
def heapsort(A): | |
def heapify(A): | |
start = (len(A) - 2) / 2 | |
while start >= 0: | |
siftDown(A, start, len(A)-1) | |
start -= 1 | |
def siftDown(A, start, end): | |
root = start | |
while root * 2 + 1 <= end: | |
child = root * 2 + 1 | |
if child +1 <= end and A[child] < A[child +1]: | |
child += 1 | |
if child <= end and A[root] < A[child]: | |
A[root], A[child] = A[child], A[root] | |
root = child | |
else: | |
return | |
heapify(A) | |
end = len(A) - 1 | |
while end > 0: | |
A[end], A[0] = A[0], A[end] | |
siftDown(A, 0, end - 1) | |
end -= 1 | |
def median_heap(A): | |
set_length = len(A)/2 | |
result_set = A | |
i = 0 | |
while i < set_length: | |
heapsort(result_set) | |
result_set.pop(0) | |
i += 1 | |
return result_set | |
""" | |
Ab 10^6 Elementen beginnt der Rechner laengere Zeit zu rechnen (2+sekunden). | |
Ab 10^4 Elementen schmeisst das skript bei randomisierten Zahlen einen recursion depth error. | |
Ab 10^3 Elementen scheint der heapsort langsamer zu werden und der median algorithmus performnt besser. | |
""" | |
test_len = 1000 | |
B = [x for x in range(test_len)] | |
del(B[0]) | |
B.sort() | |
random_list = [random.randint(1,test_len) for x in xrange(test_len)] | |
#print B | |
k = 10 | |
"""print partition(B)""" | |
print "________________________________________________________" | |
#print select(B, k) | |
start = dt.datetime.now() | |
print "ret: %s" % select(B, k) | |
end = dt.datetime.now() | |
print "end %s" % (end-start) | |
print "________________________________________________________" | |
start = dt.datetime.now() | |
print "ret: %s" % select(random_list, k) | |
end = dt.datetime.now() | |
print "end %s" % (end-start) | |
print "________________________________________________________" | |
#print random_list[0:30] | |
start = dt.datetime.now() | |
heapsort(random_list) | |
end = dt.datetime.now() | |
print "end %s" % (end-start) | |
#print random_list[0:30] | |
print "________________________________________________________" | |
random_list = [random.randint(1,test_len) for x in xrange(test_len)] | |
#print random_list | |
start = dt.datetime.now() | |
median_via_heap = median_heap(random_list) | |
print "median via heap: %s " % median_via_heap[0] | |
end = dt.datetime.now() | |
print "end %s" % (end-start) | |
print "________________________________________________________" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment