Last active
May 23, 2024 18:34
-
-
Save vurtun/7063afddcf1491af16037a207a167e49 to your computer and use it in GitHub Desktop.
print K largest(or smallest) elements in an array
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* Problem: Directory file view ui. Files can be sorted by different properties (name, size, ...). Ui itself only needs elements | |
that are currently in the visible section of the list view from x and count k. So I want to use a fixed size buffer with up to k elements | |
and walk through all files in the directory and only have the final elements in the end in the fixed size buffer. | |
Temp buffers are fine for me as long as they have a fixed at compile time size. For those familiar with SQL basically this is | |
a SORT BY LIMIT begin, count. | |
Idea: Use Floyd–Rivest algorithm with average O(n) to find the element at index x. Walk over list again and skip all elements smaller than x | |
then use heap to sort for the k smallest elements afterwards. So we have for Floyd–Rivest and heap combined O(n*log(k)) | |
Problem: Find x is not directly possible since not all elements are in memory because we have only fixed size buffer. | |
Idea: Create a buffer of size 2𝑘. Read in 2𝑘 elements. Use Floyd–Rivest algorithm to partition the buffer | |
so that the 𝑘 smallest elements are first; this takes 𝑂(𝑘) time. Now read in another 𝑘 items from your array into the | |
buffer, replacing the 𝑘 largest items in the buffer, partition the buffer as before, and repeat. | |
Problem: k should be fixed size. So when we have k=256 we are not able to address any index greater than 256. | |
Idea: Multiple iterations in multiples of fixed k. So when k=256 then have index 256 initially and iterate over all n. Then
new iteration we skip all elements smaller than the element at k=256 and use a new index. Repeat until we hit the index | |
we want. | |
Problem: Kind of expensive. We get O(2*k * (n*0.5)/k) + k*log(k) | |
Optimizations: | |
- initially on index 0 or end page use heap with heapify up/down to sort first k elements directly | |
- Instead of k being size of visible section use bigger window and only recalculate when reaching the end | |
- Jumps don't happen too often (shortcuts + shift-scrollbar-click) so reusing the elements for the
starting/ending element makes sense | |
- File count smaller than k only needs O(k*log(k))
- start/end indices are limited to n/2 by walking backwards
*/ | |
// https://cs.stackexchange.com/questions/68125/finding-kth-smallest-element-from-a-given-sequence-only-with-ok-memory-on-t | |
// https://en.wikipedia.org/wiki/Floyd%E2%80%93Rivest_algorithm | |
// https://gist.github.com/igorburago/fa340429e75007a41335fb660d419e52 | |
#include <math.h> | |
/* Binary-heap index helpers for a 0-based array heap. */
#define heap_parent(i) (((i)-1) >> 1)
#define heap_right(i) (((i)+1) << 1)
#define heap_left(i) (heap_right(i)-1)
/* Sign of val: -1, 0, or +1.  Argument parenthesized so expressions such
 * as sgn(i - n / 2) expand correctly. */
#define sgn(val) ((0 < (val)) - ((val) < 0))
/* NOTE: max/min evaluate their arguments twice -- do not pass expressions
 * with side effects (e.g. max(i++, j)). */
#define max(x,y) (((x)<(y))?(y):(x))
#define min(x,y) (((x)<(y))?(x):(y))
/* Exchange the two ints that a and b point to. */
static inline void
swpi(void *a, void *b) {
    int *pa = a;
    int *pb = b;
    int tmp = *pa;
    *pa = *pb;
    *pb = tmp;
}
/* Descending int comparator (qsort-style): negative when *a should sort
 * before *b, i.e. when *a > *b.  Uses comparisons instead of the original
 * `b - a` subtraction, which overflows (undefined behavior) for extreme
 * values such as INT_MIN vs INT_MAX. */
static inline int
cmpid(const void *a, const void *b) {
    int x = *(const int*)a;
    int y = *(const int*)b;
    return (y > x) - (y < x);
}
/* Ascending int comparator (qsort-style): negative when *a < *b.  Uses
 * comparisons instead of the original `a - b` subtraction, which overflows
 * (undefined behavior) for extreme values such as INT_MIN vs INT_MAX. */
static inline int
cmpiu(const void *a, const void *b) {
    int x = *(const int*)a;
    int y = *(const int*)b;
    return (x > y) - (x < y);
}
/* Sift the element at index i down through the n-element heap `a` until
 * the heap property holds again (per cmp, the parent orders before both
 * children).  No-op when n is 0. */
static inline void
heapifyd(int *a, int i, int n,
    int(*cmp)(const void*, const void*),
    void(*swp)(void *a, void *b)) {
    if (!n) {
        return;
    }
    for (;;) {
        int left = 2 * i + 1;
        int right = 2 * i + 2;
        int best = i;
        if (left < n && cmp(&a[left], &a[best]) < 0) {
            best = left;
        }
        if (right < n && cmp(&a[right], &a[best]) < 0) {
            best = right;
        }
        if (best == i) {
            return;
        }
        swp(&a[i], &a[best]);
        i = best;
    }
}
/* Sift the element at index i up toward the root until its parent orders
 * before it (per cmp). */
static inline void
heapifyu(int *a, int i,
    int(*cmp)(const void*, const void*),
    void(*swp)(void *a, void *b)) {
    while (i != 0) {
        int parent = (i - 1) / 2;
        if (cmp(&a[parent], &a[i]) <= 0) {
            break;
        }
        swp(&a[parent], &a[i]);
        i = parent;
    }
}
/* Floyd-Rivest selection: rearranges a[l..r] in place so that a[k] holds
 * the value it would have if a[l..r] were fully sorted (per cmp); every
 * element before index k orders no later than a[k] and every element after
 * orders no earlier.  Returns a[k].  Expected cost is O(n) comparisons.
 * NOTE(review): cmp/swp are assumed to operate on int elements of `a`; the
 * recursive call below is used only for its side effect on `a`. */
static int
partition(int a[], int l, int r, int k,
    int(*cmp)(const void*, const void*),
    void(*swp)(void *a, void *b)) {
    while (r > l) {
        /* For large ranges, first recurse on a sample-derived subrange
         * [l2, r2] that very likely contains the k-th element -- the
         * Floyd-Rivest trick that keeps the expected work linear. */
        if (r - l > 600) {
            int n = r - l + 1;
            int i = k - l + 1;
            float z = logf(n);
            float s = 0.5 * expf(2 * z / 3);  /* sample size ~ n^(2/3) */
            /* Half-width of the sample window, pushed toward the side
             * of the range that k sits in. */
            float sd = 0.5 * sqrtf(z * s * (n - s) / n) * sgn(i - n / 2);
            int l2 = max(l, (int)(k - i * s / n + sd));
            int r2 = min(r, (int)(k + (n - i) * s / n + sd));
            partition(a, l2, r2, k, cmp, swp);
        }
        /* Hoare-style partition of a[l..r] around pivot value t = a[k]. */
        int t = a[k];
        int i = l;
        int j = r;
        swp(&a[l], &a[k]);
        /* Ensure the pivot copy sits at whichever end keeps the scans
         * below inside the range. */
        if (cmp(&a[r],&t) > 0) {
            swp(&a[l], &a[r]);
        }
        while (i < j) {
            swp(&a[i], &a[j]);
            i++, j--;
            for (;cmp(&a[i],&t)<0; ++i);
            for (;cmp(&a[j],&t)>0; --j);
        }
        /* Move the pivot into its final slot j. */
        if (cmp(&a[l],&t) == 0) {
            swp(&a[l], &a[j]);
        } else {
            swp(&a[r], &a[++j]);
        }
        /* Shrink the window to the side that still contains index k;
         * when j == k both bounds close in and the loop terminates. */
        if (j <= k) {
            l = j + 1;
        }
        if (k <= j) {
            r = j - 1;
        }
    }
    return a[k];
}
#include <stdio.h> | |
#include <stdlib.h> | |
extern int | |
main(void) { | |
int a[] = {1000, 400, 300, -10, 20, -50, 65, 47, 45, 50, 40, 75, 60, 100}; | |
int n = sizeof(a) / sizeof(a[0]); | |
int off = 3; | |
int k = 3; | |
int i = 0; | |
partition(a, 0, n-1, off, cmpiu, swpi); | |
for (int j = 0; j < n; ++j) { | |
printf("%d,", a[j]); | |
} | |
printf("\n"); | |
int lst[k], cnt = 0; | |
for (i = off; i < off + k; ++i) { | |
lst[cnt++] = a[i]; | |
heapifyu(lst, cnt-1, cmpid, swpi); | |
} | |
for (; i < n; ++i) { | |
if (a[i] < lst[0]) { | |
lst[0] = a[i]; | |
heapifyd(lst, 0, cnt, cmpid, swpi); | |
} | |
} | |
int res[k], num = 0; | |
while (cnt) { | |
res[k - ++num] = lst[0]; | |
lst[0] = lst[--cnt]; | |
heapifyd(lst, 0, cnt, cmpid, swpi); | |
} | |
printf("Smallest values: ["); | |
for (int i = 0; i < num; ++i) { | |
printf("%d,", res[i]); | |
} | |
printf("]\n"); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
@vurtun, unless the array in question is queried for subinterval sorting only a couple of times for the same content or is very close to being sorted from the get-go, I would use partial quicksort instead — especially since it can easily be made incremental by reusing the partitioning work from previous calls on the array. This is, of course, assuming we can afford additional linear space in the form of a scratch buffer for storing pivot indices of completed partitions in the quicksort tree.
For a large number of repetitive calls (even with consecutively nonintersecting intervals), this latter approach is generally more performant than K-largest scans with a heap, especially when the target subinterval is far from either end of the array.
Here is my implementation of each of the algorithms together with some benchmarks for comparison:
https://gist.github.com/igorburago/fa340429e75007a41335fb660d419e52.