Data Splitting and Reduction Techniques in Python and NumPy
Splitting data structures efficiently and performing reduce operations are essential skills for optimizing computations in Python, and both come up frequently in interviews. This document covers:
- Splitting lists, dictionaries, and matrices using Python and NumPy
- Common reduce operations for 1D and 2D data
# Splitting a list into n nearly equal parts (sizes differ by at most one)
from itertools import islice
def split_list(lst, n):
    """Split ``lst`` into ``n`` consecutive chunks of near-equal length.

    The first ``len(lst) % n`` chunks receive one extra element, so chunk
    sizes differ by at most one. When ``n`` exceeds ``len(lst)`` the trailing
    chunks are empty.

    Args:
        lst: Sequence supporting ``len`` and slicing.
        n: Number of chunks to produce; must be at least 1.

    Returns:
        A list of ``n`` slices of ``lst``, in the original order.

    Raises:
        ValueError: If ``n`` is less than 1.
    """
    # Reject non-positive counts up front: 0 would divide by zero and a
    # negative value would silently return an empty result.
    if n < 1:
        raise ValueError("n must be a positive integer")
    base, extra = divmod(len(lst), n)
    chunks = []
    start = 0
    for index in range(n):
        # The first `extra` chunks absorb the remainder, one element each.
        stop = start + base + (1 if index < extra else 0)
        chunks.append(lst[start:stop])
        start = stop
    return chunks
# Demo: nine items split three ways gives three chunks of three.
lst = list(range(1, 10))
print(split_list(lst, n=3))
Output:
[[1, 2, 3], [4, 5, 6], [7, 8, 9]]
import numpy as np
def split_array(arr, n):
    """Split ``arr`` into ``n`` sub-arrays using ``np.array_split``.

    Args:
        arr: Array-like input to divide.
        n: Number of sections, passed straight through to NumPy.

    Returns:
        Whatever ``np.array_split`` returns: a list of NumPy arrays.
    """
    pieces = np.array_split(arr, n)
    return pieces
# Demo: split a 9-element array three ways, then show the container type
# and the type of its first element.
arr = np.arange(1, 10)
split_arrays = split_array(arr, 3)
print(split_arrays, type(split_arrays), type(split_arrays[0]), sep="\n")
Output:
[array([1, 2, 3]), array([4, 5, 6]), array([7, 8, 9])]
<class 'list'>
<class 'numpy.ndarray'>
✅ Each element of the returned list is a NumPy array.
from itertools import islice
def split_dict(d, n):
    """Split ``d`` into ``n`` dictionaries of near-equal size.

    Items are handed out in the dictionary's iteration order. The first
    ``len(d) % n`` parts receive one extra item, so part sizes differ by at
    most one. When ``n`` exceeds ``len(d)`` the trailing parts are empty.

    Args:
        d: Mapping to divide.
        n: Number of parts to produce; must be at least 1.

    Returns:
        A list of ``n`` dictionaries that together hold every item of ``d``.

    Raises:
        ValueError: If ``n`` is less than 1.
    """
    # Reject non-positive counts up front: 0 would divide by zero and a
    # negative value would silently return an empty result.
    if n < 1:
        raise ValueError("n must be a positive integer")
    # Compute the part sizes once instead of on every iteration.
    base, extra = divmod(len(d), n)
    # A single shared iterator lets each islice resume where the last stopped.
    items = iter(d.items())
    # `index < extra` is a bool, adding 1 to the size of the first `extra` parts.
    return [dict(islice(items, base + (index < extra))) for index in range(n)]
# Demo: map 0..9 to their squares and split the mapping three ways.
d = {i: i * i for i in range(10)}
print(split_dict(d, n=3))
def split_matrix(matrix, rows, cols):
    """Cut a 2D matrix into blocks of at most ``rows`` x ``cols`` elements.

    Blocks are produced band by band from top to bottom, and left to right
    within each band. Blocks on the bottom or right edge are smaller when the
    matrix dimensions are not multiples of ``rows`` / ``cols``. The column
    count is taken from the first row.

    Args:
        matrix: Sequence of rows, each supporting slicing.
        rows: Maximum number of rows per block; must be at least 1.
        cols: Maximum number of columns per block; must be at least 1.

    Returns:
        A flat list of blocks, each a list of row slices. An empty matrix
        yields an empty list.

    Raises:
        ValueError: If ``rows`` or ``cols`` is less than 1.
    """
    # A negative step would make the ranges empty and silently return [].
    if rows < 1 or cols < 1:
        raise ValueError("rows and cols must be positive integers")
    # Guard before reading matrix[0]; len() is used rather than truthiness so
    # array-like inputs are not evaluated as booleans.
    if len(matrix) == 0:
        return []
    width = len(matrix[0])
    blocks = []
    for top in range(0, len(matrix), rows):
        # Slice the band once and reuse it for every column step.
        band = matrix[top:top + rows]
        for left in range(0, width, cols):
            blocks.append([row[left:left + cols] for row in band])
    return blocks
# Demo: a 4x4 grid cut into 2x2 blocks.
matrix = [
    [1, 2, 3, 4],
    [5, 6, 7, 8],
    [9, 10, 11, 12],
    [13, 14, 15, 16],
]
print(split_matrix(matrix, rows=2, cols=2))
import numpy as np
# Build a 4x4 grid holding 0..15, then cut it into four 2x2 quadrants.
matrix = np.arange(16).reshape((4, 4))
split_matrices = [
    np.hsplit(band, 2)  # cut each band into left and right halves
    for band in np.vsplit(matrix, 2)  # top and bottom halves first
]
print(split_matrices)
from functools import reduce
lst = [1, 2, 3, 4, 5]
# Each reduce call folds the list left to right with a two-argument function.
sum_result = reduce(lambda acc, item: acc + item, lst)  # Sum
product_result = reduce(lambda acc, item: acc * item, lst)  # Product
max_result = reduce(max, lst)  # Max
min_result = reduce(min, lst)  # Min
print(sum_result, product_result, max_result, min_result)
Output:
15 120 5 1
import numpy as np
# Same four reductions, done by NumPy via the array's own methods.
arr = np.arange(1, 6)
print(arr.sum(), arr.prod(), arr.max(), arr.min())
Output:
15 120 5 1
from collections import Counter
# Sum the values of matching keys across several dictionaries.
dicts = [
    {"a": 1, "b": 2},
    {"a": 3, "b": 4},
    {"a": 5, "b": 6},
]
reduced_dict = Counter()
for d in dicts:
    # Counter.update adds to existing counts instead of overwriting them.
    reduced_dict.update(d)
print(reduced_dict)
def sum_matrix(matrix):
    """Return the sum of every element in a 2D list.

    Args:
        matrix: Iterable of rows, each an iterable of numbers.

    Returns:
        The total of all row sums; ``0`` when there are no rows.
    """
    total = 0
    for row in matrix:
        total = total + sum(row)
    return total
def multiply_matrix(matrix):
    """Return the product of every element in a 2D list.

    Each row is multiplied out first and the row products are then multiplied
    together. An empty matrix or an empty row contributes ``1`` (the empty
    product) instead of raising, mirroring how ``sum_matrix`` yields ``0``
    for empty input.

    Args:
        matrix: Iterable of rows, each an iterable of numbers.

    Returns:
        The product of all elements; ``1`` when there are no elements.
    """
    from functools import reduce
    from operator import mul

    # The explicit initial value 1 keeps reduce() from raising on empty input.
    row_products = (reduce(mul, row, 1) for row in matrix)
    return reduce(mul, row_products, 1)
def max_matrix(matrix):
    """Return the largest element of a 2D list.

    Args:
        matrix: Non-empty iterable of non-empty rows.

    Returns:
        The maximum of the per-row maxima.

    Raises:
        ValueError: If the matrix or any of its rows is empty (raised by the
            built-in ``max``).
    """
    return max(max(row) for row in matrix)
def min_matrix(matrix):
    """Return the smallest element of a 2D list.

    Args:
        matrix: Non-empty iterable of non-empty rows.

    Returns:
        The minimum of the per-row minima.

    Raises:
        ValueError: If the matrix or any of its rows is empty (raised by the
            built-in ``min``).
    """
    return min(min(row) for row in matrix)
# Demo: run the pure-Python reducers on a 3x3 grid holding 1..9.
matrix = [
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
]
print(sum_matrix(matrix))  # Sum of all elements
print(multiply_matrix(matrix))  # Product of all elements
print(max_matrix(matrix))  # Max element
print(min_matrix(matrix))  # Min element
# Same reductions over the whole 3x3 grid, done by NumPy in one call each.
matrix = np.arange(1, 10).reshape(3, 3)
print(matrix.sum(), matrix.prod(), matrix.max(), matrix.min())
Output:
45 362880 9 1
- Splitting Data: Python and NumPy provide efficient ways to split lists, dictionaries, and matrices.
- Reduce Operations: These are useful for summing, multiplying, finding min/max, and aggregating data efficiently.
- NumPy is preferred for performance, while pure Python provides more flexibility.
These techniques are essential for interviews and high-performance computing.