Skip to content

Instantly share code, notes, and snippets.

@kirankotari
Last active March 20, 2019 05:28
Show Gist options
  • Save kirankotari/0e01e9fec6936467a1a5e7fd09506a5b to your computer and use it in GitHub Desktop.
Save kirankotari/0e01e9fec6936467a1a5e7fd09506a5b to your computer and use it in GitHub Desktop.
# Reading csv file data in general..!
# Pure-Python baseline: parse the CSV by hand, then average every value.
import time

path = '../input/mean/csv/data2.csv'

# Phase 1 (start -> mid): parse the file into a list of float rows.
start = time.perf_counter()
data = []
# Use a context manager so the file handle is closed as soon as parsing is
# done instead of being left open until garbage collection.
with open(path) as csv_file:
    for line in csv_file:
        # Empty fields (trailing comma, blank line) are skipped, so a row
        # may end up shorter than the others or even empty.
        row = [float(col) for col in line.strip().split(',') if col]
        data.append(row)
mid = time.perf_counter()

# Phase 2 (mid -> end): mean over every parsed value across all rows.
sum_value = 0
count = 0
for each in data:
    sum_value += sum(each)
    count += len(each)
# A file with no numeric fields gives count == 0; report NaN rather than
# crashing with ZeroDivisionError.
mean = sum_value / count if count else float('nan')
end = time.perf_counter()

# print(data)
print(mean)
print('time taken basic : {:6f}s'.format(mid - start))
print('time taken: {:6f}s'.format(end - start))
# Using NumPy with asarray
# Split each line into string fields in Python, then let NumPy convert the
# whole table to floats in a single call.
import numpy as np

# Phase 1 (start -> mid): read and convert the file.
start = time.perf_counter()
# Use a context manager so the file handle is closed right after reading
# instead of being left open until garbage collection.
with open(path) as csv_file:
    data = [line.strip().split(',') for line in csv_file]
# NOTE: unlike the pure-Python version, empty fields are not filtered out
# here, so the float conversion below raises on them.
data = np.asarray(data, float)  # the author found this faster than np.loadtxt
mid = time.perf_counter()

# Phase 2 (mid -> end): mean over all elements of the array.
mean = np.mean(data)
end = time.perf_counter()

# print(data)
print(mean)
print('time taken by np.asarray: {:6f}s'.format(mid - start))
print('time taken: {:6f}s'.format(end - start))
# Using NumPy with loadtxt
# Let NumPy read and parse the comma-separated file in one call.
import numpy as np

# Phase 1 (start -> mid): load the file into an array.
start = time.perf_counter()
# The original author measured this as slower than the manual
# split + np.asarray approach.
data = np.loadtxt(path, delimiter=',')
mid = time.perf_counter()

# Phase 2 (mid -> end): mean over all elements of the array.
mean = data.mean()
end = time.perf_counter()

# print(data)
print(mean)
load_seconds = mid - start
total_seconds = end - start
print('time taken by np.loadtxt: {:6f}s'.format(load_seconds))
print('time taken: {:6f}s'.format(total_seconds))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment