Created
June 30, 2014 15:57
-
-
Save cypreess/3302004d3244501f7c8c to your computer and use it in GitHub Desktop.
AWK is 3x slower than PYTHON :(
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# The task: cut columns 1 and 2 and print them in swapped order (2,1) | |
# TLDR: python 26s (15s with a split limit) vs. awk 1m35s on a 1.2 GB text file | |
# WAT? | |
# Dataset: | |
iMac27~/dev/awk_vs_python ls -alh test_data.2 | |
-rw-r--r-- 1 cypreess staff 1.2G Jun 30 17:33 test_data.2 | |
iMac27~/dev/awk_vs_python head -n 2 test_data.2 | |
3460835780,NOWY DOTYKOWY LG T375 Wi-Fi DUAL SIM + GRATISY FV,238.0,0.0,1,238.0,16,1,0,1377263058,-515,Warszawa,7,1,127367,1,http://img13.allegroimg.pl/photos/128x96/34/60/83/57/3460835780,1,14.0,0,0,1,0,0,48,0,None,107312,jas1,2002,3 | |
3460836407,NOWY SAMSUNG GALAXY NOTE 2 N7100 WHITE FV 23%,1799.0,0.0,1,1799.0,1,1,0,1377263086,-487,Kraków,6,1,125211,1,http://img08.allegroimg.pl/photos/128x96/34/60/83/64/3460836407,1,19.0,0,0,0,0,1,72,0,None,28397872,-Max_Mobile-,1707,3 | |
# Python first run | |
iMac27~/dev/awk_vs_python cat test.py | |
import sys | |
with open(sys.argv[2]) as f: | |
for l in f: | |
x = l.split(sys.argv[1]) | |
print "%s%s%s" % (x[1], sys.argv[1], x[0]) | |
iMac27~/dev/awk_vs_python time python test.py , test_data.2 > /dev/null | |
real 0m26.576s | |
user 0m26.111s | |
sys 0m0.435s | |
iMac27~/dev/awk_vs_python time python test.py , test_data.2 > /dev/null | |
real 0m26.588s | |
user 0m25.894s | |
sys 0m0.448s | |
iMac27~/dev/awk_vs_python time python test.py , test_data.2 > /dev/null | |
real 0m25.498s | |
user 0m24.868s | |
sys 0m0.449s | |
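# AWK run (note: `$2 $1` concatenates the two fields with no separator, so unlike the Python script the output has no comma between them; `print $2 "," $1` would match the Python output) | |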
iMac27~/dev/awk_vs_python time awk -F, '{print $2 $1}' test_data.2 > /dev/null | |
real 1m34.901s | |
user 1m34.153s | |
sys 0m0.553s | |
iMac27~/dev/awk_vs_python time awk -F, '{print $2 $1}' test_data.2 > /dev/null | |
real 1m35.660s | |
user 1m34.441s | |
sys 0m0.582s | |
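# Python super duper optimization: limit the number of splits (maxsplit), since only the first two columns are needed and the rest of each line can stay unsplit | |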
iMac27~/dev/awk_vs_python cat test.py | |
import sys | |
with open(sys.argv[2]) as f: | |
for l in f: | |
x = l.split(sys.argv[1], 3) # <-- here | |
print "%s%s%s" % (x[1], sys.argv[1], x[0]) | |
(nlp)iMac27~/dev/awk_vs_python time python test.py , test_data.2 > /dev/null | |
real 0m15.032s | |
user 0m14.593s | |
sys 0m0.408s |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment