Skip to content

Instantly share code, notes, and snippets.

@lega911
Created August 2, 2019 20:47
Show Gist options
  • Save lega911/d8e66cecbe7e72555c0adc46c2be48b5 to your computer and use it in GitHub Desktop.
Save lega911/d8e66cecbe7e72555c0adc46c2be48b5 to your computer and use it in GitHub Desktop.
load: 0.369427308000013
find: 0.03731154500019329
sample 1: ['6.19889798576753', '306.03317783754981', 'NCV']
sample 2: ['6.19889798576753', '306.03317783754981', 'NCV']
import re
import timeit
from collections import defaultdict
# Read the whole inventory XML into memory once; find() and load() scan this
# string. The context manager closes the file handle as soon as the read is
# done instead of leaving that to the garbage collector.
# NOTE(review): the file is still decoded with the platform default encoding,
# exactly as before -- confirm whether an explicit encoding is intended.
with open('GBR_2016_6_Inventory_28062017_111353.xml') as xml_file:
    data = xml_file.read()
def find(year='2014', uid='00984BA3-E7AE-43F6-BD60-1C9807D6FC7B', text=None):
    """Return the record values of every matching ``<year>`` element.

    Scans *text* with a single regular expression for
    ``<year name="YEAR" uid="UID"><record><value>`` and collects whatever
    follows up to the next ``<``.

    Args:
        year: Value the ``name`` attribute must equal (matched literally).
        uid: Value the ``uid`` attribute must equal (matched literally).
        text: String to scan. When ``None``, the module-level ``data``
            string is used, so a plain ``find()`` call behaves as before.

    Returns:
        A list of the captured value strings, in document order. The list
        is empty when nothing matches.
    """
    if text is None:
        text = data
    # re.escape keeps caller-supplied values from being read as regex syntax
    # (e.g. a "." in the year must not match an arbitrary character).
    pattern = '<year name="{}" uid="{}"><record><value>([^<]*)<'.format(
        re.escape(year), re.escape(uid)
    )
    return re.findall(pattern, text)
def load(text=None):
    """Group every record value by its ``(year, uid)`` pair.

    Scans *text* once for ``<year name="DIGITS" uid="..."><record><value>``
    and appends the text up to the next ``<`` to the list stored under the
    ``(name, uid)`` key.

    Args:
        text: String to scan. When ``None``, the module-level ``data``
            string is used, so a plain ``load()`` call behaves as before.

    Returns:
        A ``defaultdict(list)`` mapping ``(year, uid)`` tuples of strings to
        the list of captured value strings, in document order. Because it is
        a defaultdict, indexing a missing key yields (and stores) an empty
        list rather than raising ``KeyError``.
    """
    if text is None:
        text = data
    grouped = defaultdict(list)
    pattern = r'<year name="(\d+)" uid="([^"]+)"><record><value>([^<]*)<'
    # findall yields one (year, uid, value) tuple per match because the
    # pattern has three capture groups.
    for year, uid, value in re.findall(pattern, text):
        grouped[(year, uid)].append(value)
    return grouped
# Time one run of each approach (number=1 executes the statement once):
# load() indexes every year/uid pair, find() extracts a single pair directly.
load_seconds = timeit.timeit('load()', globals=globals(), number=1)
print('load:', load_seconds)
find_seconds = timeit.timeit('find()', globals=globals(), number=1)
print('find:', find_seconds)

# Print the same slice of values from both approaches so the two outputs
# can be compared by eye.
sample_key = ('2014', '00984BA3-E7AE-43F6-BD60-1C9807D6FC7B')
indexed = load()
print('sample 1:', indexed[sample_key][-6:-3])
matched = find()
print('sample 2:', matched[-6:-3])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment