Skip to content

Instantly share code, notes, and snippets.

@fkztw
Last active August 29, 2015 14:20
Show Gist options
  • Save fkztw/45b099c266cb7fcf2aec to your computer and use it in GitHub Desktop.
Save fkztw/45b099c266cb7fcf2aec to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
#-*- coding: utf-8 -*-
import csv
import re
import collections
def main():
    """Run the whole pipeline: parse, tally, report and export.

    Reads 'birdsfinall98.csv', prints the per-record counts and the grand
    total, then writes the per-location totals to 'bird-loc_count.csv'.
    """
    # Alternative input that was used previously: 'bird0-1.csv'
    records = parse_csv('birdsfinall98.csv')
    per_record_counts = count_birds(records)
    print_count(per_record_counts)
    output_loc_count_csv('bird-loc_count.csv', per_record_counts)
# Dealing with raw csv data
def parse_csv(filename):
    """Parse the raw bird-survey CSV into a list of record dicts.

    The first five columns ('date', 'time', 'location', 'lat', 'lng' in the
    expected layout) are copied, whitespace-stripped, under their header
    names.  The remaining columns are read as alternating (count, bird name)
    pairs -- ('count', 'birdName'), ('count.1', 'birdName.1'), ... -- and
    collected under the ``'birds'`` key as ``(bird_name, count)`` tuples of
    stripped strings.

    Args:
        filename: Path of the CSV file.  It is decoded as UTF-8, with or
            without a leading byte order mark.

    Returns:
        A list with one dict per data row.  A file without a header row
        yields an empty list.
    """
    # Bird names that mean "nothing recorded in this column pair".
    blank_names = ('', '0', '0.0')
    data = []
    # 'utf-8-sig' removes a leading BOM (U+FEFF) only when one is present, so
    # the first header name is never truncated for BOM-less files.
    # newline='' lets the csv module do its own newline handling.
    # https://en.wikipedia.org/wiki/Byte_order_mark
    with open(filename, encoding='utf-8-sig', newline='') as csvfile:
        # Only DictReader exposes fieldnames.
        # https://docs.python.org/3.4/library/csv.html#csv.DictReader
        records = csv.DictReader(csvfile, delimiter=',')
        fieldnames = records.fieldnames
        if not fieldnames:
            # Empty file or blank header line: nothing to parse.
            return data
        info_columns = fieldnames[:5]
        pair_columns = list(zip(fieldnames[5::2], fieldnames[6::2]))
        for row in records:
            # DictReader fills columns missing from a short row with None.
            d = {name: (row[name] or '').strip() for name in info_columns}
            birds = []
            for count_col, bird_col in pair_columns:
                bird, count = row[bird_col], row[count_col]
                if bird is None or count is None:
                    # Short row: this column pair is absent, skip it.
                    continue
                pair = (bird.strip(), count.strip())
                if pair[0] not in blank_names:
                    birds.append(pair)
            d['birds'] = birds
            data.append(d)
    return data
def count_birds(data):
    """Total the birds seen in each parsed record.

    Args:
        data: Iterable of record dicts.  Each must have a ``'location'``
            entry and a ``'birds'`` list of ``(bird_name, count)`` pairs,
            where ``count`` is free-form text.

    Returns:
        A list of ``(location, total)`` tuples, one per record, in input
        order.
    """
    # '\d+' takes the whole first run of digits, so "12" counts as 12 rather
    # than 1.  The raw string avoids the invalid '\d' escape sequence.
    number = re.compile(r'\d+')

    def _grep_num(count):
        """Return the first integer found in *count*, or 0 if there is none."""
        try:
            return int(number.search(count).group(0))
        except (AttributeError, TypeError, ValueError):
            # AttributeError: no digits in the text (search returned None).
            # TypeError: count is not a string.
            # ValueError: digits that int() refuses to convert.
            return 0

    return [
        (
            record['location'],
            sum(_grep_num(count) for _, count in record['birds']),
        )
        for record in data
    ]
def print_count(count_data):
    """Print one line per record, followed by the grand total.

    Args:
        count_data: Iterable of ``(location, count)`` entries; only the
            first two items of each entry are used.
    """
    n_birds = 0
    for i, r in enumerate(count_data, 1):
        location, count = r[0], r[1]
        n_birds += count
        # "Record #i, located at <location>, <count> birds in total."
        print('第 {} 筆資料,位於『{}』,共有 {} 隻。'.format(i, location, count))
    # "Total: <n_birds> birds"
    print('總計: {} 隻'.format(n_birds))
def output_loc_count_csv(filename, count_data):
    """Write the total bird count per location to a CSV file.

    Counts belonging to the same location are summed.  The file gets a
    'location,count' header and one row per distinct location, in order of
    first appearance.

    Args:
        filename: Path of the CSV file to create or overwrite.
        count_data: Iterable of ``(location, count)`` entries; only the
            first two items of each entry are used.
    """
    totals = collections.defaultdict(int)
    for r in count_data:
        totals[r[0]] += r[1]

    fieldnames = ['location', 'count']
    # newline='' is required by the csv module so that it controls the line
    # endings itself; otherwise extra blank lines can appear on platforms
    # that translate '\n'.  The explicit encoding keeps non-ASCII location
    # names independent of the system locale.
    with open(filename, 'w', encoding='utf-8', newline='') as csvfile:
        # https://docs.python.org/3.4/library/csv.html#csv.DictWriter
        writer = csv.DictWriter(
            csvfile,
            fieldnames=fieldnames,
            delimiter=',',
        )
        writer.writeheader()
        writer.writerows(
            {fieldnames[0]: loc, fieldnames[1]: total}
            for loc, total in totals.items()
        )
# Run the pipeline only when this file is executed as a script.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment