Last active
August 29, 2015 14:20
-
-
Save fkztw/45b099c266cb7fcf2aec to your computer and use it in GitHub Desktop.
#1LIikR_U (Python) [ptt.cc] https://www.ptt.cc/bbs/Python/M.1430965147.A.FDE.html
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
#-*- coding: utf-8 -*- | |
import csv | |
import re | |
import collections | |
def main():
    """Run the whole pipeline: parse, count, report, and export.

    Reads 'birdsfinall98.csv', computes the bird total of every record,
    prints the per-record report, and writes the per-location totals to
    'bird-loc_count.csv'.
    """
    records = parse_csv('birdsfinall98.csv')
    per_record_counts = count_birds(records)
    print_count(per_record_counts)
    output_loc_count_csv('bird-loc_count.csv', per_record_counts)
# Dealing with raw csv data | |
def parse_csv(filename):
    """Read the raw survey CSV and return one dict per data row.

    The first five columns are copied, whitespace-stripped, under their
    header names. The remaining columns are consumed as consecutive
    (count column, bird-name column) pairs and gathered under the
    'birds' key as a list of ``(bird_name, count)`` string tuples.
    Pairs whose bird name is empty, '0' or '0.0' are dropped.

    Args:
        filename: Path of the CSV file to read (UTF-8, with or without
            a byte order mark).

    Returns:
        A list of dicts, one per row. Empty when the file has no header.
    """
    data = []
    # 'utf-8-sig' drops a leading BOM when there is one and leaves the
    # header untouched otherwise; blindly slicing off the first character
    # would corrupt the first column name of a BOM-less file.
    # newline='' is what the csv module expects for its file objects.
    with open(filename, encoding='utf-8-sig', newline='') as csvfile:
        # Only DictReader exposes fieldnames.
        records = csv.DictReader(csvfile, delimiter=',')
        fieldnames = records.fieldnames
        if not fieldnames:
            # Empty file: no header row, hence nothing to parse.
            return data

        info_fields = fieldnames[:5]
        # ('count', 'birdName'), ('count.1', 'birdName.1'), ...
        bird_columns = list(zip(fieldnames[5::2], fieldnames[6::2]))

        for row in records:
            d = {name: row[name].strip() for name in info_fields}
            birds = []
            for count, bird in bird_columns:
                try:
                    p = (row[bird].strip(), row[count].strip())
                except (KeyError, AttributeError):
                    # DictReader pads short rows with None, which has no
                    # .strip(); skip such incomplete pairs.
                    continue
                # Values are already stripped, so a lone ' ' cannot occur.
                if p[0] not in ('', '0', '0.0'):
                    birds.append(p)
            d['birds'] = birds
            data.append(d)
    return data
def count_birds(data):
    """Sum the bird counts of every record.

    Args:
        data: Iterable of record dicts that provide a 'location' value
            and a 'birds' list of ``(bird_name, count)`` pairs, where
            ``count`` is free-form text.

    Returns:
        A list of ``(location, total)`` tuples, one per record and in the
        same order as ``data``.
    """
    def _grep_num(count):
        """Return the first integer embedded in ``count``, or 0 if none."""
        try:
            # r'\d+' captures the whole run of digits; a bare '\d' would
            # read only the first digit and turn '12' into 1.
            match = re.search(r'\d+', count)
        except TypeError:
            # Non-text counts (e.g. None) contribute nothing.
            return 0
        return int(match.group(0)) if match else 0

    return [
        (record['location'],
         sum(_grep_num(count) for _, count in record['birds']))
        for record in data
    ]
def print_count(count_data):
    """Print one line per record followed by the grand total.

    Args:
        count_data: Iterable of ``(location, count)`` entries, as
            returned by ``count_birds``.
    """
    n_birds = 0
    for i, r in enumerate(count_data, 1):
        # r => (location, count_of_r)
        location, count = r[0], r[1]
        n_birds += count
        print('第 {} 筆資料,位於『{}』,共有 {} 隻。'.format(i, location, count))
    print('總計: {} 隻'.format(n_birds))
def output_loc_count_csv(filename, count_data):
    """Write the total bird count of each location to a CSV file.

    Counts of entries sharing the same location are added together. The
    file gets a 'location,count' header followed by one row per
    location, in order of first appearance.

    Args:
        filename: Path of the CSV file to create or overwrite.
        count_data: Iterable of ``(location, count)`` entries.
    """
    totals = collections.defaultdict(int)
    for r in count_data:
        totals[r[0]] += r[1]

    fieldnames = ['location', 'count']
    # newline='' lets the csv module control line endings itself (no
    # doubled '\r' on Windows); an explicit encoding keeps non-ASCII
    # location names independent of the platform default.
    with open(filename, 'w', encoding='utf-8', newline='') as csvfile:
        writer = csv.DictWriter(
            csvfile,
            fieldnames=fieldnames,
            delimiter=',',
        )
        writer.writeheader()
        writer.writerows(
            {fieldnames[0]: loc, fieldnames[1]: count}
            for loc, count in totals.items()
        )
# Run the pipeline only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment