-
-
Save tonyseek/85187c45309f195be084 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
pip-tools==0.3.5 | |
xlrd==0.9.3 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# coding: utf-8 | |
""" | |
中国联不通每月通话时间统计 | |
用法: | |
1. 去 http://iservice.10010.com/ 下载通话详单(Excel 格式),导出为 CSV | |
2. ./unicom-sumup.py *.csv | |
""" | |
from __future__ import print_function | |
import os | |
import re | |
import sys | |
import csv | |
from collections import defaultdict | |
from decimal import Decimal | |
from itertools import chain | |
import xlrd | |
DURATION_PATTERN = re.compile(r'(?:(\d+)小时)?(?:(\d+)分)?(?:(\d+)秒)?') | |
ID, TIME, DURATION, NUMBER, FEE = range(5) | |
FIELDS = '序号,通话起始时间,通话时长,对方号码,小计'.split(',') | |
def extract(row): | |
return [row[f] for f in FIELDS] | |
def to_minutes(s): | |
hour, minute, second = DURATION_PATTERN.match(s).groups() | |
minute = int(minute) if minute else 0 | |
if hour: | |
minute += int(hour) * 60 | |
if second: | |
minute += 1 | |
return minute | |
def xls_to_csv(path): | |
name, ext = os.path.splitext(path) | |
if ext in ('.xls', '.xlsx'): | |
csv_path = '%s.csv' % name | |
workbook = xlrd.open_workbook(path) | |
sheet = workbook.sheet_by_index(0) | |
with open(csv_path, 'wb') as csv_file: | |
csv_writer = csv.writer(csv_file, quoting=csv.QUOTE_ALL) | |
for row_index in xrange(sheet.nrows): | |
row = [v.encode('utf-8') for v in sheet.row_values(row_index)] | |
csv_writer.writerow(row) | |
return csv_path | |
return path | |
def load_data(paths): | |
calls = defaultdict(list) | |
for row in chain.from_iterable( | |
csv.DictReader(open(xls_to_csv(p))) for p in paths): | |
if row['呼叫类型'] != '主叫': | |
continue | |
row = extract(row) | |
calls[row[TIME][:7]].append(row) | |
return sorted(calls.iteritems(), key=lambda (k, v): k) | |
def main(): | |
calls_by_month = load_data(sys.argv[1:]) | |
for month, calls in calls_by_month: | |
fee = sum(Decimal(c[FEE]) for c in calls) | |
minutes = sum(to_minutes(c[DURATION]) for c in calls) | |
print('%s: %s minutes - ¥%s' % (month, minutes, fee)) | |
if __name__ == '__main__': | |
main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment