# Source: GitHub gist GINK03/3d2b299244c6888e2d94e9b6963b5a41 (last active April 3, 2017 13:59).
# NOTE: GitHub flagged this file as containing hidden or bidirectional Unicode
# text that may be interpreted or compiled differently than it appears. Review
# it in an editor that reveals hidden Unicode characters.
import csv
import glob
import itertools
import math
import os
import pickle
import re
import subprocess
import sys
# NOTE: this rebinds the name `dict` to OrderedDict for the whole module.
from collections import OrderedDict as dict
# Column names that sum_monoid() carries over into its per-group summary.
headers = set("計上月,担当者CD,担当者,請求先CD,請求先科目CD,請求先名,請求先備考1,請求先備考2,請求先予備1,請求先予備2,請求先予備3,請求書送付先CD,請求書送付先担当者,請求書送付先住所,サービス概要,商品CD,商品名,項目,売上期間,注記,売上詳細備考,税抜売上,税込売上,粗利,支払予定日,税抜原価,税込原価,支払先CD,支払先名,支払先備考1,支払先備考2,支払先予備1,支払先予備2,支払先予備3,申込備考1,アカウントNo,アカウント備考,クライアントCD,クライアント名,クライアント備考1,クライアント備考2,クライアント予備1,クライアント予備2,クライアント予備3,契約備考1,契約備考2,その他備考1,その他備考2,請求書出力単位,プロジェクトCD,プロジェクト名,プロジェクト備考1,プロジェクト備考2,申込予算,マージン,管理費割合,値引方法CD,値引方法名,値引パターンCD,値引パターン名,コミッション割合,定額値引額,定額値引率,値引備考,更新ステータスCD,更新ステータス名,契約書回収日,契約No,契約NoSEQ,契約売上明細行No,契約開始日,契約終了日,売上明細開始日,売上明細終了日,事業部CD,事業部名,部門CD,部門名,大分類CD,大分類名,中分類CD,中分類名,小分類CD,小分類名,商品備考,商品予備2,商品予備3,商品予備4,商品予備5,商品予備6CD,商品予備6名称,商品予備7CD,商品予備7名称,商品予備8CD,商品予備8名称,商品予備9CD,商品予備9名称,商品予備10CD,商品予備10名称,商品予備11名称,商品予備12名称,商品予備13名称,商品予備14名称,商品予備15名称".split(","))


def sum_monoid(header, monoids):
    """Collapse a group of row dicts ("monoids") into one summary dict ("monad").

    Args:
        header: iterable of column names; it fixes which keys are considered
            and the order in which they are inserted into the result.
        monoids: list of mappings from column name to the cell's string value.

    Returns:
        A mapping built with the module's ``dict`` name, containing:
        * for the five amount columns, the sum of the cells as floats, with an
          empty string counted as 0;
        * for every other column listed in ``headers``, the first non-empty
          cell of the group, or "-" when every cell is empty.
        Columns that are neither amount columns nor in ``headers`` are omitted.

    Raises:
        ValueError: if a non-empty amount cell cannot be parsed by ``float``.

    NOTE(review): the source's indentation was lost. The earlier per-column
    special cases for 商品名, 計上月 and クライアントCD are read here as
    siblings of the generic ``headers`` branch. Under that reading the generic
    branch always overwrote them (all three names are in ``headers``), so
    they are dropped. Their only observable effect was an IndexError when a
    group had no non-empty 計上月 or クライアントCD; such groups now get "-".
    """
    summed_columns = ("税抜売上", "税込売上", "粗利", "税抜原価", "税込原価")
    monad = dict()
    for key in header:
        # A row shorter than the header has no entry for trailing columns;
        # treat the missing cell like an empty one instead of raising KeyError.
        values = [row.get(key, "") for row in monoids]
        if key in summed_columns:
            monad[key] = sum(0. if value == '' else float(value) for value in values)
        elif key in headers:
            monad[key] = next((value for value in values if value != ""), "-")
    return monad
def analystic():
    """Summarise each ``./*.csv.utf8`` file per contract/client and pickle the result.

    Steps:
      1. Read ``./sellerid2lastoutput.txt``. On each line the text before the
         first space is the lookup key and the last whitespace-separated token
         is the value.
      2. For every CSV, keep rows whose 大分類名 is "広告代理" and that have a
         契約No. Replace 商品名 through the mapping from step 1; rows whose
         product is not in the mapping are reported and skipped.
      3. Group the rows under ``"<契約No> <クライアント名> 直販"`` and collapse
         each group with ``sum_monoid``.
      4. Write the ``{group id: summary}`` mapping to
         ``./conv/<csv file name>.pkl``.

    Returns:
        None. The results are the pickle files and progress lines on stdout.

    NOTE(review): the two ``.replace(' ', ...)`` calls are reproduced exactly
    as they appear in the source. The first one replaces a space with a space.
    Presumably the original argument was a different (e.g. full-width) space
    character that was lost when the file was extracted — confirm against the
    original file.
    """
    seller_type2lastoutput = {}
    with open('./sellerid2lastoutput.txt', 'r') as f:
        for line in f:
            if not line.strip():
                # A blank line has no last token; split()[-1] would raise.
                continue
            seller_type = line.split(' ')[0]
            print("sellet type", seller_type)
            seller_type2lastoutput[seller_type] = line.split()[-1]

    # Make sure the output directory exists before the first write.
    os.makedirs('./conv', exist_ok=True)

    for name in glob.glob('./*.csv.utf8'):
        cid_monoids = dict()
        # newline='' is what the csv module asks for; the files are UTF-8
        # because conv() produces them with `nkf -w`.
        with open(name, encoding='utf-8', newline='') as f:
            it = csv.reader(f, delimiter=',', quotechar='"')
            header = next(it, None)
            if header is None:
                print("empty csv", name, file=sys.stderr)
                continue
            for ps in it:
                monoid = dict(zip(header, ps))
                # .get() lets rows shorter than the header be skipped
                # instead of raising KeyError.
                if monoid.get('大分類名') != "広告代理":
                    continue
                if monoid.get('契約No') is None or monoid.get('クライアント名') is None:
                    continue
                try:
                    monoid['商品名'] = seller_type2lastoutput[monoid['商品名'].replace(' ', ' ')]
                except KeyError as e:
                    print("cannot find e", e)
                    continue
                # Ad-hoc sanitisation: drop spaces from the client name so the
                # group id below stays splittable on whitespace.
                monoid['クライアント名'] = monoid['クライアント名'].replace(' ', '')
                cid = monoid['契約No'] + " " + monoid['クライアント名'] + " 直販"
                cid_monoids.setdefault(cid, []).append(monoid)

        cid_monad = {cid: sum_monoid(header, monoids) for cid, monoids in cid_monoids.items()}
        with open('./conv/%s.pkl' % os.path.basename(name), 'wb') as out:
            out.write(pickle.dumps(cid_monad))
# ---------------------------------------------------------------------------
# Lookup tables built when the module is loaded.
#
# NOTE(review): the source's indentation was lost. This section is
# reconstructed as module-level code because the two vectorizer functions
# below read `sellertype_index`, `clientcd_cdist` and `cdist_index` as
# globals — confirm against the original file.
# ---------------------------------------------------------------------------

# Output label -> index, in order of first appearance. Cached in
# sellertype_index.pkl and rebuilt from the text file when the cache is absent.
# pickle must only ever be pointed at files this script wrote itself.
try:
    with open('sellertype_index.pkl', 'rb') as f:
        sellertype_index = pickle.loads(f.read())
except FileNotFoundError:
    from collections import OrderedDict
    sellertype_index = OrderedDict()
    with open('./sellerid2lastoutput.txt', 'r') as f:
        for line in f:
            ents = line.split()
            if not ents:
                # Blank line: there is no last token to index.
                continue
            lastoutput = ents[-1]
            if lastoutput not in sellertype_index:
                sellertype_index[lastoutput] = len(sellertype_index)
    with open('sellertype_index.pkl', 'wb') as f:
        f.write(pickle.dumps(sellertype_index))

# First token of each kasu.txt line -> (integer in the last token) - 1.
client_name_flag = {}
with open('./kasu.txt', 'r') as f:
    for excel in f:
        ents = excel.split()
        if not ents:
            continue
        client_name_flag[ents[0]] = int(ents[-1]) - 1

## Client code -> commercial-distribution code.
clientcd_cdist = {}
cdist_index = {}
with open('./cdist_clientcd.txt') as f:
    for line in f:
        ents = line.split()
        if not ents:
            continue
        # First token is the distribution code, last token the client code.
        clientcd_cdist[ents[-1]] = ents[0]

# Distribution code -> index, in order of first appearance.
for cdist in clientcd_cdist.values():
    if cdist not in cdist_index:
        cdist_index[cdist] = len(cdist_index)
with open('cdist_index.pkl', 'wb') as f:
    f.write(pickle.dumps(cdist_index))
def _client_name_vectorizer(client_name):
    """Return a '*'-joined one-hot string for a client's distribution code.

    Args:
        client_name: key into the module-level ``clientcd_cdist`` mapping.
            NOTE(review): despite the parameter name, that mapping is keyed by
            the last token of each ``cdist_clientcd.txt`` line, which looks
            like a client code rather than a name — confirm with callers.

    Returns:
        A string such as ``"0.0*1.0*0.0"`` with one entry per item in
        ``cdist_index`` and 1.0 at the position of the client's code.

    Raises:
        KeyError: if the key is not in ``clientcd_cdist``.
    """
    hot_position = cdist_index[clientcd_cdist[client_name]]
    vec = [0.] * len(cdist_index)
    vec[hot_position] = 1.
    return '*'.join(map(str, vec))
def _seller_type_vectorizer(type_name):
    """Return a '*'-joined one-hot string for a seller type.

    Args:
        type_name: key into the module-level ``sellertype_index`` mapping.

    Returns:
        A string such as ``"1.0*0.0"`` with one entry per item in
        ``sellertype_index`` and 1.0 at the index stored for ``type_name``.

    Raises:
        KeyError: if ``type_name`` is not in ``sellertype_index``.
    """
    hot_position = sellertype_index[type_name]
    vec = [0.] * len(sellertype_index)
    vec[hot_position] = 1.
    return '*'.join(map(str, vec))
def tinger():
    """Aggregate the pickled summaries in ``./conv`` and print one line per key.

    Every ``./conv/*.pkl`` file is expected to hold a mapping of group id to
    summary dict. Summaries are grouped by ``計上月 + "__SEP__" + クライアントCD``
    and the five amount columns are totalled per key. For each key one line is
    printed: the key, the ``name=value`` pairs of the last summary seen for
    that key (without the amount columns), then the five totals formatted with
    ``%d``. Fields are joined by ``__SEP__`` and stripped of whitespace.

    Changes from the earlier version:
      * Files are processed in sorted order, so "last summary seen" and the
        output order no longer depend on directory listing order.
      * A summary that lacks an amount column contributes 0 instead of
        raising KeyError.
      * Unused lookups (商品名 and the tokens of the group id) and an empty
        ``try``/``except`` were removed.

    Raises:
        KeyError: if a summary lacks 計上月 or クライアントCD.

    NOTE(review): the regex ``\\s| `` is reproduced as it appears in the
    source. Its second alternative is presumably a non-ASCII space that was
    lost in extraction — confirm against the original file.
    """
    amount_columns = ("税抜売上", "税込売上", "粗利", "税抜原価", "税込原価")
    totals = {}      # key -> {amount column: running total}
    last_monad = {}  # key -> summary dict seen last for that key

    for name in sorted(glob.glob('./conv/*.pkl')):
        # pickle is only safe here because analystic() wrote these files;
        # never point this at untrusted data.
        with open(name, 'rb') as fh:
            cid_monad = pickle.loads(fh.read())
        for monad in cid_monad.values():
            key = monad["計上月"] + "__SEP__" + monad["クライアントCD"]
            sums = totals.setdefault(key, {column: 0. for column in amount_columns})
            for column in amount_columns:
                sums[column] += monad.get(column, 0.)
            last_monad[key] = monad

    for key, sums in totals.items():
        monad = last_monad[key]
        # The per-group amounts are replaced by the per-key totals below.
        for column in amount_columns:
            monad.pop(column, None)
        data = "__SEP__".join("%s=%s" % (k, v) for k, v in monad.items())
        fields = [key, data]
        fields.extend("%s=%d" % (column, sums[column]) for column in amount_columns)
        print("__SEP__".join(re.sub(r"\s| ", "", str(field)) for field in fields))
def conv():
    """Re-encode every ``./obic/*.csv`` with ``nkf -S -w`` into ``./<file name>.utf8``.

    The output files are written to the current directory, where analystic()
    looks for ``./*.csv.utf8``. One status value is printed per input file.

    nkf is invoked with an argument list and the output file is opened from
    Python, so file names containing spaces or shell metacharacters are passed
    through untouched instead of being interpolated into a shell command.

    NOTE: the printed value is now nkf's exit code. The previous
    ``os.system`` call printed the platform's raw wait status, which is the
    same for success (0) but differs for failures.
    """
    for name in glob.glob('./obic/*.csv'):
        last_name = os.path.basename(name)
        with open("%s.utf8" % last_name, "wb") as out:
            try:
                ret = subprocess.call(["nkf", "-S", "-w", name], stdout=out)
            except FileNotFoundError as e:
                # nkf is not installed: report it and carry on, as the shell
                # pipeline did. 127 is the shell's "command not found" code.
                print(e, file=sys.stderr)
                ret = 127
        print(ret)
if __name__ == '__main__':
    # Each flag runs one stage. Several flags may be combined, and the stages
    # then run in this fixed order: convert, analyse, aggregate and print.
    if '--conv' in sys.argv:
        conv()
    if '-a' in sys.argv:
        analystic()
    if '-t' in sys.argv:
        tinger()
# End of gist. (GitHub page footer — sign-up / sign-in prompt — is not part of the script.)