Created
March 24, 2023 23:02
-
-
Save alperaydyn/c884414718214b6d40222743889a8888 to your computer and use it in GitHub Desktop.
Google Drive/My Drive/Projects/Bilsem/Bilsem_Bagislar.ipynb
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2023-03-24T20:48:01.524739Z", | |
"start_time": "2023-03-24T20:48:00.861017Z" | |
}, | |
"trusted": true | |
}, | |
"id": "31b328fd", | |
"cell_type": "code", | |
"source": "import pandas as pd\nimport numpy as np\nimport os\nimport re\nimport locale\nlocale.setlocale(locale.LC_ALL, 'tr_TR')\nimport openpyxl\nfrom openpyxl.formula.translate import Translator", | |
"execution_count": 3, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2023-03-24T22:31:30.248419Z", | |
"start_time": "2023-03-24T22:31:30.201271Z" | |
}, | |
"code_folding": [ | |
0, | |
25, | |
51, | |
68, | |
96, | |
185 | |
], | |
"trusted": true | |
}, | |
"id": "e67bf3c6", | |
"cell_type": "code", | |
"source": "def read_hareketler():\n hareket_files = [f for f in os.listdir('data') if 'Hareket' in f and f[0]!='~']\n\n hareketler = []\n\n for hareket_file in hareket_files:\n wb = openpyxl.load_workbook(f'data/{hareket_file}')\n\n # look for 'Tarih' for caption row\n for i in range(1,20):\n # read the cell value for colum A and row {i}\n cell_value = wb.worksheets[0].cell(i,1).value\n\n # if cell value is 'Tarih' then data starts from the following row, skip top {i} rows\n if cell_value=='Tarih':\n hareketler.append(pd.read_excel(f'data/{hareket_file}', skiprows=i-1))\n break\n\n df = pd.concat(hareketler)\n #print(df.shape)\n df = df.drop_duplicates()\n #print(df.shape)\n df = df.reset_index(drop=True)\n return df\n\ndef clear(x):\n non_alpha = r'[^a-zA-ZçğıöşüÇĞİÖŞÜ\\u00E7\\u011F\\u0131\\u00F6\\u015F\\u00FC\\u00C7\\u011E\\u0130\\u00D6\\u015E\\u00DC]'\n ret = x\n ret = re.sub('SN: ','SN:', ret)\n ret = re.sub('SN:\\d+','_ ', ret)\n ret = re.sub('Banka: \\d+','_ ', ret)\n ret = re.sub('GönBanka:\\d+ ','_ ', ret)\n ret = re.sub('GönŞube:\\d+ ','_ ', ret)\n ret = re.sub('EftRef:\\d+ ','_ ', ret)\n ret = re.sub('Eft Otomatik Muhasebe',' ', ret)\n ret = re.sub('Eft Otomatik M.*?(?=\\s|$)','_ ',ret)\n ret = re.sub('Eft O.*?(?=\\s|$)','_ ',ret)\n ret = ret.replace('_ ','')\n ret = ret.replace('i','İ').upper()\n \"\"\"\n ret = ret.replace('Ç','C')\n ret = ret.replace('Ğ','G')\n ret = ret.replace('İ','I')\n ret = ret.replace('Ö','O')\n ret = ret.replace('Ş','S')\n ret = ret.replace('Ü','U')\n \"\"\"\n ret = re.sub(non_alpha, ' ', ret)\n ret = ret.strip()\n return ret\n\ndef cmatch(x, tokens):\n tokens = np.unique(tokens).tolist()\n isim = x.İSİM.split(' ')\n \n anne = clear(x['BABA ADI']).split(' ')\n # anne/baba adı veya soyadı çocuğun ismi içinde olduğu zaman çift sayıyor\n anne = [a for a in anne if not a in isim]\n \n baba = clear(x['ANNE ADI']).split(' ')\n # anne/baba adı veya soyadı çocuğun ismi içinde olduğu zaman çift sayıyor\n baba = [b for b in baba if not b in isim]\n \n m1 = sum([1 for t in tokens for o in isim if t==o])\n m2 = sum([0.3 for t in tokens for o in anne if t==o])\n m3 = sum([0.3 for t in tokens for o in baba if t==o])\n return m1+m2+m3\n\ndef aciklama_match(x, do):\n x = clear(x)\n \n # manuel müdahaleler -------------------------------------\n x = x.replace('KEREM ÇINAR','KEREM')\n # manuel müdahaleler -------------------------------------\n \n tokens = [t for t in x.split(' ') if t!='']\n\n dff = do[['İSİM','ANNE ADI','BABA ADI']].assign(PUAN=do.fillna('').apply(lambda x: cmatch(x, tokens), axis=1))\n dff = dff[dff.PUAN>1].sort_values('PUAN', ascending=False)\n max_puan = dff.PUAN.max()\n dff = dff[dff.PUAN==max_puan]\n \n if dff.shape[0]==0:\n oid=-1\n elif dff.shape[0]==1:\n oid=dff.iloc[0,].name\n else:\n oid=-2\n \n ret = pd.Series({'ÖğrenciID': oid ,\n 'EşleşmeAdet': len(dff),\n 'Eşleşenler':dff.reset_index().values,\n })\n \n return ret\n\ndef excelize(do, dff, file_name):\n # create file -------------------------------------------------------------\n \n do.to_excel(file_name, sheet_name='Ogrenciler') # save ogrenciler to file\n\n # open file ---------------------------------------------------------------\n book = openpyxl.load_workbook(file_name, )\n writer = pd.ExcelWriter(file_name, engine='openpyxl') \n writer.book = book\n writer.sheets = dict((ws.title, ws) for ws in book.worksheets)\n\n dff.to_excel(writer, \"Hareketler\") # save Hareketler to file\n writer.save()\n\n # re-open ogrenciler to add hareket data ---------------------------------\n book = openpyxl.load_workbook(file_name)\n ws = book['Ogrenciler']\n\n # calculate distinct dates from hareketler -------------------------------\n dates = dff.Dönem.drop_duplicates().sort_values().values.tolist()\n\n # find last colum to add dates\n max_col_num = ws.max_column + 1\n max_col_cell = ws.cell(1, max_col_num)\n max_col_adr1 = f'{max_col_cell.column_letter}${max_col_cell.row}'\n max_col_adr2 = f'{max_col_cell.column_letter}{max_col_cell.row}'\n \n \n # write calculation formula to first cell --------------------------------\n formula = f'=SUMIFS(Hareketler!$G:$G,Hareketler!$K:$K,Ogrenciler!$A2,Hareketler!$J:$J,Ogrenciler!{max_col_adr1},Hareketler!$G:$G,\">0\")'\n formula_ex = f'=SUMIFS(Hareketler!$G:$G,Hareketler!$K:$K,Ogrenciler!$A2,Hareketler!$J:$J,Ogrenciler!{max_col_adr1},Hareketler!$G:$G,\"<0\")'\n ws[max_col_adr2] = formula\n ws.cell(do.shape[0]+2,1).value = -1 # Tutar>0, Bulunamayan Gelir Kalemleri\n ws.cell(do.shape[0]+2,3).value = 'Bulunamayan Gelirler' \n ws.cell(do.shape[0]+3,1).value = -1 # Tutar<0, Gider Kalemleri\n ws.cell(do.shape[0]+3,3).value = 'Gider Toplamı' \n\n # write all dates to columns and copy the formula to all cells -----------\n # --- Income transactions ----\n for i,d in enumerate(dates):\n ws.cell(1, max_col_num+i).value = d\n for r in range(2,do.shape[0]+3):\n c = ws.cell(r, max_col_num+i)\n c.value = Translator(formula, origin=\"X2\").translate_formula(f'{c.column_letter}{c.row}')\n # --- Expenditures ------------\n for i,d in enumerate(dates):\n c = ws.cell(r+1, max_col_num+i)\n c.value = Translator(formula_ex, origin=\"X2\").translate_formula(f'{c.column_letter}{c.row}')\n \n # Toplam sütunu -----------------------------------------------------------\n start_col_num = ws.max_column + 1\n \n # Atölye Sütunları\n ekler = ['*', 'KİMYA', 'SERAMİK', 'RESİM', 'MÜZİK', 'ROBOT']\n for i, ek in enumerate(ekler):\n c = ws.cell(1, start_col_num + i)\n c.value = ek\n \n c = ws.cell(1, start_col_num)\n org1 = f'{c.column_letter}${c.row}'\n c = ws.cell(2, start_col_num)\n org2 = f'{c.column_letter}${c.row}'\n \n formula2 = f\"\"\"=SUMIFS(Hareketler!$G:$G,Hareketler!$K:$K,Ogrenciler!$A2,Hareketler!$I:$I,\"*\"&Ogrenciler!{org1}&\"*\",Hareketler!$G:$G,\">0\")\"\"\"\n c.value = formula2\n \n # Öğrenvi Gelir kalemleri -------------------------\n for i,d in enumerate(ekler):\n for r in range(2,do.shape[0]+3):\n c = ws.cell(r, start_col_num+i)\n fcell = f'{c.column_letter}{c.row}'\n c.value = Translator(formula2, origin=org2.replace('$','')).translate_formula(fcell)\n\n # Gider kalemleri ---------------------------------\n formula3 = f\"\"\"=SUMIFS(Hareketler!$G:$G,Hareketler!$K:$K,Ogrenciler!$A2,Hareketler!$I:$I,\"*\"&Ogrenciler!{org1}&\"*\",Hareketler!$G:$G,\"<0\")\"\"\"\n for i,d in enumerate(dates):\n c = ws.cell(r+1, start_col_num+i)\n fcell = f'{c.column_letter}{c.row}'\n c.value = Translator(formula3, origin=org2.replace('$','')).translate_formula(fcell)\n \n \n # add filter\n ws.auto_filter.ref = ws.dimensions\n ws.freeze_panes = ws['N2'] \n \n # save & close file -------------------------------------------------------\n book.save(file_name)\n book.close()\n \ndef run(file_name = 'Bilsem_Bagis.xlsx'):\n # Dosyaları oku -----------------------------------------------------------------------------------------\n df = read_hareketler()\n do = pd.read_excel('data/Bilsem_12_12_2022 14_07_53.xlsx', sheet_name='genel liste')\n dd = pd.read_excel('data/Bilsem_12_12_2022 14_07_53.xlsx', sheet_name='destek')\n do = pd.merge(left=do, right=dd[['TC NO','İSİM']].assign(DESTEK=1), how='left',\n left_on=['TC NO','İSİM'], right_on=['TC NO','İSİM']\n )\n\n # Açıklamaları temizle ----------------------------------------------------------------------------------\n dfc = df.assign(Açıklama2=df.Açıklama.apply(lambda x: clear(x)))\n #dfc = dfc.sample(20, random_state=12)\n #dfc = dfc[dfc.Açıklama2.str.contains('KEREM ÇINAR')]\n \n \n # Dönem bilgisini ekle, tarih'ten Yıl-Ay formatına çevir ------------------------------------------------\n dfc = dfc.assign(Dönem = lambda x: pd.to_datetime(x.Tarih, format='%d.%m.%Y %H:%M').dt.strftime('%Y-%m'))\n\n\n # Açıklamalar ile öğrencileri eşleştir ------------------------------------------------------------------\n dfcc = pd.merge(dfc, dfc.apply(lambda x: aciklama_match(x.Açıklama2, do), axis=1),\n left_index=True, right_index=True\n )\n\n \n # Birden fazla öğrenci ile eşleşen kayıtları paylaştır --------------------------------------------------\n dff = pd.concat(\n [\n dfcc[dfcc.ÖğrenciID!=-2],\n pd.concat([\n dfcc[dfcc.ÖğrenciID==-2].assign(ÖğrenciID=lambda x: x.Eşleşenler.apply(lambda y: y[0][0]))\\\n .assign(**{'Tutar(TRY)': lambda x: x[['Tutar(TRY)']]/2}), \n dfcc[dfcc.ÖğrenciID==-2].assign(ÖğrenciID=lambda x: x.Eşleşenler.apply(lambda y: y[1][0]))\\\n .assign(**{'Tutar(TRY)': lambda x: x[['Tutar(TRY)']]/2}), \n ]).sort_index() ]\n )\n dff \n \n excelize(do, dff, file_name)\n ", | |
"execution_count": 124, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2023-03-24T22:32:36.731531Z", | |
"start_time": "2023-03-24T22:31:38.215172Z" | |
}, | |
"code_folding": [], | |
"scrolled": true, | |
"trusted": true | |
}, | |
"id": "d2f0f945", | |
"cell_type": "code", | |
"source": "run('Bilsem_Bagis_20230316.xlsx')", | |
"execution_count": 125, | |
"outputs": [] | |
} | |
], | |
"metadata": { | |
"gist": { | |
"id": "", | |
"data": { | |
"description": "Google Drive/My Drive/Projects/Bilsem/Bilsem_Bagislar.ipynb", | |
"public": true | |
} | |
}, | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3 (ipykernel)", | |
"language": "python" | |
}, | |
"language_info": { | |
"name": "python", | |
"version": "3.9.13", | |
"mimetype": "text/x-python", | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"pygments_lexer": "ipython3", | |
"nbconvert_exporter": "python", | |
"file_extension": ".py" | |
}, | |
"toc": { | |
"nav_menu": {}, | |
"number_sections": true, | |
"sideBar": true, | |
"skip_h1_title": false, | |
"base_numbering": 1, | |
"title_cell": "Table of Contents", | |
"title_sidebar": "Contents", | |
"toc_cell": false, | |
"toc_position": {}, | |
"toc_section_display": true, | |
"toc_window_display": false | |
}, | |
"varInspector": { | |
"cols": { | |
"lenName": 16, | |
"lenType": 16, | |
"lenVar": 40 | |
}, | |
"kernels_config": { | |
"python": { | |
"delete_cmd_postfix": "", | |
"delete_cmd_prefix": "del ", | |
"library": "var_list.py", | |
"varRefreshCmd": "print(var_dic_list())" | |
}, | |
"r": { | |
"delete_cmd_postfix": ") ", | |
"delete_cmd_prefix": "rm(", | |
"library": "var_list.r", | |
"varRefreshCmd": "cat(var_dic_list()) " | |
} | |
}, | |
"types_to_exclude": [ | |
"module", | |
"function", | |
"builtin_function_or_method", | |
"instance", | |
"_Feature" | |
], | |
"window_display": false | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 5 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment