Skip to content

Instantly share code, notes, and snippets.

@alperaydyn
Created March 24, 2023 23:02
Show Gist options
  • Save alperaydyn/c884414718214b6d40222743889a8888 to your computer and use it in GitHub Desktop.
Save alperaydyn/c884414718214b6d40222743889a8888 to your computer and use it in GitHub Desktop.
Google Drive/My Drive/Projects/Bilsem/Bilsem_Bagislar.ipynb
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"metadata": {
"ExecuteTime": {
"end_time": "2023-03-24T20:48:01.524739Z",
"start_time": "2023-03-24T20:48:00.861017Z"
},
"trusted": true
},
"id": "31b328fd",
"cell_type": "code",
"source": "import pandas as pd\nimport numpy as np\nimport os\nimport re\nimport locale\nlocale.setlocale(locale.LC_ALL, 'tr_TR')\nimport openpyxl\nfrom openpyxl.formula.translate import Translator",
"execution_count": 3,
"outputs": []
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2023-03-24T22:31:30.248419Z",
"start_time": "2023-03-24T22:31:30.201271Z"
},
"code_folding": [
0,
25,
51,
68,
96,
185
],
"trusted": true
},
"id": "e67bf3c6",
"cell_type": "code",
"source": "def read_hareketler():\n hareket_files = [f for f in os.listdir('data') if 'Hareket' in f and f[0]!='~']\n\n hareketler = []\n\n for hareket_file in hareket_files:\n wb = openpyxl.load_workbook(f'data/{hareket_file}')\n\n # look for 'Tarih' for caption row\n for i in range(1,20):\n # read the cell value for colum A and row {i}\n cell_value = wb.worksheets[0].cell(i,1).value\n\n # if cell value is 'Tarih' then data starts from the following row, skip top {i} rows\n if cell_value=='Tarih':\n hareketler.append(pd.read_excel(f'data/{hareket_file}', skiprows=i-1))\n break\n\n df = pd.concat(hareketler)\n #print(df.shape)\n df = df.drop_duplicates()\n #print(df.shape)\n df = df.reset_index(drop=True)\n return df\n\ndef clear(x):\n non_alpha = r'[^a-zA-ZçğıöşüÇĞİÖŞÜ\\u00E7\\u011F\\u0131\\u00F6\\u015F\\u00FC\\u00C7\\u011E\\u0130\\u00D6\\u015E\\u00DC]'\n ret = x\n ret = re.sub('SN: ','SN:', ret)\n ret = re.sub('SN:\\d+','_ ', ret)\n ret = re.sub('Banka: \\d+','_ ', ret)\n ret = re.sub('GönBanka:\\d+ ','_ ', ret)\n ret = re.sub('GönŞube:\\d+ ','_ ', ret)\n ret = re.sub('EftRef:\\d+ ','_ ', ret)\n ret = re.sub('Eft Otomatik Muhasebe',' ', ret)\n ret = re.sub('Eft Otomatik M.*?(?=\\s|$)','_ ',ret)\n ret = re.sub('Eft O.*?(?=\\s|$)','_ ',ret)\n ret = ret.replace('_ ','')\n ret = ret.replace('i','İ').upper()\n \"\"\"\n ret = ret.replace('Ç','C')\n ret = ret.replace('Ğ','G')\n ret = ret.replace('İ','I')\n ret = ret.replace('Ö','O')\n ret = ret.replace('Ş','S')\n ret = ret.replace('Ü','U')\n \"\"\"\n ret = re.sub(non_alpha, ' ', ret)\n ret = ret.strip()\n return ret\n\ndef cmatch(x, tokens):\n tokens = np.unique(tokens).tolist()\n isim = x.İSİM.split(' ')\n \n anne = clear(x['BABA ADI']).split(' ')\n # anne/baba adı veya soyadı çocuğun ismi içinde olduğu zaman çift sayıyor\n anne = [a for a in anne if not a in isim]\n \n baba = clear(x['ANNE ADI']).split(' ')\n # anne/baba adı veya soyadı çocuğun ismi içinde olduğu zaman çift sayıyor\n baba = [b for b in baba if not b in isim]\n \n m1 = sum([1 for t in tokens for o in isim if t==o])\n m2 = sum([0.3 for t in tokens for o in anne if t==o])\n m3 = sum([0.3 for t in tokens for o in baba if t==o])\n return m1+m2+m3\n\ndef aciklama_match(x, do):\n x = clear(x)\n \n # manuel müdahaleler -------------------------------------\n x = x.replace('KEREM ÇINAR','KEREM')\n # manuel müdahaleler -------------------------------------\n \n tokens = [t for t in x.split(' ') if t!='']\n\n dff = do[['İSİM','ANNE ADI','BABA ADI']].assign(PUAN=do.fillna('').apply(lambda x: cmatch(x, tokens), axis=1))\n dff = dff[dff.PUAN>1].sort_values('PUAN', ascending=False)\n max_puan = dff.PUAN.max()\n dff = dff[dff.PUAN==max_puan]\n \n if dff.shape[0]==0:\n oid=-1\n elif dff.shape[0]==1:\n oid=dff.iloc[0,].name\n else:\n oid=-2\n \n ret = pd.Series({'ÖğrenciID': oid ,\n 'EşleşmeAdet': len(dff),\n 'Eşleşenler':dff.reset_index().values,\n })\n \n return ret\n\ndef excelize(do, dff, file_name):\n # create file -------------------------------------------------------------\n \n do.to_excel(file_name, sheet_name='Ogrenciler') # save ogrenciler to file\n\n # open file ---------------------------------------------------------------\n book = openpyxl.load_workbook(file_name, )\n writer = pd.ExcelWriter(file_name, engine='openpyxl') \n writer.book = book\n writer.sheets = dict((ws.title, ws) for ws in book.worksheets)\n\n dff.to_excel(writer, \"Hareketler\") # save Hareketler to file\n writer.save()\n\n # re-open ogrenciler to add hareket data ---------------------------------\n book = openpyxl.load_workbook(file_name)\n ws = book['Ogrenciler']\n\n # calculate distinct dates from hareketler -------------------------------\n dates = dff.Dönem.drop_duplicates().sort_values().values.tolist()\n\n # find last colum to add dates\n max_col_num = ws.max_column + 1\n max_col_cell = ws.cell(1, max_col_num)\n max_col_adr1 = f'{max_col_cell.column_letter}${max_col_cell.row}'\n max_col_adr2 = f'{max_col_cell.column_letter}{max_col_cell.row}'\n \n \n # write calculation formula to first cell --------------------------------\n formula = f'=SUMIFS(Hareketler!$G:$G,Hareketler!$K:$K,Ogrenciler!$A2,Hareketler!$J:$J,Ogrenciler!{max_col_adr1},Hareketler!$G:$G,\">0\")'\n formula_ex = f'=SUMIFS(Hareketler!$G:$G,Hareketler!$K:$K,Ogrenciler!$A2,Hareketler!$J:$J,Ogrenciler!{max_col_adr1},Hareketler!$G:$G,\"<0\")'\n ws[max_col_adr2] = formula\n ws.cell(do.shape[0]+2,1).value = -1 # Tutar>0, Bulunamayan Gelir Kalemleri\n ws.cell(do.shape[0]+2,3).value = 'Bulunamayan Gelirler' \n ws.cell(do.shape[0]+3,1).value = -1 # Tutar<0, Gider Kalemleri\n ws.cell(do.shape[0]+3,3).value = 'Gider Toplamı' \n\n # write all dates to columns and copy the formula to all cells -----------\n # --- Income transactions ----\n for i,d in enumerate(dates):\n ws.cell(1, max_col_num+i).value = d\n for r in range(2,do.shape[0]+3):\n c = ws.cell(r, max_col_num+i)\n c.value = Translator(formula, origin=\"X2\").translate_formula(f'{c.column_letter}{c.row}')\n # --- Expenditures ------------\n for i,d in enumerate(dates):\n c = ws.cell(r+1, max_col_num+i)\n c.value = Translator(formula_ex, origin=\"X2\").translate_formula(f'{c.column_letter}{c.row}')\n \n # Toplam sütunu -----------------------------------------------------------\n start_col_num = ws.max_column + 1\n \n # Atölye Sütunları\n ekler = ['*', 'KİMYA', 'SERAMİK', 'RESİM', 'MÜZİK', 'ROBOT']\n for i, ek in enumerate(ekler):\n c = ws.cell(1, start_col_num + i)\n c.value = ek\n \n c = ws.cell(1, start_col_num)\n org1 = f'{c.column_letter}${c.row}'\n c = ws.cell(2, start_col_num)\n org2 = f'{c.column_letter}${c.row}'\n \n formula2 = f\"\"\"=SUMIFS(Hareketler!$G:$G,Hareketler!$K:$K,Ogrenciler!$A2,Hareketler!$I:$I,\"*\"&Ogrenciler!{org1}&\"*\",Hareketler!$G:$G,\">0\")\"\"\"\n c.value = formula2\n \n # Öğrenvi Gelir kalemleri -------------------------\n for i,d in enumerate(ekler):\n for r in range(2,do.shape[0]+3):\n c = ws.cell(r, start_col_num+i)\n fcell = f'{c.column_letter}{c.row}'\n c.value = Translator(formula2, origin=org2.replace('$','')).translate_formula(fcell)\n\n # Gider kalemleri ---------------------------------\n formula3 = f\"\"\"=SUMIFS(Hareketler!$G:$G,Hareketler!$K:$K,Ogrenciler!$A2,Hareketler!$I:$I,\"*\"&Ogrenciler!{org1}&\"*\",Hareketler!$G:$G,\"<0\")\"\"\"\n for i,d in enumerate(dates):\n c = ws.cell(r+1, start_col_num+i)\n fcell = f'{c.column_letter}{c.row}'\n c.value = Translator(formula3, origin=org2.replace('$','')).translate_formula(fcell)\n \n \n # add filter\n ws.auto_filter.ref = ws.dimensions\n ws.freeze_panes = ws['N2'] \n \n # save & close file -------------------------------------------------------\n book.save(file_name)\n book.close()\n \ndef run(file_name = 'Bilsem_Bagis.xlsx'):\n # Dosyaları oku -----------------------------------------------------------------------------------------\n df = read_hareketler()\n do = pd.read_excel('data/Bilsem_12_12_2022 14_07_53.xlsx', sheet_name='genel liste')\n dd = pd.read_excel('data/Bilsem_12_12_2022 14_07_53.xlsx', sheet_name='destek')\n do = pd.merge(left=do, right=dd[['TC NO','İSİM']].assign(DESTEK=1), how='left',\n left_on=['TC NO','İSİM'], right_on=['TC NO','İSİM']\n )\n\n # Açıklamaları temizle ----------------------------------------------------------------------------------\n dfc = df.assign(Açıklama2=df.Açıklama.apply(lambda x: clear(x)))\n #dfc = dfc.sample(20, random_state=12)\n #dfc = dfc[dfc.Açıklama2.str.contains('KEREM ÇINAR')]\n \n \n # Dönem bilgisini ekle, tarih'ten Yıl-Ay formatına çevir ------------------------------------------------\n dfc = dfc.assign(Dönem = lambda x: pd.to_datetime(x.Tarih, format='%d.%m.%Y %H:%M').dt.strftime('%Y-%m'))\n\n\n # Açıklamalar ile öğrencileri eşleştir ------------------------------------------------------------------\n dfcc = pd.merge(dfc, dfc.apply(lambda x: aciklama_match(x.Açıklama2, do), axis=1),\n left_index=True, right_index=True\n )\n\n \n # Birden fazla öğrenci ile eşleşen kayıtları paylaştır --------------------------------------------------\n dff = pd.concat(\n [\n dfcc[dfcc.ÖğrenciID!=-2],\n pd.concat([\n dfcc[dfcc.ÖğrenciID==-2].assign(ÖğrenciID=lambda x: x.Eşleşenler.apply(lambda y: y[0][0]))\\\n .assign(**{'Tutar(TRY)': lambda x: x[['Tutar(TRY)']]/2}), \n dfcc[dfcc.ÖğrenciID==-2].assign(ÖğrenciID=lambda x: x.Eşleşenler.apply(lambda y: y[1][0]))\\\n .assign(**{'Tutar(TRY)': lambda x: x[['Tutar(TRY)']]/2}), \n ]).sort_index() ]\n )\n dff \n \n excelize(do, dff, file_name)\n ",
"execution_count": 124,
"outputs": []
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2023-03-24T22:32:36.731531Z",
"start_time": "2023-03-24T22:31:38.215172Z"
},
"code_folding": [],
"scrolled": true,
"trusted": true
},
"id": "d2f0f945",
"cell_type": "code",
"source": "run('Bilsem_Bagis_20230316.xlsx')",
"execution_count": 125,
"outputs": []
}
],
"metadata": {
"gist": {
"id": "",
"data": {
"description": "Google Drive/My Drive/Projects/Bilsem/Bilsem_Bagislar.ipynb",
"public": true
}
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3 (ipykernel)",
"language": "python"
},
"language_info": {
"name": "python",
"version": "3.9.13",
"mimetype": "text/x-python",
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"pygments_lexer": "ipython3",
"nbconvert_exporter": "python",
"file_extension": ".py"
},
"toc": {
"nav_menu": {},
"number_sections": true,
"sideBar": true,
"skip_h1_title": false,
"base_numbering": 1,
"title_cell": "Table of Contents",
"title_sidebar": "Contents",
"toc_cell": false,
"toc_position": {},
"toc_section_display": true,
"toc_window_display": false
},
"varInspector": {
"cols": {
"lenName": 16,
"lenType": 16,
"lenVar": 40
},
"kernels_config": {
"python": {
"delete_cmd_postfix": "",
"delete_cmd_prefix": "del ",
"library": "var_list.py",
"varRefreshCmd": "print(var_dic_list())"
},
"r": {
"delete_cmd_postfix": ") ",
"delete_cmd_prefix": "rm(",
"library": "var_list.r",
"varRefreshCmd": "cat(var_dic_list()) "
}
},
"types_to_exclude": [
"module",
"function",
"builtin_function_or_method",
"instance",
"_Feature"
],
"window_display": false
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment