|
import re |
|
import sys |
|
import openpyxl |
|
from datetime import datetime |
|
from bs4 import BeautifulSoup |
|
from openpyxl import load_workbook |
|
from openpyxl.styles.numbers import FORMAT_DATE_DDMMYY, FORMAT_NUMBER_00 |
|
|
|
class Transaction: |
|
def __init__(self, fund_name, fund_type, isin, folio, txn_data): |
|
self.fund_name = fund_name |
|
self.fund_type = fund_type |
|
|
|
self.isin = isin |
|
self.folio = folio |
|
|
|
serial_no, units, purchase_date, purchase_value, purchase_nav, acquisition_value, jan31_value, jan31_nav, redemption_date, redemption_value, redemption_nav, stcg, ltcg = txn_data |
|
|
|
self.serial_no = int(serial_no) |
|
self.units = float(units) |
|
self.purchase_date = datetime.strptime(purchase_date, '%b %d, %Y').strftime('%d/%m/%Y') |
|
self.purchase_value = float(purchase_value) |
|
self.purchase_nav = float(purchase_nav) |
|
self.acquisition_value = float(acquisition_value) |
|
|
|
try: |
|
self.jan31_value = float(jan31_value) |
|
except: |
|
self.jan31_value = None |
|
|
|
self.redemption_date = datetime.strptime(redemption_date, '%b %d, %Y').strftime('%d/%m/%Y') |
|
self.redemption_value = float(redemption_value) |
|
|
|
try: |
|
self.stcg = float(stcg) |
|
except: |
|
self.stcg = None |
|
|
|
try: |
|
self.ltcg = float(ltcg) |
|
except: |
|
self.ltcg = None |
|
|
|
def get_data(self): |
|
return ( |
|
self.fund_type, |
|
self.isin, |
|
self.fund_name, |
|
self.units, |
|
self.purchase_date, |
|
self.purchase_value, |
|
self.redemption_date, |
|
self.redemption_value / self.units if self.redemption_value else None, |
|
self.jan31_value / self.units if self.jan31_value else None, |
|
0.0, # Set "Transfer expenses (stamp duty, brokerage, etc.) column to 0 |
|
self.purchase_nav, |
|
self.acquisition_value |
|
) |
|
|
|
# Reads all transactions from the capital gains report from Kuvera and returns the parsed data |
|
# in a tuple of (transactions, total_stcg, total_ltcg) |
|
def read_transactions(capital_gains_xls_file): |
|
fund_name_pattern = re.compile('(.*)\[ISIN') |
|
isin_pattern = re.compile('\[ISIN: (.*)\]') |
|
folio_pattern = re.compile('Folio No: (.*)') |
|
|
|
# Open the workbook and the sheet in the ClearTax Capital Gains Excel |
|
wb = load_workbook(capital_gains_xls_file) |
|
cg_sheet = wb.worksheets[0] |
|
|
|
# Initialize all variables |
|
current_fund_name = None |
|
current_fund_type = None |
|
current_isin = None |
|
current_folio = None |
|
|
|
equity_subtotal = None |
|
debt_subtotal = None |
|
total_stcg = None |
|
total_ltcg = None |
|
all_transactions = [] |
|
|
|
fund_types = { |
|
'Equity': 'MF (Equity)', |
|
'Hybrid': 'MF (Equity)', # Hybrid funds are also Equity funds from a Capital Gains perspective |
|
'Others': 'MF (Equity)', # Index funds are marked "Others" in Kuvera's report, for some reason |
|
'Debt': 'MF (Other than Equity)' |
|
} |
|
|
|
for row in cg_sheet.iter_rows(): |
|
if isinstance(row[0].value, str): |
|
# This row contains either Fund Name & ISIN or Folio Number or the (Sub)Total |
|
column_data = row[0].value |
|
isin_match = isin_pattern.search(column_data) |
|
folio_match = folio_pattern.search(column_data) |
|
fund_name_match = fund_name_pattern.search(column_data) |
|
|
|
if isin_match: |
|
# This row contains the ISIN |
|
current_isin = isin_match.group(1) |
|
# Extract fund name |
|
fund_name_match = fund_name_pattern.search(column_data) |
|
current_fund_name = fund_name_match.group(1) |
|
|
|
elif folio_match: |
|
# This row contains the Folio Number |
|
current_folio = folio_match.group(1) |
|
|
|
elif row[0].value == 'Total': |
|
# This row contains the Total Capital Gains |
|
total_stcg = float(row[11].value) |
|
total_ltcg = float(row[12].value) |
|
|
|
for fund_type in fund_types: |
|
# This assigns the fund type |
|
if fund_type in column_data: |
|
current_fund_type = fund_types[fund_type] |
|
break |
|
|
|
elif isinstance(row[0].value, int): |
|
# This row contains a transaction |
|
txn_data = [cell.value for cell in row] |
|
transaction = Transaction(current_fund_name, current_fund_type, current_isin, current_folio, txn_data) |
|
all_transactions.append(transaction) |
|
|
|
|
|
return (all_transactions, total_stcg, total_ltcg) |
|
|
|
def write_capital_gains_report(all_transactions, output_xlsx_file, cleartax_template_xlsx_file): |
|
|
|
wb = load_workbook(cleartax_template_xlsx_file) |
|
|
|
# We only populate data in the Mutual Fund Sheet, which is the second one in the template |
|
mf_sheet = wb.worksheets[2] |
|
|
|
# Columns 9, 11 and 13 are computed from the other columns |
|
columns_to_update = [1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 15] |
|
column_types = [ |
|
'string', |
|
'string', |
|
'string', |
|
'float', |
|
'date', |
|
'float', |
|
'date', |
|
'float', |
|
'float', |
|
'string', |
|
'float', |
|
'float' |
|
] |
|
for i in range(len(all_transactions)): |
|
txn_data = all_transactions[i].get_data() |
|
for (val, j, col_type) in zip(txn_data, columns_to_update, column_types): |
|
cell = mf_sheet.cell(row=i+2, column=j) |
|
if val is not None: |
|
cell.value = val |
|
if col_type == 'float': |
|
cell.number_format = FORMAT_NUMBER_00 |
|
|
|
wb.save(output_xlsx_file) |
|
|
|
def prepare(capital_gains_xls_file, cleartax_template_xlsx_file, output_xlsx_file): |
|
|
|
all_transactions, total_stcg, total_ltcg = read_transactions(capital_gains_xls_file) |
|
|
|
stcg_sum = round(sum([txn.stcg for txn in all_transactions if txn.stcg is not None]), 2) |
|
ltcg_sum = round(sum([txn.ltcg for txn in all_transactions if txn.ltcg is not None]), 2) |
|
print('Sum of all STCG across all transactions:', stcg_sum) |
|
print('Total STCG from report:', total_stcg) |
|
print('Sum of all LTCG across all transactions:', ltcg_sum) |
|
print('Total LTCG from report:', total_ltcg) |
|
|
|
write_capital_gains_report(all_transactions, output_xlsx_file, cleartax_template_xlsx_file) |
|
|
|
if __name__ == '__main__': |
|
prepare(sys.argv[1], sys.argv[2], sys.argv[3]) |
Hey @arora-nikhil , it looks like clear tax has updated their template, which breaks this script.