import pandas as pd
import re

# Path of Easypaisa archived message text file

easypaisa_file = './easypaisa.txt'

# Read the whole archive in one go; the file handle is only needed for the
# read itself. The encoding is stated explicitly so that parsing does not
# depend on the platform's default encoding.
with open(easypaisa_file, encoding='utf-8') as f:
    data = f.read()

# Messages are split on runs of two or more line breaks (i.e. blank lines).
# A message is kept only if it contains digits followed by a full stop,
# contains "Received", and does not contain "cashback" (both substring checks
# are case-sensitive).
transaction_message = [
    sms
    for sms in re.split(r"(?:\r?\n){2,}", data.strip())
    if re.search(r'\d+\.', sms) and 'cashback' not in sms and "Received" in sms
]

# Pre-Processing 

def clean_date(date):
    """Normalise the date text extracted from a message.

    The text is lower-cased, every '[' and ']' character is removed and
    surrounding whitespace is trimmed.

    Args:
        date: Extracted text, or the "NULL" placeholder used when no match
            was found.

    Returns:
        The cleaned text, or "NULL" when the input is the placeholder or
        nothing is left after cleaning.
    """
    # Pass the placeholder through untouched so a missing value is spelled
    # "NULL" here exactly as it is by clean_amount and clean_sender.
    if date == "NULL":
        return "NULL"
    cleaned = date.lower().replace('[', '').replace(']', '').strip()
    return cleaned or "NULL"
  
  
def clean_trx(trx):
    """Reduce an extracted "Trx ID <number>." fragment to the bare identifier.

    The text is lower-cased, the literal 'trx id' and every '.' are removed,
    and surrounding whitespace is trimmed.

    Args:
        trx: Extracted text, or the "NULL" placeholder used when no match
            was found.

    Returns:
        The cleaned identifier, or "NULL" when the input is the placeholder
        or nothing is left after cleaning.
    """
    # Without this guard the placeholder would be lower-cased to "null" and
    # slip past the emptiness check below.
    if trx == "NULL":
        return "NULL"
    cleaned = trx.lower().replace('trx id', '').replace('.', '').strip()
    return cleaned or "NULL"
  
  
def clean_amount(amount):
    """Extract the numeric amount from an extracted "Received Rs ..." fragment.

    Every character that is not a digit or '.' is removed, then a single
    leading '.' (such as the one left behind by "Rs.") is dropped.

    Args:
        amount: Extracted text, or the "NULL" placeholder used when no match
            was found.

    Returns:
        The remaining digits-and-dots string, or "NULL" when nothing is left.
    """
    # Raw string: '\d' in a normal string literal is an invalid escape.
    digits = re.sub(r'[^\d.]', '', amount)
    if digits.startswith('.'):
        digits = digits[1:]
    # The emptiness check comes after the dot is dropped, so an input that
    # reduces to a lone '.' yields "NULL" rather than an empty string.
    return digits or "NULL"

def clean_sender(sender):
    """Keep only the digits of an extracted "from <name> <number> " fragment.

    All digits found anywhere in the text are concatenated in order.

    Args:
        sender: Extracted text, or the "NULL" placeholder used when no match
            was found.

    Returns:
        The concatenated digits, or "NULL" when the text contains none.
    """
    # Raw string: '\d' in a normal string literal is an invalid escape.
    # Lower-casing and stripping are unnecessary because every non-digit
    # character is removed anyway.
    digits = re.sub(r'[^\d]', '', sender)
    return digits or "NULL"
  
# Check if found else return NULL

def function_extract(reg):
    """Return the matched text of *reg*, or "NULL" when *reg* is falsy.

    Args:
        reg: The result of a regex search; a falsy value (such as None)
            means nothing was found.

    Returns:
        ``reg.group()`` for a truthy *reg*, otherwise the string "NULL".
    """
    if not reg:
        return "NULL"
    return reg.group()

# Iterate through the text messages and extract the fields of each one

# The patterns never change, so they are compiled once rather than on every
# iteration. Raw strings keep the same pattern text while avoiding invalid
# escape sequences such as '\d' in ordinary string literals.
# NOTE(review): re.X ignores unescaped whitespace, so the second alternative
# of date_r matches "TrxID<digits>." without spaces - confirm it is intended.
date_r = re.compile(r'\[.*\] | Trx ID \d+\.', flags=re.I | re.X)
trx_r = re.compile(r'Trx\ ID\ \d+\.', flags=re.I | re.X)
amount_r = re.compile(r"Received Rs.?\ \d.*\ from")
sender_r = re.compile(r"from \w.*\ \d+\ ")
sender_mobile_r = re.compile(r"\d+")  # compiled but not used below

# Accumulates one dict per message; it must exist before the loop appends
# to it (it was previously never defined, which raised NameError).
transactions = []

for temp in transaction_message:
    # Each field is searched for independently; function_extract supplies
    # "NULL" when a pattern does not match.
    date = clean_date(function_extract(date_r.search(temp)))
    tid = clean_trx(function_extract(trx_r.search(temp)))
    amount = clean_amount(function_extract(amount_r.search(temp)))
    sender = clean_sender(function_extract(sender_r.search(temp)))

    # Stored as a list of dicts for later use in the DataFrame
    transactions.append({
        "date": date,
        "tid": tid,
        "amount": amount,
        "sender": sender,
    })

# Convert to DataFrame
df = pd.DataFrame(transactions)

# Perform Analysis on DF