Last active
May 28, 2024 02:48
-
-
Save DanielOX/39001e9d010d1771e71d72c220368cac to your computer and use it in GitHub Desktop.
JazzCash Parse Tid and Mobile Number Python
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re

import pandas as pd

# Path of the archived JazzCash SMS export; messages are separated by one or
# more blank lines.
jazzcash_file = "./jazzcash.txt"

# Two or more consecutive line breaks separate one SMS from the next.
_MESSAGE_SEPARATOR = re.compile(r"(?:\r?\n){2,}")
# Account details are removed before number extraction so their digits are
# not picked up as a mobile number or TID.
_ACCOUNT_DETAILS = (
    re.compile(r"a/c: \d+"),
    re.compile(r"mcb account: \d+"),
)
# Runs of 11-12 digits are taken to be the mobile number and/or the TID.
_LONG_NUMBER = re.compile(r"\d{11,12}")

_COLUMNS = ["mobile", "tid", "type"]


def _filter_received(raw_text):
    """Return the lower-cased "received cash" messages found in *raw_text*.

    Messages that contain "download now for free" are treated as spam and
    dropped, as are messages that do not mention "received".
    """
    lowered = (sms.lower() for sms in _MESSAGE_SEPARATOR.split(raw_text.strip()))
    return [
        sms
        for sms in lowered
        if "received" in sms and "download now for free" not in sms
    ]


def _strip_account_numbers(sms):
    """Return *sms* with "a/c: <digits>" and "mcb account: <digits>" removed."""
    for pattern in _ACCOUNT_DETAILS:
        sms = pattern.sub("", sms)
    return sms


def _extract_row(sms):
    """Return ``[mobile, tid, "JazzCash"]`` for one cleaned message.

    * Two numbers found: first is the mobile, second is the TID.
    * One number found: it is the TID and the mobile is ``"Nil"``.
    * No number found: both fields are ``"Nil"``, so the row stays aligned
      with the three DataFrame columns.
    * More than two found: only the first two, in order of appearance, are
      kept. The original script crashed on such rows when building the
      DataFrame. NOTE(review): confirm which numbers matter for these
      messages.
    """
    numbers = _LONG_NUMBER.findall(sms)[:2]
    padding = ["Nil"] * (2 - len(numbers))
    return padding + numbers + ["JazzCash"]


def parse_transactions(raw_text, *, verbose=False):
    """Build the mobile/TID DataFrame from the raw text of the SMS archive.

    Args:
        raw_text: Full contents of the archive, messages separated by blank
            lines.
        verbose: When true, print the number of kept messages, then every
            cleaned message and every extracted row with its index.

    Returns:
        A ``pandas.DataFrame`` with the columns ``mobile``, ``tid`` and
        ``type``, one row per kept message.
    """
    received = _filter_received(raw_text)
    if verbose:
        # Total number of messages kept after filtering.
        print(len(received))

    # Remove unnecessary details, e.g. the bank account number.
    cleaned = []
    for index, sms in enumerate(received):
        sms = _strip_account_numbers(sms)
        cleaned.append(sms)
        if verbose:
            print(index, sms)

    # Pull the mobile number and TID out of every cleaned message.
    rows = []
    for index, sms in enumerate(cleaned):
        row = _extract_row(sms)
        if verbose:
            print(index, row)
        rows.append(row)

    return pd.DataFrame(rows, columns=_COLUMNS)


if __name__ == "__main__":
    # Explicit encoding so decoding does not depend on the platform default.
    with open(jazzcash_file, encoding="utf-8") as f:
        data = f.read()

    # `df` is ready for further analysis; evaluate it in a notebook/REPL to
    # inspect it.
    df = parse_transactions(data, verbose=True)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
TID from JazzCash SMS