Skip to content

Instantly share code, notes, and snippets.

@punchagan
Last active November 29, 2023 12:26
Show Gist options
  • Save punchagan/f0e182bb9cbdcb16f82126490829d092 to your computer and use it in GitHub Desktop.
Save punchagan/f0e182bb9cbdcb16f82126490829d092 to your computer and use it in GitHub Desktop.
WhatsApp Broadcast Export to Zulip
#!/usr/bin/env python3
from datetime import datetime
import re
import sys
import zulip
# strptime format of the timestamp that precedes each exported message:
# month/day/two-digit year, then 24-hour hour:minute (e.g. "12/31/20, 23:59").
DATE_FORMAT = "%m/%d/%y, %H:%M"
def is_message(text):
    """Return True if *text* begins with a ``sender:`` prefix.

    The prefix is one or more word characters immediately followed by a
    colon at the very start of *text*.

    NOTE(review): a sender name containing spaces or punctuation (for
    example ``"Alice Smith: hi"``) does not match this pattern, so such
    entries are reported as non-messages -- confirm this is intended.
    """
    # Raw string: "\w" inside a plain literal is an invalid escape sequence
    # that recent Python versions warn about. re.match already anchors at
    # the start of the string, so no explicit "^" is needed.
    return re.match(r"\w+:", text) is not None
def extract_messages(text):
    """Parse the text of a chat export into a list of message dicts.

    The export is split on ``MM/DD/YY, HH:MM - `` timestamps. Entries whose
    body does not start with a ``sender:`` prefix (per ``is_message``) are
    skipped.

    Args:
        text: Full contents of the export file.

    Returns:
        A list of dicts, in file order, each with the keys:
        ``"date"`` (``datetime`` parsed with ``DATE_FORMAT``),
        ``"sender"`` (text before the first colon),
        ``"text"`` (message lines that are not attachment markers, joined
        with newlines) and
        ``"files"`` (names taken from lines ending in ``(file attached)``).

    Raises:
        ValueError: if a matched timestamp is not a valid date/time for
            ``DATE_FORMAT``.
    """
    attached_marker = "(file attached)"
    # The capturing group makes re.split keep the timestamps. After dropping
    # whatever precedes the first timestamp, the list alternates
    # timestamp, body, timestamp, body, ...
    parts = re.split(
        r"(\d{2}/\d{2}/\d{2}, \d{2}:\d{2}) - ", text.strip()
    )[1:]
    messages = []
    for stamp, body in zip(parts[::2], parts[1::2]):
        date = datetime.strptime(stamp, DATE_FORMAT)
        body = body.strip()
        if not is_message(body):
            continue
        sender, content = body.split(":", 1)
        files = []
        text_lines = []
        for line in content.strip().splitlines():
            if line.endswith(attached_marker):
                # Strip the marker rather than cutting at the first space,
                # so file names that contain spaces stay intact.
                files.append(line[: -len(attached_marker)].strip())
            else:
                text_lines.append(line)
        messages.append(
            {
                "date": date,
                "text": "\n".join(text_lines),
                "sender": sender,
                "files": files,
            }
        )
    return messages
def assert_broadcast(messages):
    """Verify that all of *messages* come from exactly one sender.

    Args:
        messages: Iterable of dicts that each have a ``"sender"`` key.

    Raises:
        AssertionError: if there are no messages, or if more than one
            distinct sender is present.
    """
    senders = {message["sender"] for message in messages}
    # Raise explicitly instead of using an ``assert`` statement, which is
    # removed when Python runs with -O and would silently skip this check.
    if not senders:
        raise AssertionError("No messages found - not a WhatsApp Broadcast!")
    if len(senders) > 1:
        raise AssertionError("Multiple senders - not a WhatsApp Broadcast!")
def upload_image(client, filename):
    """Upload the file named *filename* via *client* and return its URI.

    The file is opened in binary mode and sent with a POST to the
    ``user_uploads`` endpoint; the ``"uri"`` entry of the response is
    returned.
    """
    # FIXME: filename should be converted to path
    with open(filename, "rb") as attachment:
        response = client.call_endpoint(
            "user_uploads",
            method="POST",
            files=[attachment],
        )
    return response["uri"]
def send_zulip_message(client, zulip_info, message):
    """Send one parsed message to a Zulip stream and print the response.

    Args:
        client: Object providing ``send_message(request)`` (and the upload
            endpoint used by ``upload_image`` when there are attachments).
        zulip_info: Dict with the destination ``"stream"`` and ``"topic"``.
        message: Dict with ``"text"`` and ``"files"`` keys. It is not
            modified by this function.
    """
    # Build the content in a local variable so the caller's dict is left
    # untouched; otherwise sending the same message twice would prepend the
    # attachment link a second time.
    content = message["text"]
    if message["files"]:
        # NOTE(review): only the first attachment is uploaded; any further
        # entries in message["files"] are ignored.
        image_uri = upload_image(client, message["files"][0])
        content = "[]({})\n\n{}".format(image_uri, content)
    request = {
        "type": "stream",
        "to": zulip_info["stream"],
        "subject": zulip_info["topic"],
        "content": content.strip(),
    }
    result = client.send_message(request)
    print(result)
def main(path, zuliprc_path, zulip_stream):
    """Read a chat export and post each of its messages to a Zulip stream.

    Args:
        path: Path of the exported chat text file. The Zulip topic is taken
            from its file name: the part after the first ``with`` (extension
            removed), or the whole name without extension if ``with`` does
            not occur.
        zuliprc_path: Path of the zuliprc config file passed to
            ``zulip.Client``.
        zulip_stream: Name of the stream to post to.

    Raises:
        AssertionError: if the export does not pass ``assert_broadcast``.
    """
    from pathlib import Path

    # Decode explicitly instead of relying on the platform default encoding.
    # NOTE(review): assumes the export file is UTF-8 -- confirm.
    with open(path, encoding="utf-8") as f:
        text = f.read()
    messages = extract_messages(text)
    assert_broadcast(messages)
    client = zulip.Client(config_file=zuliprc_path)

    # Look only at the file name, so a "with" or "." in a directory name
    # cannot leak into the topic.
    stem = Path(path).stem
    _, found, tail = stem.partition("with")
    topic = tail.strip() if found else stem.strip()
    zulip_info = {
        "stream": zulip_stream,
        "topic": topic,
    }
    for message in messages:
        # FIXME: Skip messages that have been already sent? For now we assume
        # that the archive file is cleaned up manually, of the older messages.
        send_zulip_message(client, zulip_info, message)
if __name__ == "__main__":
    # Fail with a usage message instead of a bare IndexError when arguments
    # are missing. Extra arguments are ignored, as before.
    if len(sys.argv) < 4:
        sys.exit(
            "usage: {} EXPORT_PATH ZULIPRC_PATH ZULIP_STREAM".format(
                sys.argv[0]
            )
        )
    export_path, zuliprc_path, zulip_stream = sys.argv[1:4]
    main(export_path, zuliprc_path, zulip_stream)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment