Created
July 8, 2024 15:08
-
-
Save moesoha/4a77d44b7eba5120fcbc198869e45bb6 to your computer and use it in GitHub Desktop.
get unsubscribe method from raw email
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys | |
import re | |
import urllib.parse | |
def get_unsubscribe_methods(eml_path):
    """Return the unsubscribe targets listed in an email's List-Unsubscribe header.

    Only the header section of the file is scanned (reading stops at the
    first blank line).  The file is decoded as ASCII and undecodable bytes
    are dropped.

    Args:
        eml_path: Path to a raw email (.eml) file.

    Returns:
        A dict that may contain the keys ``'http'`` (http/https URLs) and
        ``'mail'`` (mailto URLs), each mapping to a non-empty list of the
        angle-bracketed targets in header order.  Keys without targets are
        omitted; an empty dict means no usable target was found.
    """
    header_name = 'list-unsubscribe:'
    # None until the header line is seen; afterwards the list of value
    # fragments.  A separate "seen" state is needed because the first line
    # of a folded header may carry an empty value.
    fragments = None
    with open(eml_path, encoding='ascii', errors='ignore') as f:
        for line in f:
            if not line.strip():
                # A blank line terminates the header section.
                break
            if fragments is not None:
                if line.startswith((' ', '\t')):
                    # Folded continuation line of the header value.
                    fragments.append(line.strip())
                    continue
                # Next header begins: the value is complete.
                break
            # Match on the colon only: whitespace after it is optional and
            # the value may start on the following (folded) line.
            if line.lower().startswith(header_name):
                fragments = [line[len(header_name):].strip()]

    unsubscribe = ''.join(fragments) if fragments else ''
    if not unsubscribe:
        return {}

    methods = {'http': [], 'mail': []}
    for target in re.findall(r"<([^>]+)>", unsubscribe):
        try:
            scheme = urllib.parse.urlparse(target).scheme
        except ValueError:
            # Header content is untrusted; skip entries urlparse rejects
            # (e.g. a malformed bracketed host) instead of failing outright.
            continue
        if scheme in ('http', 'https'):
            methods['http'].append(target)
        elif scheme == 'mailto':
            methods['mail'].append(target)

    # Report only the kinds that actually have at least one target.
    return {kind: targets for kind, targets in methods.items() if targets}
# Command-line driver: for every email file given as an argument, print a
# banner followed by the first unsubscribe target found for it.
for filename in sys.argv[1:]:
    print(f'----- {filename} -----')
    methods = get_unsubscribe_methods(filename)
    # Web links take precedence over mail addresses.
    for kind in ('http', 'mail'):
        if kind in methods:
            print(methods[kind][0])
            break
    else:
        # Neither kind is present in the result.
        print('no available unsubscribe method')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment