Last active
February 3, 2020 16:43
-
-
Save vlad-ds/d12d4de94304898189b197b699004800 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from typing import List, Optional
def get_mails(path: str, limit: Optional[int] = None) -> List[str]:
    """Extract emails from an .mbox file.

    A message starts at a separator line beginning with ``"From "`` (note the
    trailing space) and runs until the next separator or the end of the file.
    Header lines such as ``"From: someone"`` are NOT separators and stay
    inside the message they belong to.

    Args:
        path: Path to the .mbox file; it is read as UTF-8 text.
        limit: Maximum number of messages to return. ``None`` (the default)
            returns every message; zero or a negative value returns an
            empty list.

    Returns:
        The raw text of each message, separator line included, in file order.
        Any text that precedes the first separator line is discarded.
    """
    mails: List[str] = []
    # Lines of the message currently being read; None until the first
    # separator is seen, and again once the limit has been reached.
    current = None
    with open(path, 'r', encoding='UTF-8') as file:
        for line in file:
            if line.startswith('From '):
                # A new separator closes the message collected so far.
                if current is not None:
                    mails.append(''.join(current))
                if limit is not None and len(mails) >= limit:
                    current = None
                    break
                current = [line]
            elif current is not None:
                current.append(line)
    # The final message has no following separator, so flush it here.
    if current is not None:
        mails.append(''.join(current))
    return mails
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment