enzo-santos · July 5, 2020 01:25
diff --git a/mobile_twitter_parser.py b/mobile_twitter_parser.py
 import bs4
 import requests

 from typing import Dict, List

 class MobileTwitterParser:
    """
    Parses information from the main page of a Twitter account.

    This parses information based on the mobile version of the Twitter webpage
    because 1. it's fast and 2. it's simple, but it contains less information
    than the desktop version: it lacks, for instance, the pinned tweet and the
    exact timestamp of the tweets (it only shows how long ago the tweet was
    made, instead of an HH:MM:SS format).
    """

    def __init__(self, url: str, timeout: float = 10.0):
        """
        Creates a new Parser object.

        The page is downloaded and parsed once, here; the properties of this
        object only read the parsed tree.

        Parameters
        ----------
        url
            The main page URL of this Twitter account. Because this parser parses
            information from the mobile version of the Twitter webpage, the domain
            name must be 'mobile.twitter.com'.
        timeout
            Number of seconds to wait for the server before giving up. Without
            a timeout a stalled connection would block the caller forever.
        """
        response = requests.get(url, timeout=timeout)
        self.soup = bs4.BeautifulSoup(response.text, 'lxml')

    @classmethod
    def from_username(cls, username: str):
        """
        Creates a new Parser object from an username.

        Parameters
        ----------
        username
            The username of the account to be parsed, without the leading @.
        """
        return cls(f'https://mobile.twitter.com/{username}')

    @staticmethod
    def _optional_text(parent, tag: str, css_class: str) -> str:
        """
        Return the stripped text of the first `tag` element with class
        `css_class` under `parent`, or an empty string if there is none.
        """
        node = parent.find(tag, {'class': css_class})
        return '' if node is None else node.text.strip()

    @property
    def profile_info(self) -> Dict[str, object]:
        """
        Return the parsed information from this profile.

        Returns
        -------
        The parsed data. It may contain the following keys:

            - is_invalid: bool: True if this account does not exist. When
                    present, this is the only key in the dictionary.
            - is_suspended: bool: True if this account is suspended. When
                    present, this is the only key in the dictionary.
            - username: str: the Twitter handle of this account.
            - fullname: str: the user fullname of this account.
            - is_verified: bool: if this account is verified.
            - location: str: the location of this account. If this value
                    is empty, the user did not set this field.
            - bio: str: the biography of this account. If this value is
                    empty, the user did not set this field.
            - url: str: the URL of this account. Note that this field is
                    user-defined, and it has nothing to do with the
                    constructor parameter of this class. If this value is
                    empty, the user did not set this field.
            - is_protected: bool: if this account is protected.
            - tweets: str: number of tweets made by this account, with the
                    ',' separators removed.
            - following: str: number of users this account is following,
                    with the ',' separators removed.
            - followers: str: number of users following this account, with
                    the ',' separators removed.
        """
        # If this element exists, something's wrong: the page is an error
        # page, and the canonical link tells the two error kinds apart.
        if self.soup.find('div', {'class': 'blue'}) is not None:
            if self.soup.find('link', {'rel': 'canonical'}) is not None:
                return {'is_invalid': True}
            return {'is_suspended': True}

        details = self.soup.find('table', {'class': 'profile-details'})
        profile_info = {
            # NOTE(review): two leading characters are dropped here while
            # tweets_info drops only one -- presumably the profile header
            # renders an extra character after the '@'; confirm.
            'username': details.find('div', {'class': 'username'}).text.strip()[2:],
            'fullname': details.find('div', {'class': 'fullname'}).text.strip(),
            'is_verified': details.find('a', {'class': 'badge'}) is not None,
            # User-defined fields: a missing element is reported as empty
            # instead of raising AttributeError.
            'location': self._optional_text(details, 'div', 'location'),
            'bio': self._optional_text(details, 'div', 'bio'),
            'url': self._optional_text(details, 'div', 'url'),
            'is_protected': self.soup.find('div', {'class': 'protected'}) is not None,
        }

        # The first three cells of the stats table are read in this order.
        stat_cells = self.soup.find('table', {'class': 'profile-stats'}).find_all('td')
        for i, key in enumerate(('tweets', 'following', 'followers')):
            stat_text = stat_cells[i].find('div', {'class': 'statnum'}).text
            profile_info[key] = stat_text.strip().replace(',', '')

        return profile_info

    @property
    def tweets_info(self) -> List[Dict[str, str]]:
        r"""
        Return the parsed information from the tweets of this account.

        Note that not all tweets are loaded: only those that are in the main page,
        except the pinned tweet.

        Returns
        -------
        One dictionary per tweet. Each may contain the following keys:

            - id: str: this tweet id.
            - sender: str: the username of the account that made this tweet.
            - timestamp: str: the timestamp when this tweet was made. It is
                    in the format '(\d) (s|m|h|d|w|y)', where the 1st group
                    represents the time value and the 2nd group the time unit.
            - text: str: the content of this tweet.
            - replying_to: str: the username this tweet is replying to. If
                    this tweet is replying to no one, this field does not exist.
        """
        tweets_info = []
        for tweet_soup in self.soup.find_all('table', {'class': 'tweet'}):
            # The same element carries both the tweet id and the tweet text.
            text_elem = tweet_soup.find('div', {'class': 'tweet-text'})
            tweet_info = {
                'id': text_elem.get('data-id').strip(),
                # [1:] drops the leading '@' of the handle.
                'sender': tweet_soup.find('div', {'class': 'username'}).text.strip()[1:],
                'timestamp': tweet_soup.find('td', {'class': 'timestamp'}).text.strip(),
                'text': text_elem.text.strip(),
            }

            reply_context = tweet_soup.find('div', {'class': 'tweet-reply-context'})
            if reply_context is not None:
                tweet_info['replying_to'] = reply_context.find('a').text.strip()[1:]

            tweets_info.append(tweet_info)

        return tweets_info
    
 if __name__ == '__main__':
    # Demo run: download one public profile and print everything parsed.
    # The class defined in this file is MobileTwitterParser; the previous
    # name `Parser` is not defined anywhere here and raised NameError.
    parser = MobileTwitterParser.from_username('realDonaldTrump')
    print(parser.profile_info)
    print(parser.tweets_info)
	import bs4
	import requests

	from typing import Dict, List

	class MobileTwitterParser:
	    """
	    Parses information from the main page of a Twitter account.

	    This parses information based on the mobile version of the Twitter webpage
	    because 1. it's fast and 2. it's simple, but it contains less information
	    than the desktop version: it lacks, for instance, the pinned tweet and the
	    exact timestamp of the tweets (it only shows how long ago the tweet was
	    made, instead of an HH:MM:SS format).
	    """

	    def __init__(self, url: str, timeout: float = 10.0):
	        """
	        Creates a new Parser object.

	        The page is downloaded and parsed once, here; the properties of this
	        object only read the parsed tree.

	        Parameters
	        ----------
	        url
	            The main page URL of this Twitter account. Because this parser parses
	            information from the mobile version of the Twitter webpage, the domain
	            name must be 'mobile.twitter.com'.
	        timeout
	            Number of seconds to wait for the server before giving up. Without
	            a timeout a stalled connection would block the caller forever.
	        """
	        response = requests.get(url, timeout=timeout)
	        self.soup = bs4.BeautifulSoup(response.text, 'lxml')

	    @classmethod
	    def from_username(cls, username: str):
	        """
	        Creates a new Parser object from an username.

	        Parameters
	        ----------
	        username
	            The username of the account to be parsed, without the leading @.
	        """
	        return cls(f'https://mobile.twitter.com/{username}')

	    @staticmethod
	    def _optional_text(parent, tag: str, css_class: str) -> str:
	        """
	        Return the stripped text of the first `tag` element with class
	        `css_class` under `parent`, or an empty string if there is none.
	        """
	        node = parent.find(tag, {'class': css_class})
	        return '' if node is None else node.text.strip()

	    @property
	    def profile_info(self) -> Dict[str, object]:
	        """
	        Return the parsed information from this profile.

	        Returns
	        -------
	        The parsed data. It may contain the following keys:

	            - is_invalid: bool: True if this account does not exist. When
	                    present, this is the only key in the dictionary.
	            - is_suspended: bool: True if this account is suspended. When
	                    present, this is the only key in the dictionary.
	            - username: str: the Twitter handle of this account.
	            - fullname: str: the user fullname of this account.
	            - is_verified: bool: if this account is verified.
	            - location: str: the location of this account. If this value
	                    is empty, the user did not set this field.
	            - bio: str: the biography of this account. If this value is
	                    empty, the user did not set this field.
	            - url: str: the URL of this account. Note that this field is
	                    user-defined, and it has nothing to do with the
	                    constructor parameter of this class. If this value is
	                    empty, the user did not set this field.
	            - is_protected: bool: if this account is protected.
	            - tweets: str: number of tweets made by this account, with the
	                    ',' separators removed.
	            - following: str: number of users this account is following,
	                    with the ',' separators removed.
	            - followers: str: number of users following this account, with
	                    the ',' separators removed.
	        """
	        # If this element exists, something's wrong: the page is an error
	        # page, and the canonical link tells the two error kinds apart.
	        if self.soup.find('div', {'class': 'blue'}) is not None:
	            if self.soup.find('link', {'rel': 'canonical'}) is not None:
	                return {'is_invalid': True}
	            return {'is_suspended': True}

	        details = self.soup.find('table', {'class': 'profile-details'})
	        profile_info = {
	            # NOTE(review): two leading characters are dropped here while
	            # tweets_info drops only one -- presumably the profile header
	            # renders an extra character after the '@'; confirm.
	            'username': details.find('div', {'class': 'username'}).text.strip()[2:],
	            'fullname': details.find('div', {'class': 'fullname'}).text.strip(),
	            'is_verified': details.find('a', {'class': 'badge'}) is not None,
	            # User-defined fields: a missing element is reported as empty
	            # instead of raising AttributeError.
	            'location': self._optional_text(details, 'div', 'location'),
	            'bio': self._optional_text(details, 'div', 'bio'),
	            'url': self._optional_text(details, 'div', 'url'),
	            'is_protected': self.soup.find('div', {'class': 'protected'}) is not None,
	        }

	        # The first three cells of the stats table are read in this order.
	        stat_cells = self.soup.find('table', {'class': 'profile-stats'}).find_all('td')
	        for i, key in enumerate(('tweets', 'following', 'followers')):
	            stat_text = stat_cells[i].find('div', {'class': 'statnum'}).text
	            profile_info[key] = stat_text.strip().replace(',', '')

	        return profile_info

	    @property
	    def tweets_info(self) -> List[Dict[str, str]]:
	        r"""
	        Return the parsed information from the tweets of this account.

	        Note that not all tweets are loaded: only those that are in the main page,
	        except the pinned tweet.

	        Returns
	        -------
	        One dictionary per tweet. Each may contain the following keys:

	            - id: str: this tweet id.
	            - sender: str: the username of the account that made this tweet.
	            - timestamp: str: the timestamp when this tweet was made. It is
	                    in the format '(\d) (s|m|h|d|w|y)', where the 1st group
	                    represents the time value and the 2nd group the time unit.
	            - text: str: the content of this tweet.
	            - replying_to: str: the username this tweet is replying to. If
	                    this tweet is replying to no one, this field does not exist.
	        """
	        tweets_info = []
	        for tweet_soup in self.soup.find_all('table', {'class': 'tweet'}):
	            # The same element carries both the tweet id and the tweet text.
	            text_elem = tweet_soup.find('div', {'class': 'tweet-text'})
	            tweet_info = {
	                'id': text_elem.get('data-id').strip(),
	                # [1:] drops the leading '@' of the handle.
	                'sender': tweet_soup.find('div', {'class': 'username'}).text.strip()[1:],
	                'timestamp': tweet_soup.find('td', {'class': 'timestamp'}).text.strip(),
	                'text': text_elem.text.strip(),
	            }

	            reply_context = tweet_soup.find('div', {'class': 'tweet-reply-context'})
	            if reply_context is not None:
	                tweet_info['replying_to'] = reply_context.find('a').text.strip()[1:]

	            tweets_info.append(tweet_info)

	        return tweets_info

	if __name__ == '__main__':
	    # Demo run: download one public profile and print everything parsed.
	    # The class defined in this file is MobileTwitterParser; the previous
	    # name `Parser` is not defined anywhere here and raised NameError.
	    parser = MobileTwitterParser.from_username('realDonaldTrump')
	    print(parser.profile_info)
	    print(parser.tweets_info)