mboo2005 · August 25, 2019 14:54
diff --git a/gistfile1.py b/gistfile1.py
 import urllib2
 import json
 import re

 class SoccerwayTeamMatches:
     """
     Fetches, parses and caches the match list of one team from the
     Soccerway API (nr.soccerway.com/a/block_team_matches).

     Parsed results are kept per match type in self.data.
     """

     # Match types accepted by getData
     MATCH_TYPES = ('all', 'away', 'home')

     # Patterns are compiled once here instead of on every parseJson call.
     # Opening <tr ...> tag (tr is shorthand for table row)
     _ROW_TAG = re.compile(r'<tr[^<]+?>')
     # Opening <td ...> tag (table column)
     _CELL_TAG = re.compile(r'<td[^<]+?>')
     # Any other markup tag left inside a cell
     _ANY_TAG = re.compile(r'<[^<]+?>')

     def __init__(self, teamId):
         """
         Input: The id of the team. It is stored as a string because it
         is concatenated into the request URL.

         """
         self.teamId = str(teamId)
         # Parsed match rows, keyed by match type
         self.data = {'all': [], 'home': [], 'away': []}

     def parseJson(self, jsonStr):
         """
         Handles the parsing of the JSON object returned
         by Soccerway API (nr.soccerway.com/a/block_team_matches).

         Input: The JSON object returned by the API as a string

         Return: Two dimensional array (list of lists of strings) of
         the match results. Real lists are returned, so the result can
         be indexed, measured and iterated more than once.

         Raises: ValueError if jsonStr is not valid JSON; KeyError or
         IndexError if the expected 'commands'/'parameters'/'content'
         structure or the '<thead' marker is missing.

         """
         payload = json.loads(jsonStr)

         # Fetch the interesting part of inputted JSON obj
         content = payload['commands'][0]['parameters']['content']

         # Remove uninteresting header and footer data
         cleanContent = content.split('</tbody>', 1)[0].split('<thead', 1)[1]

         # Split content by <tr> -tags. Chunk 0 is the text before the
         # first row and chunk 1 is the table header row; the final chunk
         # is excluded as well (presumably not a match row - TODO confirm).
         rows = self._ROW_TAG.split(cleanContent)[2:-1]

         # Split each row by <td> -tags and clean other tags. The text
         # before the first <td> and the last two columns are dropped.
         return [
             [self._ANY_TAG.sub('', cell).strip()
              for cell in self._CELL_TAG.split(row)[1:-2]]
             for row in rows
         ]

     def getData(self, matchType):
         """
         Return the cleaned match data in 2D array.
         Does simple caching of the GET queries, that is,
         same data is not queried twice. Note that an empty
         result is not treated as cached and is queried again.

         Input: Type of the matches, must be one of
         the following strings: 'all', 'away' or 'home'.

         Output: 2d array of match results, or an empty list
         when matchType is not one of the accepted strings.

         """
         if matchType not in self.MATCH_TYPES:
             return []

         if not self.data[matchType]:
             url = ("http://nr.soccerway.com/a/block_team_matches"
                    "?block_id=page_team_1_block_team_matches_5"
                    "&callback_params=%7B%22page%22%3A0%2C%22"
                    "bookmaker_urls%22%3A%5B%5D%2C%22block_service_id"
                    "%22%3A%22team_matches_block_teammatches%22%2C%22"
                    "team_id%22%3A" + self.teamId + "%2C%22competition_id"
                    "%22%3A0%2C%22filter%22%3A%22all%22%7D"
                    "&action=filterMatches&params=%7B%22"
                    "filter%22%3A%22" + matchType + "%22%7D")
             response = urllib2.urlopen(url)
             try:
                 jsonStr = response.read()
             finally:
                 # Always release the connection, even if read() fails
                 response.close()
             self.data[matchType] = self.parseJson(jsonStr)

         return self.data[matchType]
	import urllib2
	import json
	import re

	class SoccerwayTeamMatches:
	    """
	    Fetches, parses and caches the match list of one team from the
	    Soccerway API (nr.soccerway.com/a/block_team_matches).

	    Parsed results are kept per match type in self.data.
	    """

	    # Match types accepted by getData
	    MATCH_TYPES = ('all', 'away', 'home')

	    # Patterns are compiled once here instead of on every parseJson call.
	    # Opening <tr ...> tag (tr is shorthand for table row)
	    _ROW_TAG = re.compile(r'<tr[^<]+?>')
	    # Opening <td ...> tag (table column)
	    _CELL_TAG = re.compile(r'<td[^<]+?>')
	    # Any other markup tag left inside a cell
	    _ANY_TAG = re.compile(r'<[^<]+?>')

	    def __init__(self, teamId):
	        """
	        Input: The id of the team. It is stored as a string because it
	        is concatenated into the request URL.

	        """
	        self.teamId = str(teamId)
	        # Parsed match rows, keyed by match type
	        self.data = {'all': [], 'home': [], 'away': []}

	    def parseJson(self, jsonStr):
	        """
	        Handles the parsing of the JSON object returned
	        by Soccerway API (nr.soccerway.com/a/block_team_matches).

	        Input: The JSON object returned by the API as a string

	        Return: Two dimensional array (list of lists of strings) of
	        the match results. Real lists are returned, so the result can
	        be indexed, measured and iterated more than once.

	        Raises: ValueError if jsonStr is not valid JSON; KeyError or
	        IndexError if the expected 'commands'/'parameters'/'content'
	        structure or the '<thead' marker is missing.

	        """
	        payload = json.loads(jsonStr)

	        # Fetch the interesting part of inputted JSON obj
	        content = payload['commands'][0]['parameters']['content']

	        # Remove uninteresting header and footer data
	        cleanContent = content.split('</tbody>', 1)[0].split('<thead', 1)[1]

	        # Split content by <tr> -tags. Chunk 0 is the text before the
	        # first row and chunk 1 is the table header row; the final chunk
	        # is excluded as well (presumably not a match row - TODO confirm).
	        rows = self._ROW_TAG.split(cleanContent)[2:-1]

	        # Split each row by <td> -tags and clean other tags. The text
	        # before the first <td> and the last two columns are dropped.
	        return [
	            [self._ANY_TAG.sub('', cell).strip()
	             for cell in self._CELL_TAG.split(row)[1:-2]]
	            for row in rows
	        ]

	    def getData(self, matchType):
	        """
	        Return the cleaned match data in 2D array.
	        Does simple caching of the GET queries, that is,
	        same data is not queried twice. Note that an empty
	        result is not treated as cached and is queried again.

	        Input: Type of the matches, must be one of
	        the following strings: 'all', 'away' or 'home'.

	        Output: 2d array of match results, or an empty list
	        when matchType is not one of the accepted strings.

	        """
	        if matchType not in self.MATCH_TYPES:
	            return []

	        if not self.data[matchType]:
	            url = ("http://nr.soccerway.com/a/block_team_matches"
	                   "?block_id=page_team_1_block_team_matches_5"
	                   "&callback_params=%7B%22page%22%3A0%2C%22"
	                   "bookmaker_urls%22%3A%5B%5D%2C%22block_service_id"
	                   "%22%3A%22team_matches_block_teammatches%22%2C%22"
	                   "team_id%22%3A" + self.teamId + "%2C%22competition_id"
	                   "%22%3A0%2C%22filter%22%3A%22all%22%7D"
	                   "&action=filterMatches&params=%7B%22"
	                   "filter%22%3A%22" + matchType + "%22%7D")
	            response = urllib2.urlopen(url)
	            try:
	                jsonStr = response.read()
	            finally:
	                # Always release the connection, even if read() fails
	                response.close()
	            self.data[matchType] = self.parseJson(jsonStr)

	        return self.data[matchType]