vinovator · March 4, 2016 16:54
diff --git a/world_t20_itinerary.py b/world_t20_itinerary.py
 # world_t20_itinerary.py
 # Python 2.7.6

 """
 Scrap Workd T20 schedule from ICC website using BeautifulSoup & Requests
 Load the schedule into an excel file using pandas
 Format the excel file using openpyxl
 - Apply border, wrap text and color headers
 - Highlight India matches
 """

 import requests
 from BeautifulSoup import BeautifulSoup
 import pandas as pd
 import openpyxl as op


 # Contstants
 url = "http://www.espncricinfo.com/icc-world-twenty20-2016/content/series/901359.html?template=fixtures"
 xl_name = "World T20 Schedule.xlsx"
 sheet_name = "Itinerary"


 def scrap_schedule():
    """
    Scrap WT20 schedule from cricinfo website
    """

    resp = requests.get(url)

    # Scrap schedule
    soup = BeautifulSoup(resp.content)
    # print soup.prettify()

    schedule_ul = soup.findAll("ul", {"class": "large-20 columns"})

    match_list = list()

    for section in schedule_ul:
        schedule_li = section.findAll(
            "li", {"class": "large-20 medium-20 columns"})

        for li in schedule_li:
            match = dict()
            divs = li.findAll("div")
            for div in divs:
                # Match date and time details
                if (div["class"] == "large-5 medium-5 small-20 columns"):
                    fixture = div.findAll("span", {"class": "fixture_date"})

                    match["date"] = fixture[0].getText()
                    match["time"] = fixture[1].getText().replace("&nbsp;", " ")
                    local_time = div.getText().split("(")[-1]
                    local_time = local_time.split(")")[0].replace(
                        "&nbsp;", " ")

                    match["local time"] = local_time

                # Teams and venue details
                elif (div["class"] == "large-11 medium-11 small-20 columns"):
                    # print div.getText()
                    spans = div.findAll("span")

                    match["match details"] = spans[0].getText()
                    match["venue"] = spans[1].getText()
                    match["live"] = spans[2].getText()

                    if len(spans) > 3:
                        match["match type"] = spans[3].getText()
                    else:
                        match["match type"] = "D"

                # This div is just there for empty space; so pass
                elif (div["class"] ==
                      "large-4 medium-4 columns hide-for-small"):
                    pass

            match_list.append(match)

    return match_list  # List of all matches


 def load_to_excel(match_list):
    """
    Load the match list into excel file using pandas
    """
    # Load list to dataframe
    wt20_df = pd.DataFrame(match_list)
    wt20_df.index += 1  # So that the excel column starts from 1

    # Write dataframe to excel
    xlw = pd.ExcelWriter(xl_name)
    wt20_df.to_excel(xlw, sheet_name=sheet_name, index_label="#")
    xlw.save()


 def format_excel(xl, sheet="Sheet1"):
    """
    Get the excel file path and format the file
    If no sheet name is passed, by default take Sheet1
    """
    # Open the excel file
    wb = op.load_workbook(xl)
    ws = wb.get_sheet_by_name(sheet)

    # Freeze panes
    ws.freeze_panes = "B2"

    # Adjust column width
    cols = ("A", "E", "H")
    widths = (5, 50, 40)

    for combo in zip(cols, widths):
        ws.column_dimensions[combo[0]].width = combo[1]

    # define color formmatting
    blue_fill = op.styles.PatternFill(start_color="00aadd",
                                      fill_type='solid')

    green_fill = op.styles.PatternFill(start_color="00ff00",
                                       fill_type='solid')

    # define border style
    thin_border = op.styles.borders.Border(left=op.styles.Side(style='thin'),
                                           right=op.styles.Side(style='thin'),
                                           top=op.styles.Side(style='thin'),
                                           bottom=op.styles.Side(style='thin'))

    # define Text wrap
    text_wrap = op.styles.Alignment(wrap_text=True)

    # Format the header row
    for row in range(1, 2):  # Loop only the 1st row
        for col in range(1, ws.max_column + 1):  # loop through all columns
            ws.cell(row=row, column=col).fill = blue_fill

    # Format all cells
    for row in ws.iter_rows():
        for cell in row:
            # Draw borders
            cell.border = thin_border

            # Wrap all columns
            cell.alignment = text_wrap

            # Highlight India matches
            if (cell.col_idx == 5) and (cell.value.find("India") > 0):
                cell.fill = green_fill

    # Save back as same file name
    wb.save(xl)


 if __name__ == "__main__":
    """ Starting block """

    # Scrap the schedule from cricinfo website
    schedule = scrap_schedule()

    # Load the match list to excel
    load_to_excel(schedule)

    # Format the excel file
    format_excel(xl_name, sheet_name)
	# world_t20_itinerary.py
	# Python 2.7.6

	"""
	Scrap Workd T20 schedule from ICC website using BeautifulSoup & Requests
	Load the schedule into an excel file using pandas
	Format the excel file using openpyxl
	- Apply border, wrap text and color headers
	- Highlight India matches
	"""

	import requests
	from BeautifulSoup import BeautifulSoup
	import pandas as pd
	import openpyxl as op


	# Contstants
	url = "http://www.espncricinfo.com/icc-world-twenty20-2016/content/series/901359.html?template=fixtures"
	xl_name = "World T20 Schedule.xlsx"
	sheet_name = "Itinerary"


	def scrap_schedule():
	"""
	Scrap WT20 schedule from cricinfo website
	"""

	resp = requests.get(url)

	# Scrap schedule
	soup = BeautifulSoup(resp.content)
	# print soup.prettify()

	schedule_ul = soup.findAll("ul", {"class": "large-20 columns"})

	match_list = list()

	for section in schedule_ul:
	schedule_li = section.findAll(
	"li", {"class": "large-20 medium-20 columns"})

	for li in schedule_li:
	match = dict()
	divs = li.findAll("div")
	for div in divs:
	# Match date and time details
	if (div["class"] == "large-5 medium-5 small-20 columns"):
	fixture = div.findAll("span", {"class": "fixture_date"})

	match["date"] = fixture[0].getText()
	match["time"] = fixture[1].getText().replace(" ", " ")
	local_time = div.getText().split("(")[-1]
	local_time = local_time.split(")")[0].replace(
	" ", " ")

	match["local time"] = local_time

	# Teams and venue details
	elif (div["class"] == "large-11 medium-11 small-20 columns"):
	# print div.getText()
	spans = div.findAll("span")

	match["match details"] = spans[0].getText()
	match["venue"] = spans[1].getText()
	match["live"] = spans[2].getText()

	if len(spans) > 3:
	match["match type"] = spans[3].getText()
	else:
	match["match type"] = "D"

	# This div is just there for empty space; so pass
	elif (div["class"] ==
	"large-4 medium-4 columns hide-for-small"):
	pass

	match_list.append(match)

	return match_list # List of all matches


	def load_to_excel(match_list):
	"""
	Load the match list into excel file using pandas
	"""
	# Load list to dataframe
	wt20_df = pd.DataFrame(match_list)
	wt20_df.index += 1 # So that the excel column starts from 1

	# Write dataframe to excel
	xlw = pd.ExcelWriter(xl_name)
	wt20_df.to_excel(xlw, sheet_name=sheet_name, index_label="#")
	xlw.save()


	def format_excel(xl, sheet="Sheet1"):
	"""
	Get the excel file path and format the file
	If no sheet name is passed, by default take Sheet1
	"""
	# Open the excel file
	wb = op.load_workbook(xl)
	ws = wb.get_sheet_by_name(sheet)

	# Freeze panes
	ws.freeze_panes = "B2"

	# Adjust column width
	cols = ("A", "E", "H")
	widths = (5, 50, 40)

	for combo in zip(cols, widths):
	ws.column_dimensions[combo[0]].width = combo[1]

	# define color formmatting
	blue_fill = op.styles.PatternFill(start_color="00aadd",
	fill_type='solid')

	green_fill = op.styles.PatternFill(start_color="00ff00",
	fill_type='solid')

	# define border style
	thin_border = op.styles.borders.Border(left=op.styles.Side(style='thin'),
	right=op.styles.Side(style='thin'),
	top=op.styles.Side(style='thin'),
	bottom=op.styles.Side(style='thin'))

	# define Text wrap
	text_wrap = op.styles.Alignment(wrap_text=True)

	# Format the header row
	for row in range(1, 2): # Loop only the 1st row
	for col in range(1, ws.max_column + 1): # loop through all columns
	ws.cell(row=row, column=col).fill = blue_fill

	# Format all cells
	for row in ws.iter_rows():
	for cell in row:
	# Draw borders
	cell.border = thin_border

	# Wrap all columns
	cell.alignment = text_wrap

	# Highlight India matches
	if (cell.col_idx == 5) and (cell.value.find("India") > 0):
	cell.fill = green_fill

	# Save back as same file name
	wb.save(xl)


	if __name__ == "__main__":
	""" Starting block """

	# Scrap the schedule from cricinfo website
	schedule = scrap_schedule()

	# Load the match list to excel
	load_to_excel(schedule)

	# Format the excel file
	format_excel(xl_name, sheet_name)