Skip to content

Instantly share code, notes, and snippets.

@SIRHAMY
Created February 24, 2016 01:25
Show Gist options
  • Save SIRHAMY/103aa453cd0bd8053359 to your computer and use it in GitHub Desktop.
Save SIRHAMY/103aa453cd0bd8053359 to your computer and use it in GitHub Desktop.
Parsing URL Code - Cause why not?
# -----------------------------------------------
# This project should be completed using the
# Python programming language.
#
# Problem:
#----------
# Please parse the following URL into parts:
# http://www.vandyhacks.org/dostuff/now
#
# While you've been given a specific URL,
# your solution should work for any valid
# URL. Below is a skeleton and some test
# code. Your job is to fill in the
# implementation of the parse_url() function.
#
# parse_url() should return a python
# dictionary with the following keys
#
# * scheme
# * host
# * path
#
# NOTE: there are a number of solutions to
# this problem including pre-built modules
# like urlparse.
#
# Your solution should display your ability
# to create efficient readable code that
# solves the problem without using pre-built
# libraries directly related to parsing URLs
#
# If you provide multiple ways to solve the
# problem that isn't a bad thing.
# -----------------------------------------------
# Sample URL from the problem statement.
the_url = "http://www.vandyhacks.org/dostuff/now"

# Additional real-world URLs for manual spot checks of parse_url().
simple_test = [
    'http://www.tutorialspoint.com/python/python_variable_types.htm',
    'https://dev.twitter.com/cards/mobile/url-schemes',
    'https://wordpress.com/stats/day/www.sirhamy.com::blog',
    'https://twitter.com/',
    'http://first-news-app.readthedocs.org/en/latest/#act-5-hello-internet',
    'http://www.poynter.org/2015/github-tutorials-and-resources-for-journalists/364387/',
    'http://www.nltk.org/book/ch06.html',
    'https://docs.google.com/spreadsheets/d/1hefbuuYoYV7dxzdWgUrepVHhN9LrQtTJXh8i9carKuo/edit#gid=0',
    'https://trello.com/b/XucTn63Y/school',
    'https://gist.github.com/bmayes/bcdde36130ac797a4175',
    'http://sirhamy.com',
]
def parse_url(url):
    """Break *url* into its scheme, host and path without URL libraries.

    Per the exercise rules this uses neither ``split()`` nor any pre-built
    URL-parsing module; it relies only on ``str.find`` and slicing.

    Args:
        url: The URL string to parse, e.g.
            ``"http://www.vandyhacks.org/dostuff/now"``.

    Returns:
        A dict with three string values:

        * ``'scheme'`` - the text before ``"://"``, or ``""`` when the URL
          has no ``"://"`` ahead of its first ``/``, ``?`` or ``#``.
        * ``'host'`` - the text between the scheme separator and the first
          ``/``, ``?`` or ``#`` that follows it (may be ``""``, as in
          ``file:///etc/hosts``).
        * ``'path'`` - everything after the single delimiter character that
          ended the host (so no leading ``/``), or ``""`` when nothing
          follows the host. Any query string or fragment stays part of
          this value.

    Side effects:
        Prints the scheme, host and path, one per line.
    """
    separator = "://"
    host_delimiters = "/?#"

    # The scheme ends at "://" -- a bare ":" is not enough, because it also
    # shows up in ports ("localhost:8080") and inside paths.  A "://" that
    # only appears after a host delimiter belongs to the path/query (e.g.
    # "example.com/go?to=http://x"), so it does not mark a scheme either.
    scheme_end = url.find(separator)
    if scheme_end != -1 and not any(
            ch in host_delimiters for ch in url[:scheme_end]):
        scheme = url[:scheme_end]
        host_start = scheme_end + len(separator)
    else:
        scheme = ""
        host_start = 0

    # The host runs up to whichever of "/", "?" or "#" comes FIRST.  Looking
    # for "/" alone would swallow a query or fragment into the host whenever
    # a slash happens to appear later in the URL.
    host_end = len(url)
    for delimiter in host_delimiters:
        position = url.find(delimiter, host_start)
        if position != -1 and position < host_end:
            host_end = position

    url_dict = {
        'scheme': scheme,
        'host': url[host_start:host_end],
        # Skip the delimiter itself: the tests expect no leading "/".
        # Slicing past the end of the string safely yields "".
        'path': url[host_end + 1:],
    }

    # Echo each part, as the original implementation did, so the script's
    # console output is unchanged.
    print(url_dict['scheme'])
    print(url_dict['host'])
    print(url_dict['path'])
    return url_dict
# -----------------------------------------------
# TEST CODE AND MAIN
# -----------------------------------------------
def test_parse_url(url_dict):
    """Check a parse_url() result for the sample URL and print a verdict.

    The result must be a dictionary containing the keys 'scheme', 'host'
    and 'path', holding the values "http", "www.vandyhacks.org" and
    "dostuff/now" respectively.  The first failed check prints a "FAIL"
    message and stops; if every check passes a success message is printed.

    Args:
        url_dict: The value returned by parse_url() for the sample URL.

    Returns:
        None. The outcome is reported on standard output only.
    """
    # isinstance() also accepts dict subclasses, which type(...) != dict
    # would reject.
    if not isinstance(url_dict, dict):
        print("FAIL: you need to return a Dictionary from parse_url()")
        return

    # (key, expected value) pairs, in the order they are reported.
    expected = (
        ('scheme', "http"),
        ('host', "www.vandyhacks.org"),
        ('path', "dostuff/now"),
    )

    # All keys must be present before any value is compared.
    for key, _ in expected:
        if key not in url_dict:
            print("FAIL: your dictionary is missing the key '%s'" % key)
            return

    for key, value in expected:
        if url_dict.get(key) != value:
            print("FAIL: key: '%s' should be '%s'" % (key, value))
            return

    print("WELL DONE YOU PASSED -- !!")
# Parse the sample URL, then hand the result to the checker, which prints
# the pass/fail verdict.
parsed_url = parse_url(url=the_url)
test_parse_url(url_dict=parsed_url)
#My test code
#for url in simple_test:
# print("Orig URL: " + url);
# parse_url(url);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment