Created
February 24, 2016 01:25
-
-
Save SIRHAMY/103aa453cd0bd8053359 to your computer and use it in GitHub Desktop.
Parsing URL Code - Cause why not?
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -----------------------------------------------
# This project should be completed using the
# Python programming language.
#
# Problem:
#----------
# Please parse the following URL into parts:
# http://www.vandyhacks.org/dostuff/now
#
# While you've been given a specific URL,
# your solution should work for any valid
# URL. Below is a skeleton and some test
# code. Your job is to fill in the
# implementation of the parse_url() function.
#
# parse_url() should return a python
# dictionary with the following keys
#
# * scheme
# * host
# * path
#
# NOTE: there are a number of solutions to
# this problem including pre-built modules
# like urlparse.
#
# Your solution should display your ability
# to create efficient readable code that
# solves the problem without using pre-built
# libraries directly related to parsing URLs
#
# If you provide multiple ways to solve the
# problem that isn't a bad thing.
# -----------------------------------------------
# Sample URL from the problem statement; test_parse_url() checks against it.
the_url = "http://www.vandyhacks.org/dostuff/now"

# Extra real-world URLs for manual spot checks of parse_url().
simple_test = [
    'http://www.tutorialspoint.com/python/python_variable_types.htm',
    'https://dev.twitter.com/cards/mobile/url-schemes',
    'https://wordpress.com/stats/day/www.sirhamy.com::blog',
    'https://twitter.com/',
    'http://first-news-app.readthedocs.org/en/latest/#act-5-hello-internet',
    'http://www.poynter.org/2015/github-tutorials-and-resources-for-journalists/364387/',
    'http://www.nltk.org/book/ch06.html',
    'https://docs.google.com/spreadsheets/d/1hefbuuYoYV7dxzdWgUrepVHhN9LrQtTJXh8i9carKuo/edit#gid=0',
    'https://trello.com/b/XucTn63Y/school',
    'https://gist.github.com/bmayes/bcdde36130ac797a4175',
    'http://sirhamy.com',
]
def _first_index_of_any(text, chars, start):
    """Return the lowest index >= start at which any of chars occurs in text.

    Returns len(text) when none of the characters is found, so the result can
    be used directly as a slice end.
    """
    earliest = len(text)
    for char in chars:
        position = text.find(char, start)
        if position != -1 and position < earliest:
            earliest = position
    return earliest


def parse_url(url):
    """Break a URL into scheme, host and path using only find() and slicing.

    The exercise forbids URL-parsing libraries and split(), so the parts are
    located by index.

    Args:
        url: URL string, e.g. "http://www.vandyhacks.org/dostuff/now".

    Returns:
        A dict with three string values:
          'scheme' - text before "://" ("" when the URL has no scheme),
          'host'   - text between the scheme and the first "/", "?" or "#",
          'path'   - everything after that delimiter, delimiter excluded
                     ("" when nothing follows the host). Any query string
                     or fragment is left inside this value.

    Side effects:
        Prints the scheme, host and path to stdout, one per line.
    """
    host_delimiters = "/?#"

    # A genuine scheme marker comes before every other delimiter. A "://"
    # that only appears later (e.g. inside a query string) is not a scheme.
    marker = url.find("://")
    if marker != -1 and marker < _first_index_of_any(url, host_delimiters, 0):
        scheme = url[:marker]
        host_start = marker + len("://")
    else:
        scheme = ""
        host_start = 0

    # The host stops at whichever of "/", "?" or "#" comes first, not just
    # at "/": "http://example.com?next=/home" must yield host "example.com".
    host_end = _first_index_of_any(url, host_delimiters, host_start)

    url_dict = {
        'scheme': scheme,
        'host': url[host_start:host_end],
        # Skip the delimiter itself (the tests expect no leading "/").
        # Slicing past the end of the string simply gives "".
        'path': url[host_end + 1:],
    }

    print(url_dict['scheme'])
    print(url_dict['host'])
    print(url_dict['path'])
    return url_dict
# -----------------------------------------------
# TEST CODE AND MAIN
# -----------------------------------------------
def test_parse_url(url_dict):
    """Check a parse_url() result for the sample URL and print a verdict.

    Args:
        url_dict: The value returned by parse_url() for
            "http://www.vandyhacks.org/dostuff/now".

    Returns:
        None. The outcome is reported on stdout: the first failing check
        prints a "FAIL: ..." line and stops; if every check passes a success
        line is printed instead.
    """
    # print("...") with a single argument runs on both Python 2 and Python 3;
    # the bare `print "..."` statement form is a SyntaxError on Python 3.
    if not isinstance(url_dict, dict):
        print("FAIL: you need to return a Dictionary from parse_url()")
        return

    # Every required key must be present before any value is inspected.
    for key in ('scheme', 'host', 'path'):
        if key not in url_dict:
            print("FAIL: your dictionary is missing the key '%s'" % key)
            return

    expected_parts = (
        ('scheme', "http"),
        ('host', "www.vandyhacks.org"),
        ('path', "dostuff/now"),
    )
    for key, expected in expected_parts:
        if url_dict.get(key) != expected:
            print("FAIL: key: '%s' should be '%s'" % (key, expected))
            return

    print("WELL DONE YOU PASSED -- !!")


# Not a pytest test despite its name: it needs an argument, so opt it out of
# pytest's test_* auto-collection to avoid a spurious "fixture not found" error.
test_parse_url.__test__ = False
# Parse the sample URL and run the exercise's self-check on the result.
parsed_url = parse_url(the_url)
test_parse_url(parsed_url)

# My test code
# for url in simple_test:
#     print("Orig URL: " + url)
#     parse_url(url)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment