Created
July 8, 2015 06:34
-
-
Save Chitrank-Dixit/1cb33029b18051e9e03a to your computer and use it in GitHub Desktop.
The following program searches for occurrences of a keyword in a supplied page
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This script downloads a page from the server and then searches it for the required keyword
import contextlib
import urllib2
def get_all_occurences(page, search_term=None):
    """Locate the first occurrence of the search keyword in *page*.

    Despite the name, only the first match is reported; callers find later
    matches by calling again on the remainder of the page.

    Args:
        page: String to search.
        search_term: String to look for. When omitted (``None``), the
            module-level ``keyword`` global is used, which is the original
            behaviour of this function.

    Returns:
        A ``(match, end)`` tuple. ``match`` is the slice of *page* equal to
        the keyword and ``end`` is the index just past it. When the keyword
        is absent the result is ``(None, 0)``.
    """
    if search_term is None:
        # Fall back to the global assigned by the script's keyword prompt.
        search_term = keyword
    start = page.find(search_term)
    if start == -1:
        return None, 0
    end = start + len(search_term)
    return page[start:end], end
def print_all_occurences(page):
    """Print a ``Found`` line for every occurrence of the keyword in *page*.

    Repeatedly asks ``get_all_occurences`` for the next match and then
    continues the search in the text that follows that match.

    Args:
        page: String to search, or ``None`` to do nothing.

    Returns:
        ``(None, 0)`` once no further match is found (kept for backward
        compatibility with the original implementation), or ``None`` when
        *page* is ``None``.
    """
    if page is None:
        return None
    while True:
        url, end_pos = get_all_occurences(page)
        if not url:
            # No (further) match: stop searching.
            return None, 0
        # Joining with a space emits the same text as the Python 2 statement
        # `print "Found ", url`, while also being valid call syntax.
        print(" ".join(("Found ", url)))
        # Drop everything up to and including this match before searching on.
        page = page[end_pos:]
def download_page(html_page):
    """Fetch *html_page* and print every keyword occurrence found in it.

    Args:
        html_page: URL passed to ``urllib2.urlopen``.

    Any error raised while opening or reading the URL propagates to the
    caller; nothing is caught here.
    """
    # closing() releases the response even if read() raises; the original
    # code never closed it.
    with contextlib.closing(urllib2.urlopen(html_page)) as source_file:
        contents = source_file.read()
    print_all_occurences(contents)
# Only prompt and hit the network when run as a script, not when imported.
if __name__ == "__main__":
    link = raw_input("Please Enter the URL to search keyword from:")
    # `keyword` must stay a module-level global: get_all_occurences reads it.
    # An `if` block does not create a new scope, so this is still a global.
    keyword = raw_input("Enter the keyword to search in the page:")
    download_page(link)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment