Created
May 11, 2012 01:15
-
-
Save anroots/2656905 to your computer and use it in GitHub Desktop.
A script to get specified info from a URL
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python -tt | |
# A script to get specified info from a URL | |
# Author Ando Roots 2010 | |
# Usage: Specify the parameters with command line options. | |
# Usage: ./fetch_info.py URL Search_pattern_start Search_pattern_end | |
# EXAMPLE: Rate.ee number of online users | |
# ./fetch_info.py http://www.rate.ee/ 'amp;act=search"><strong>' ' &raquo;</strong>' | |
import sys | |
import urllib | |
import re | |
# Fetch the source code, find and print the information
def fetch_info(url, start, end):
    """Download *url* and print the first text found between two markers.

    The page body is searched with the regular expression
    ``start + '(.*?)' + end``.  Both markers are inserted into the pattern
    verbatim (they are NOT escaped), so regex metacharacters in them keep
    their special meaning.  No flags are used, so the captured text cannot
    span a line break.

    Args:
        url: Address of the document to download.
        start: Pattern fragment that must precede the wanted text.
        end: Pattern fragment that must follow the wanted text.

    Side effects:
        Prints the captured text on success.  When nothing matches, prints
        'Did not find a match.' and terminates the process with status 0.
    """
    # Imported locally so the function works on both Python 2 and Python 3
    # without depending on which urllib flavour the file imported.
    from contextlib import closing
    try:
        from urllib.request import urlopen  # Python 3
    except ImportError:
        from urllib import urlopen  # Python 2

    # closing() guarantees the connection is released even if read() fails.
    with closing(urlopen(url)) as response:
        source_code = response.read()
        headers = getattr(response, 'headers', None)

    # On Python 3 the body arrives as bytes and must be decoded before it can
    # be searched with a text pattern.  On Python 2 it is already a str.
    if not isinstance(source_code, str):
        get_charset = getattr(headers, 'get_content_charset', None)
        charset = get_charset() if get_charset else None
        try:
            source_code = source_code.decode(charset or 'utf-8', 'replace')
        except LookupError:
            # The server announced an encoding Python does not know.
            source_code = source_code.decode('utf-8', 'replace')

    # Regular expression matching on the source code of the target URL
    match = re.search(start + '(.*?)' + end, source_code)
    if match:
        print(match.group(1))
    else:
        print('Did not find a match.')
        sys.exit(0)
def main():
    """Command-line entry point.

    Expects exactly three arguments after the script name: the URL, the
    start pattern and the end pattern.  With any other argument count it
    prints a short usage message and exits with status 1.  Otherwise it
    hands the arguments to fetch_info() and exits with status 0.
    """
    # sys.argv[0] is the script name, hence 4 entries for 3 arguments.
    if len(sys.argv) != 4:
        # Single-argument print(...) is valid on both Python 2 and Python 3.
        print('A script to get specified info from a URL by A. Roots')
        print('Usage: ./fetch_info.py URL Search_pattern_start Search_pattern_end')
        sys.exit(1)

    fetch_info(sys.argv[1], sys.argv[2], sys.argv[3])
    sys.exit(0)
# Run main() only when the file is executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment