Skip to content

Instantly share code, notes, and snippets.

@anroots
Created May 11, 2012 01:15
Show Gist options
  • Save anroots/2656905 to your computer and use it in GitHub Desktop.
Save anroots/2656905 to your computer and use it in GitHub Desktop.
A script to get specified info from a URL
#!/usr/bin/python -tt
# A script to get specified info from a URL
# Author Ando Roots 2010
# Usage: Specify the parameters with command line options.
# Usage: ./fetch_info.py URL Search_pattern_start Search_pattern_end
# EXAMPLE: Rate.ee number of online users
# ./fetch_info.py http://www.rate.ee/ 'amp;act=search"&gt;<strong>' ' &amp;raquo;</strong>'
import sys
import urllib
import re
# Fetch the source code, find and print the information
def fetch_info(url, start, end):
    """Print the first text found between two markers in a URL's content.

    The resource at ``url`` is downloaded and searched with the regular
    expression ``start + '(.*?)' + end``. The text captured between the two
    markers is printed to standard output; if nothing matches, a notice is
    printed instead and the process exits.

    Args:
        url: Address of the resource to download.
        start: Regular-expression fragment that precedes the wanted text.
        end: Regular-expression fragment that follows the wanted text.

    Raises:
        SystemExit: With status 0, after reporting that nothing matched.

    Note:
        ``start`` and ``end`` are inserted into the pattern verbatim, so
        regex metacharacters in them keep their special meaning. Because
        ``.`` does not match a newline, the captured text cannot span lines.
    """
    # Imported locally so the function works on both Python 2 and Python 3,
    # where urlopen lives in different modules.
    from contextlib import closing
    try:
        from urllib.request import urlopen  # Python 3
    except ImportError:
        from urllib import urlopen  # Python 2

    # closing() releases the connection even if read() fails; the Python 2
    # response object is not a context manager on its own.
    with closing(urlopen(url)) as response:
        source_code = response.read()
        if not isinstance(source_code, str):
            # Python 3 returns bytes while the patterns are text: decode with
            # the charset announced by the response, defaulting to UTF-8.
            charset = response.headers.get_content_charset() or 'utf-8'
            try:
                source_code = source_code.decode(charset, 'replace')
            except LookupError:
                # The response named a codec Python does not know.
                source_code = source_code.decode('utf-8', 'replace')

    # Regular expression matching on the source code of the target URL
    match = re.search(start + '(.*?)' + end, source_code)
    if match:
        print(match.group(1))
    else:
        print('Did not find a match.')
        sys.exit(0)
def main():
    """Command-line entry point: check the arguments and run the search.

    Expects exactly three arguments after the script name: the URL, the
    start pattern and the end pattern, which are passed on to fetch_info.

    Raises:
        SystemExit: With status 1 after printing the usage text when the
            argument count is wrong; otherwise with status 0 once
            fetch_info has returned.
    """
    arguments = sys.argv[1:]
    if len(arguments) != 3:
        # Single-argument print(...) produces the same output on Python 2
        # and Python 3.
        print('A script to get specified info from a URL by A. Roots')
        print('Usage: ./fetch_info.py URL Search_pattern_start Search_pattern_end')
        sys.exit(1)

    url, start, end = arguments
    fetch_info(url, start, end)
    sys.exit(0)
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment