Created
January 30, 2012 06:16
-
-
Save usahg/1702880 to your computer and use it in GitHub Desktop.
python crawler extract javascript masked links window.open
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#author : osman ehmad
# Fetch a page, collect its JavaScript and print the link that is masked
# inside a window.open(...) call.
import re
from contextlib import closing

try:  # Python 3
    from urllib.request import urlopen
except ImportError:  # Python 2, which this script was originally written for
    from urllib2 import urlopen

PAGE_URL = 'http://www.javascript-coder.com/files/window-popup/javascript-window-open-example1.html'

# Matches <script> and <SCRIPT> alike and captures only the element body.
_SCRIPT_RE = re.compile(r'<script\b[^>]*>(.*?)</script\s*>',
                        re.IGNORECASE | re.DOTALL)
_WINDOW_OPEN = 'window.open('
_QUOTES = ('"', "'")


def find_javascript(source):
    """Return the bodies of every <script> element in *source*.

    The bodies are joined with newlines; an empty string is returned when
    the page contains no script element.
    """
    return '\n'.join(_SCRIPT_RE.findall(source))


def split_window_open_arguments(javascript):
    """Return the raw argument strings of the first window.open(...) call.

    Commas and brackets inside quoted strings do not split or terminate
    the argument list, so a URL or a feature string such as
    "width=400,height=200" stays in one piece. Returns an empty list when
    there is no window.open call, when the call has no arguments, or when
    its closing parenthesis is missing.
    """
    start = javascript.find(_WINDOW_OPEN)
    if start == -1:
        return []

    arguments = []
    current = []
    quote = None      # the quote character we are inside of, if any
    escaped = False   # True right after a backslash inside a string
    depth = 0         # nesting level of brackets opened inside the call
    for char in javascript[start + len(_WINDOW_OPEN):]:
        if quote is not None:
            current.append(char)
            if escaped:
                escaped = False
            elif char == '\\':
                escaped = True
            elif char == quote:
                quote = None
        elif char in _QUOTES:
            quote = char
            current.append(char)
        elif char in ('(', '[', '{'):
            depth += 1
            current.append(char)
        elif char in (')', ']', '}'):
            if depth == 0:
                break  # this closes window.open( itself
            depth -= 1
            current.append(char)
        elif char == ',' and depth == 0:
            arguments.append(''.join(current).strip())
            current = []
        else:
            current.append(char)
    else:
        # Ran off the end of the text without finding the closing parenthesis.
        return []

    last = ''.join(current).strip()
    if last or arguments:
        arguments.append(last)
    return arguments


def extract_link(javascript):
    """Return the first argument of window.open(...) without its quotes.

    Returns None when *javascript* has no parsable window.open call. A
    first argument that is not a quoted literal (e.g. a variable name) is
    returned unchanged.
    """
    arguments = split_window_open_arguments(javascript)
    if not arguments:
        return None
    first = arguments[0]
    if len(first) >= 2 and first[0] in _QUOTES and first[-1] == first[0]:
        return first[1:-1]
    return first


def main():
    """Download PAGE_URL and print its JavaScript, the call arguments and the link."""
    # closing() releases the connection on both Python 2 and Python 3.
    with closing(urlopen(PAGE_URL)) as page:
        source = page.read()
    if not isinstance(source, str):
        # Python 3 returns bytes; the helpers work on text.
        source = source.decode('utf-8', 'replace')

    javascript = find_javascript(source)
    print("\n\n\n ---printing javascript---\n\n\n")
    print(javascript)

    broken = split_window_open_arguments(javascript)
    print("\n\n\n ---printing the required function after parsing---\n\n\n")
    print(broken)

    # The first argument of window.open is the link, so that is what we print.
    print("\n\n\n ---printing masked javascript link--\n\n\n")
    print(extract_link(javascript))


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment