robla · January 24, 2016 05:33
diff --git a/striptrackers.py b/striptrackers.py
 #!/usr/bin/env python
 #
 # Copyright 2016 Rob Lanphier
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #    http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.

 import argparse
 from bs4 import BeautifulSoup


 def main():
    """Rewrite www.google.com links in an HTML file and print the result.

    Reads the path given as the ``htmlfile`` command-line argument and
    parses it with BeautifulSoup. For every ``<a>`` element whose ``href``
    has the host ``www.google.com``, the ``href`` is replaced with the
    first value of that URL's ``q`` query parameter. The prettified
    document is then printed on stdout.

    Links on other hosts, anchors without an ``href``, and Google links
    that carry no ``q`` parameter are left untouched.
    """
    # Imported once per call rather than once per link inside the loop.
    import urllib.parse

    parser = argparse.ArgumentParser(
        description='Strip tracking URLs from file, print on stdout')
    parser.add_argument('htmlfile')
    args = parser.parse_args()

    # The context manager closes the file even if parsing raises; the
    # parser choice is deliberately left to BeautifulSoup as before.
    with open(args.htmlfile) as htmlhandle:
        soup = BeautifulSoup(htmlhandle)

    for link in soup.find_all('a'):
        url = link.get('href')
        if not url:
            # No href (or an empty one): there is nothing to rewrite.
            continue
        urlobj = urllib.parse.urlparse(url)
        if urlobj.netloc != 'www.google.com':
            continue
        # .get() avoids the KeyError the plain ['q'] lookup raised for
        # Google links that have no (or only a blank) q parameter.
        targets = urllib.parse.parse_qs(urlobj.query).get('q', [])
        if targets and targets[0]:
            link['href'] = targets[0]
    print(soup.prettify())

 # Run the command-line entry point only when this file is executed as a
 # script, not when it is imported as a module.
 if __name__ == "__main__":
    main()
	#!/usr/bin/env python
	#
	# Copyright 2016 Rob Lanphier
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.

	import argparse
	from bs4 import BeautifulSoup


	def main():
	    """Rewrite www.google.com links in an HTML file and print the result.

	    Reads the path given as the ``htmlfile`` command-line argument and
	    parses it with BeautifulSoup. For every ``<a>`` element whose
	    ``href`` has the host ``www.google.com``, the ``href`` is replaced
	    with the first value of that URL's ``q`` query parameter. The
	    prettified document is then printed on stdout.

	    Links on other hosts, anchors without an ``href``, and Google links
	    that carry no ``q`` parameter are left untouched.
	    """
	    # Imported once per call rather than once per link inside the loop.
	    import urllib.parse

	    parser = argparse.ArgumentParser(
	        description='Strip tracking URLs from file, print on stdout')
	    parser.add_argument('htmlfile')
	    args = parser.parse_args()

	    # The context manager closes the file even if parsing raises; the
	    # parser choice is deliberately left to BeautifulSoup as before.
	    with open(args.htmlfile) as htmlhandle:
	        soup = BeautifulSoup(htmlhandle)

	    for link in soup.find_all('a'):
	        url = link.get('href')
	        if not url:
	            # No href (or an empty one): there is nothing to rewrite.
	            continue
	        urlobj = urllib.parse.urlparse(url)
	        if urlobj.netloc != 'www.google.com':
	            continue
	        # .get() avoids the KeyError the plain ['q'] lookup raised for
	        # Google links that have no (or only a blank) q parameter.
	        targets = urllib.parse.parse_qs(urlobj.query).get('q', [])
	        if targets and targets[0]:
	            link['href'] = targets[0]
	    print(soup.prettify())

	# Run the command-line entry point only when this file is executed as a
	# script, not when it is imported as a module.
	if __name__ == "__main__":
	    main()
No results found