This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# pdfTextMiner.py | |
# Python 2.7.6 | |
# For Python 3.x use pdfminer3k module | |
# This link has useful information on components of the program | |
# https://euske.github.io/pdfminer/programming.html | |
# http://denis.papathanasiou.org/posts/2010.08.04.post.html | |
''' Important classes to remember | |
PDFParser - fetches data from pdf file |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# python 2.7.6. | |
# portScanner.py | |
import socket | |
from datetime import datetime | |
import sys | |
# Here we are scanning your own machine (the local host) | |
# Replace this with gethostbyname("host") to scan a remote host |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Python 2.7.6 | |
# timeZoneExplorer.py | |
from pytz import timezone, common_timezones # import all_timezones for more exhaustive list | |
from datetime import datetime | |
import os | |
# Log file will be created in the same folder as the python script | |
my_path = "." | |
log_path = os.path.join(my_path + "/" + "loc_log.txt") |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# jsonToCSV.py | |
# Python 2.7.6 | |
''' | |
Place all the json payloads as separate text files in base folder | |
Program will extract each payload and generate single csv file | |
csv file will have key value pairs in separate columns | |
''' | |
import json |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# forbes2kMiner.py | |
# Python 3.4 | |
""" | |
Extracts the Forbes Global 2000 list of companies and imports into a CSV file | |
Since Forbes is a JS rendered site, selenium is used to mimic user action | |
BeautifulSoup is used to scrape html content | |
Since Selenium is used, Firefox is needed as the webdriver | |
""" |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# persistListOfDicts.py | |
# Python 2.7.6 | |
import json | |
import os | |
import pickle # To persist each dict | |
json_path = "./JSON" | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Logger.py | |
# Python 2.7.6 | |
# For more details - https://docs.python.org/3/howto/logging.html#logging-basic-tutorial | |
# logging.error - just displays the error message | |
# logging.exception - displays the stack trace along with the error message | |
import logging # For logs | |
import sys # To read parameters from command line | |
# Define the format of the logging |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# persistListOfDicts.py | |
# Python 2.7.6 | |
import json | |
import os | |
json_path = "./JSON" | |
# Write dicts into a pickle file each |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#Python 2.7.6 | |
#DemergePDF.py | |
#Gets the name of 1 PDF file from the user via raw_input and demerges it into 2 | |
import PyPDF2 | |
import os | |
def getFileNameFromUser (file, path): | |
pdf_file_name = raw_input("Enter {0} name: ".format(file)) | |
if pdf_file_name in os.listdir(path): |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#Python 2.7.6 | |
#CombinePDF_Py2.py | |
#Gets the names of 2 PDF files from the user via raw_input and combines them into 1 | |
import PyPDF2 | |
import os | |
def getFileNameFromUser (file, path): | |
pdf_file_name = raw_input("Enter {0} name: ".format(file)) | |
if pdf_file_name in os.listdir(path): |