Last active
August 2, 2021 19:50
-
-
Save pramos/ebdb14eb213f7ed5511b4dcf42a47549 to your computer and use it in GitHub Desktop.
Extracting URLs from a pcap file with Scapy, using PcapReader()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys | |
import re | |
from scapy.all import * | |
from memory_profiler import profile | |
@profile | |
def get_url_from_payload(payload):
    """Build ``<host><path>`` from the raw bytes of an HTTP GET request.

    The payload must start with a request line of the form
    ``GET <target> HTTP/<version>`` and contain a complete header block
    (terminated by an empty line) that includes a ``Host`` header.

    Args:
        payload: Raw TCP payload bytes.

    Returns:
        The Host header value concatenated with the request target,
        followed by a newline (no scheme is prepended).

    Raises:
        ValueError: If the header block is incomplete, is not valid UTF-8,
            or the request line is not a well-formed GET request.
        KeyError: If the request carries no Host header.
    """
    # Only the header block is inspected; a missing blank line means the
    # headers are incomplete and bytes.index raises ValueError.
    header_end = payload.index(b"\r\n\r\n")
    header_text = payload[:header_end].decode("utf8")
    request_line, _, header_block = header_text.partition("\r\n")

    # Anchor on the request line itself so that a "GET " appearing in the
    # body or headers of some other request is never mistaken for a GET.
    method, _, remainder = request_line.partition(" ")
    if method != "GET":
        raise ValueError("payload is not an HTTP GET request")
    url_path, separator, version = remainder.rpartition(" ")
    if not separator or not version.startswith("HTTP/"):
        raise ValueError("malformed HTTP request line")

    for header_line in header_block.split("\r\n"):
        name, colon, value = header_line.partition(":")
        # Header field names are case-insensitive and the whitespace
        # around the value is optional.
        if colon and name.strip().lower() == "host":
            return value.strip() + url_path + "\n"
    raise KeyError("Host")
@profile | |
def parse_pcap(pcap_path, urls_file):
    """Write the URL of every HTTP GET request in a capture to a file.

    Packets are streamed one at a time with ``PcapReader``. For each TCP
    packet whose destination port is 80, the payload is passed to
    ``get_url_from_payload`` and the resulting URL line is appended to
    the output file. Packets that do not hold a parsable GET request are
    skipped.

    Args:
        pcap_path: Path of the capture file to read.
        urls_file: Path of the output file; it is opened in binary write
            mode, so any existing content is replaced.
    """
    # Both resources are context-managed so they are closed even when
    # reading the capture fails part-way through.
    with open(urls_file, "wb") as urls_output, PcapReader(pcap_path) as packets:
        for packet in packets:
            # Check for the layer explicitly instead of relying on the
            # lookup error raised for non-TCP packets.
            if not packet.haslayer(TCP) or packet[TCP].dport != 80:
                continue
            payload = bytes(packet[TCP].payload)
            try:
                url = get_url_from_payload(payload)
            except (ValueError, KeyError):
                # Not a complete GET request with a Host header
                # (e.g. a bare ACK, another method, or a later segment).
                continue
            urls_output.write(url.encode())
def main(arguments):
    """Run the extractor from a command-line style argument list.

    Expected form (including the program name at index 0)::

        <script> --pcap <capture file> --output <urls file>

    Args:
        arguments: Full argument vector, as in ``sys.argv``.

    Returns:
        None. When the arguments do not match the expected form, a usage
        message is printed to standard error and nothing is parsed.
    """
    if (
        len(arguments) == 5
        and arguments[1] == "--pcap"
        and arguments[3] == "--output"
    ):
        parse_pcap(arguments[2], arguments[4])
        return

    # Tell the user what went wrong instead of exiting silently.
    program = arguments[0] if arguments else sys.argv[0]
    print(
        "usage: {} --pcap <capture file> --output <urls file>".format(program),
        file=sys.stderr,
    )
# Script entry point: only runs when the file is executed directly,
# forwarding the full command line (program name included) to main().
if __name__ == "__main__":
    command_line = sys.argv
    main(command_line)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment