Skip to content

Instantly share code, notes, and snippets.

@pramos
Last active August 2, 2021 19:50
Show Gist options
  • Save pramos/ebdb14eb213f7ed5511b4dcf42a47549 to your computer and use it in GitHub Desktop.
Save pramos/ebdb14eb213f7ed5511b4dcf42a47549 to your computer and use it in GitHub Desktop.
Extracting URLs from a pcap file with Scapy using PcapReader()
import sys
import re
from scapy.all import *
from memory_profiler import profile
@profile
def get_url_from_payload(payload):
    """Return ``host + request-target + "\\n"`` for an HTTP GET request.

    The payload must begin with a ``GET`` request line and contain a complete
    header block (terminated by an empty line). Any ``HTTP/x.y`` version is
    accepted, header names are matched case-insensitively, and optional
    whitespace around the header value is ignored.

    Args:
        payload: Raw bytes of a TCP segment.

    Returns:
        The value of the ``Host`` header concatenated with the request
        target, followed by a newline, e.g. ``"example.com/index.html\\n"``.

    Raises:
        ValueError: If the payload is not a GET request, the header block is
            incomplete, or the host/target bytes are not valid UTF-8
            (``UnicodeDecodeError`` is a ``ValueError`` subclass).
        KeyError: If the request carries no ``Host`` header.
    """
    head, terminator, _body = payload.partition(b"\r\n\r\n")
    if not terminator:
        raise ValueError("payload does not contain a complete HTTP header block")

    request_line, _, header_block = head.partition(b"\r\n")

    # Split "<method> <target> <version>" from both ends so the check is
    # anchored to the request line itself; a "GET " that merely appears
    # somewhere later in the payload (e.g. in a POST body) must not match.
    method, _, rest = request_line.partition(b" ")
    target, _, version = rest.rpartition(b" ")
    if method != b"GET" or not target or not version.startswith(b"HTTP/"):
        raise ValueError("payload is not an HTTP GET request")

    # Header field names are case-insensitive, so normalise them to lower
    # case. A repeated header keeps its last value.
    headers = {}
    for line in header_block.split(b"\r\n"):
        name, colon, value = line.partition(b":")
        if colon:
            headers[name.strip().lower()] = value.strip()

    if b"host" not in headers:
        raise KeyError("Host")

    # Decode only the pieces that end up in the URL: bytes in unrelated
    # headers (cookies, referers, ...) cannot make the request undecodable.
    return headers[b"host"].decode("utf8") + target.decode("utf8") + "\n"
@profile
def parse_pcap(pcap_path, urls_file, port=80):
    """Write the URL of every HTTP GET request found in a capture file.

    Packets are read one at a time with ``PcapReader``. For each TCP packet
    whose destination port equals *port*, the TCP payload is handed to
    ``get_url_from_payload`` and the resulting line is appended to
    *urls_file*.

    Args:
        pcap_path: Path of the capture file to read.
        urls_file: Path of the output file; it is created or truncated and
            written in binary mode.
        port: TCP destination port to inspect. Defaults to 80.
    """
    # Both resources are context-managed so they are closed even when reading
    # the capture or writing the output fails part-way through.
    with open(urls_file, "wb") as urls_output, PcapReader(pcap_path) as packets:
        for packet in packets:
            if not packet.haslayer(TCP) or packet[TCP].dport != port:
                continue

            payload = bytes(packet[TCP].payload)
            if not payload:
                # Segments without data (e.g. bare ACKs) cannot hold a request.
                continue

            try:
                url = get_url_from_payload(payload)
            except (ValueError, KeyError):
                # The segment is not a parsable GET request with a Host
                # header; skip it. Only parse failures are suppressed, so
                # I/O errors from the write below still surface.
                continue

            urls_output.write(url.encode())
def main(arguments):
    """Parse the command line and run the URL extraction.

    Args:
        arguments: Argument vector in ``sys.argv`` form; element 0 (the
            program name) is ignored. ``--pcap PATH`` and ``--output PATH``
            are both required and may be given in either order.

    Raises:
        SystemExit: Raised by ``argparse`` with status 2 and a usage message
            on stderr when the arguments are missing or malformed, instead
            of silently doing nothing.
    """
    # Imported locally so this function does not depend on a file-level import.
    import argparse

    parser = argparse.ArgumentParser(
        description="Extract the URLs of HTTP GET requests from a pcap file."
    )
    parser.add_argument(
        "--pcap", required=True, metavar="PATH", help="capture file to read"
    )
    parser.add_argument(
        "--output", required=True, metavar="PATH", help="file to write the URLs to"
    )
    options = parser.parse_args(arguments[1:])

    parse_pcap(options.pcap, options.output)
# Script entry point: hand the raw argument vector to main() only when this
# file is executed directly, not when it is imported.
if __name__ == "__main__":
    main(sys.argv)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment