Last active
August 2, 2021 19:50
-
-
Save pramos/70c34fbe60893912a7467fb2784fa267 to your computer and use it in GitHub Desktop.
Extracting URLs from pcap file with Scapy using rdpcap()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys | |
import re | |
from scapy.all import * | |
from memory_profiler import profile | |
@profile | |
def get_url_from_payload(payload):
    """Return ``<Host header><request target>`` plus a newline for a GET request.

    Args:
        payload: Raw bytes expected to start with an HTTP/1.x ``GET`` request
            line and to contain the complete header section (i.e. the empty
            line that terminates the headers must be present).

    Returns:
        str: The ``Host`` header value immediately followed by the request
        target and a trailing newline, e.g. ``"example.com/index.html\\n"``.

    Raises:
        ValueError: If the header terminator is missing or the first line is
            not an HTTP/1.x ``GET`` request line.  ``UnicodeDecodeError`` (a
            ``ValueError`` subclass) is raised when the request target or the
            host is not valid UTF-8.
        KeyError: If the request carries no ``Host`` header.
    """
    # Only the header section matters; bytes.index() raises ValueError when
    # the blank line ending the headers is not part of this payload.
    header_end = payload.index(b"\r\n\r\n")
    header_lines = payload[:header_end].split(b"\r\n")

    # Request line layout: METHOD SP request-target SP HTTP-version.
    # Parsing the *first line* (instead of searching the whole payload for
    # "GET ") prevents bodies or header values that merely contain "GET "
    # from being mistaken for a request.
    method, _, remainder = header_lines[0].partition(b" ")
    target, _, version = remainder.rpartition(b" ")
    if method != b"GET" or not target or not version.startswith(b"HTTP/1."):
        raise ValueError("payload does not start with an HTTP/1.x GET request line")

    # Header names are case-insensitive and the whitespace around the value is
    # optional, so normalise both.  As with dict(), the last duplicate wins.
    headers = {}
    for line in header_lines[1:]:
        name, colon, value = line.partition(b":")
        if colon:
            headers[name.strip().lower()] = value.strip()

    if b"host" not in headers:
        raise KeyError("Host")

    # Decode only the two pieces that end up in the result, so that odd bytes
    # in unrelated headers (cookies, user agents, ...) cannot drop the URL.
    host = headers[b"host"].decode("utf8")
    url_path = target.decode("utf8")
    return host + url_path + "\n"
@profile | |
def parse_pcap(pcap_path, urls_file):
    """Extract the URLs of HTTP GET requests in a pcap file and save them.

    The capture is read with ``rdpcap`` and grouped with ``sessions()``; every
    TCP packet whose destination port is 80 is handed to
    ``get_url_from_payload`` and each URL that can be extracted is written to
    the output file (one per line, since the helper appends the newline).

    Args:
        pcap_path: Path of the capture file to read.
        urls_file: Path of the file to (over)write with the extracted URLs.

    Raises:
        OSError: If the output file cannot be opened or written.  Errors from
            reading the capture propagate from ``rdpcap`` unchanged.
    """
    # NOTE: rdpcap() is called before the output file is opened, so a capture
    # that cannot be read does not leave an empty output file behind.
    sessions = rdpcap(pcap_path).sessions()

    # The context manager guarantees the file is flushed and closed even if
    # something unexpected goes wrong while iterating the packets.
    with open(urls_file, "wb") as urls_output:
        for session_packets in sessions.values():
            for packet in session_packets:
                # Explicit checks instead of relying on exceptions for
                # non-TCP packets and for traffic to other ports.
                if not packet.haslayer(TCP) or packet[TCP].dport != 80:
                    continue

                payload = bytes(packet[TCP].payload)
                if not payload:
                    # Segments without data (pure ACK/SYN/FIN) carry no request.
                    continue

                try:
                    url = get_url_from_payload(payload)
                except (ValueError, KeyError):
                    # Best effort: not a complete GET request with a Host
                    # header (ValueError also covers UnicodeDecodeError).
                    continue

                urls_output.write(url.encode())
def main(arguments):
    """Command-line entry point: ``<prog> --pcap <pcap_file> --output <urls_file>``.

    Args:
        arguments: The full argument vector including the program name at
            index 0 (i.e. ``sys.argv``).  The two options may be given in
            either order.

    Returns:
        None.  When the arguments are not usable a usage message is printed to
        standard error and nothing else is done.
    """
    # Pair each option name (odd positions) with the value following it.
    options = dict(zip(arguments[1::2], arguments[2::2]))

    if len(arguments) != 5 or set(options) != {"--pcap", "--output"}:
        program = arguments[0] if arguments else "script"
        print(
            "usage: " + program + " --pcap <pcap_file> --output <urls_file>",
            file=sys.stderr,
        )
        return

    parse_pcap(options["--pcap"], options["--output"])
if __name__ == "__main__":
    # Run the command-line interface only when the file is executed as a
    # script, not when it is imported as a module.
    main(sys.argv)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment