Last active
August 27, 2022 12:25
-
-
Save timb-machine/878b60650de10ee26f36bddcd3644b07 to your computer and use it in GitHub Desktop.
Words commonly found in Linux DFIR reports
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Generated with:
#!/usr/bin/perl

# Lookup set of dictionary words: each line read from /usr/share/dict/words
# becomes a key with the value 1. count() skips any word found here.
my %dictionary = ();

# Tally of the words kept by count(): word => number of occurrences.
my %words = ();
# count(FILEHANDLE)
#
# Reads FILEHANDLE line by line and tallies the "interesting" words of each
# line into %words (word => number of occurrences).
#
# Per line:
#   * CR/LF are removed, tabs become spaces, remaining non-printable
#     characters are dropped;
#   * when the package variable $filename mentions "nitter" or "blogspot",
#     bracketed tags of the form [word_word] are removed;
#   * the line is split on spaces; any token that looks like a link
#     (h..p:// or h..ps://, so "hxxp" also matches) is skipped whole;
#   * the punctuation , . : ; ! ` ' " is turned into spaces and the token is
#     split again into words;
#   * a trailing _<5 hex digits> suffix is stripped from each word;
#   * a word is counted only if it is not a key of %dictionary and contains
#     two alphanumerics, then "/" or "_", then two more alphanumerics.
#
# Returns nothing; the result is the side effect on %words.
sub count {
    my ($filehandle) = @_;

    # The name of the file being processed is not passed in: it is read from
    # the package variable $filename, which the caller's foreach loop sets.
    our $filename;
    my $source = defined $filename ? $filename : "";

    # The site-specific check does not depend on the line, so do it once.
    my $strip_tags = ($source =~ /nitter/) || ($source =~ /blogspot/);

    while (my $line = <$filehandle>) {
        $line =~ s/[\x0a\x0d]//g;
        $line =~ s/\t/ /g;            # tabs first: they are non-printable too
        $line =~ s/[^[:print:]]+//g;  # drop remaining non-printable characters
        $line =~ s/\[[a-z]+_[a-z]+\]//g if $strip_tags;

        foreach my $token (split(/ /, $line)) {
            # Links must be recognised BEFORE the punctuation is replaced:
            # once ":" has become a space, "http://" can no longer be seen
            # and the pieces of the URL would be counted as words.
            next if $token =~ m{h..ps*://};

            $token =~ s/[,.:;!`'"]/ /g;

            foreach my $word (split(/ /, $token)) {
                next if $word eq "";

                # Many reports contain <common string>_<5 hex digits>;
                # fold those variants onto the common string.
                $word =~ s/_[0-9a-f]{5}$//;

                next if $dictionary{$word};
                next unless $word =~ /[[:alnum:]]{2}[\/_][[:alnum:]]{2}/;

                $words{$word}++;
            }
        }
    }

    return;
}
# --- Main script -----------------------------------------------------------
# 1. Load /usr/share/dict/words into %dictionary.
# 2. Convert every article under articles/ to text and feed it to count().
# 3. Write "<count>\t<word>" lines to all-words.txt.

# Load the dictionary. A missing dictionary is reported but is not fatal:
# the run then simply proceeds with an empty %dictionary.
if (open(my $dict, '<', '/usr/share/dict/words')) {
    while (my $entry = <$dict>) {
        $entry =~ s/[\x0a\x0d]//g;
        $dictionary{$entry} = 1;
    }
    close($dict);
} else {
    warn "cannot read /usr/share/dict/words: $!\n";
}

%words = ();

# count() reads the current file name from the package variable $filename,
# so the loop variable below must stay a package variable (not "my").
our $filename;

# Glob pattern => command (as an argument list) that dumps one such file as
# text on stdout. Argument lists are run without a shell, so file names
# containing spaces or shell metacharacters are passed through safely.
my @converters = (
    [ 'articles/*.html', sub { ('w3m', '-dump', $_[0]) } ],
    [ 'articles/*.pdf',  sub { ('pdftotext', $_[0], '-') } ],
);

for my $converter (@converters) {
    my ($pattern, $command_for) = @$converter;

    for $filename (glob($pattern)) {
        my @command = $command_for->($filename);

        my $pipe;
        unless (open($pipe, '-|', @command)) {
            # Best effort: one unreadable article should not stop the run.
            warn "cannot run $command[0] on $filename: $!\n";
            next;
        }
        count($pipe);
        close($pipe)
            or warn "$command[0] failed on $filename (status $?)\n";
    }
}

open(my $out, '>', 'all-words.txt')
    or die "cannot write all-words.txt: $!\n";

# Most frequent first, ties broken alphabetically, so that repeated runs
# over the same articles produce the same file.
for my $word (sort { $words{$b} <=> $words{$a} or $a cmp $b } keys %words) {
    print {$out} $words{$word} . "\t" . $word . "\n";
}

close($out) or die "cannot finish writing all-words.txt: $!\n";
frequent-words.txt: | |
132 >/dev/null | |
81 /bin/bash | |
79 Penquin_x64 | |
75 Linux/Moose | |
62 Linux/Mirai | |
58 Linux/SSHDoor | |
55 Linux/Ebury | |
53 /etc/ld | |
51 LD_PRELOAD | |
49 x86_64 | |
42 /usr/local/bin/ | |
41 /bin/sh | |
40 and/or | |
38 GNU/Linux | |
38 /etc/cron | |
37 <linux/module | |
36 sys_call_table | |
36 [icon_feed1] | |
36 /dev/null | |
35 Linux/Kaiten | |
34 Linux/XorDDoS | |
33 local_18 | |
33 Netherlands|North_Holland|Amsterdam | |
32 io_Ltd | |
30 /var/run | |
30 /etc/rc | |
30 /etc/hosts | |
29 Linux/ChinaZ | |
29 /dev/shm | |
28 init_module(void) | |
28 cleanup_module(void) | |
27 <sys/syscall | |
27 <linux/kernel | |
25 <sys/types | |
24 system_call | |
24 <linux/malloc | |
23 task_struct | |
23 os/exec | |
23 <linux/fs | |
22 pw_data[ | |
22 Linux/Mayhem | |
22 2>/dev/null | |
22 "hierarchy_id" | |
22 "controller_list" | |
21 bash_history | |
21 END_KMEM | |
21 BEGIN_KMEM | |
21 <linux/types | |
21 /usr/bin/ | |
21 /etc/init | |
infrequent-words.txt: | |
(on the current count, there are over 6000 words that appear just once)
If at first you don't succeed, break out Perl :).
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
There is a gloriously stupid bug in version 2 of this. Remind me to get a cup of tea before I hack code in future.