Last active
February 17, 2020 16:38
-
-
Save grigorescu/965444e82b2260f31bd1 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
##! Extracts files seen over HTTP, according to a flexible, redef-able policy.

@load base/frameworks/files
# Provides Files::ANALYZER_EXTRACT and the extract_* analyzer arguments.
@load base/files/extract

module ExtractHTTP;
export {
	## An extraction policy, applied to files transferred with one HTTP method.
	type Policy: record {
		## MIME types to extract. If unset, files of every MIME type
		## are extracted.
		mime_types: set[string] &optional;
		## Extraction size limit, in bytes. If unset, files of any
		## size are extracted.
		limit: count &optional;
	};

	## A table of extraction policies, indexed by the HTTP method
	## (e.g. "GET"). Files transferred with a method that has no entry
	## here are not extracted.
	const policies: table[string] of Policy = table() &redef;

	## The base directory where you'd like files to be extracted to.
	## Beneath this directory there will be individual directories
	## created for each day.
	const base_dir = "/tmp" &redef;
}
# Normally, this section would go into local.bro (outside of this module,
# `policies` must then be written as `ExtractHTTP::policies`).
#
# For streaming HTTP content, we won't know the size of the file until it's
# too late to make an extraction decision. We could extract by default in
# these cases, and then delete, at the cost of increased disk I/O.
# Alternatively, we can resize this buffer to reassemble this many bytes in
# memory, and then make a decision.
# We set this to 5 KB plus a bit, so we can tell if the file is exactly 5 KB
# or more.
#
# Note: Bro ships with a default of 4 KB. Increasing this much further will
# have a performance impact.
redef default_file_bof_buffer_size = (5*1024) + 32;

redef policies += {
	["POST"] = [$mime_types=set("application/xml"), $limit=5*1024],
	["GET"] = [$mime_types=set("application/x-dosexec", "application/pdf"), $limit=5*1024],
};
# </local.bro section>
# Per-day extraction directories (base_dir/YYYY-MM-DD) that have already been
# created successfully, so that mkdir() is not called again for every file.
global day_dir_created_for: set[string] = set();

# Once the MIME type of a file is known, decide whether to extract the file to
# disk, based on the policy configured for the HTTP method that carried it.
#
# f: the file being analyzed.
# mime_type: the MIME type detected for the file.
event file_mime_type(f: fa_file, mime_type: string)
	{
	# HTTP?
	if ( f$source != "HTTP" || ! f?$http )
		return;

	# A method we care about?
	if ( ! f$http?$method || f$http$method !in policies )
		return;

	# Look the policy up once instead of indexing the table repeatedly.
	local policy = policies[f$http$method];

	# If the policy names MIME types, this file must be one of them.
	# If it doesn't, every MIME type is extracted.
	if ( policy?$mime_types && mime_type !in policy$mime_types )
		return;

	# If the policy has a size limit and we already know the file exceeds
	# it (announced size or bytes seen so far), don't extract. This is
	# checked before touching the disk so that no directory is created for
	# a file we end up skipping.
	if ( policy?$limit &&
	     ( ( f?$total_bytes && f$total_bytes > policy$limit ) ||
	       ( f?$seen_bytes && f$seen_bytes > policy$limit ) ) )
		return;

	local today_dir = fmt("%s/%s", base_dir, strftime("%Y-%m-%d", network_time()));

	if ( today_dir !in day_dir_created_for )
		{
		# Only remember the directory if it really exists; otherwise a
		# single failure would silently break extraction for the whole
		# day. A later file will retry the mkdir().
		if ( ! mkdir(today_dir) )
			{
			Reporter::warning(fmt("ExtractHTTP: cannot create directory %s, not extracting file %s",
			                      today_dir, f$id));
			return;
			}

		add day_dir_created_for[today_dir];
		}

	local extract_options = Files::AnalyzerArgs($extract_filename=cat(today_dir, "/", f$id));

	# The checks above only cover what is known right now. For content
	# whose final size is unknown at this point, also cap the extraction
	# itself so the file on disk cannot grow past the policy's limit.
	if ( policy?$limit )
		extract_options$extract_limit = policy$limit;

	Files::add_analyzer(f, Files::ANALYZER_EXTRACT, extract_options);
	}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment