Skip to content

Instantly share code, notes, and snippets.

@grigorescu
Last active February 17, 2020 16:38
Show Gist options
  • Save grigorescu/965444e82b2260f31bd1 to your computer and use it in GitHub Desktop.
Save grigorescu/965444e82b2260f31bd1 to your computer and use it in GitHub Desktop.
@load base/frameworks/files
module ExtractHTTP;

##! Extracts files seen over HTTP according to a flexible, redef-able policy.

export {
	## Extraction rule applied to files transferred with one HTTP method.
	type Policy: record {
		## MIME types to extract. When unset, files of every MIME
		## type are eligible.
		mime_types: set[string] &optional;

		## Extraction size limit, in bytes. When unset, files are
		## extracted regardless of their size.
		limit: count &optional;
	};

	## Extraction policies, indexed by HTTP method (e.g. "GET").
	## Files seen with a method that has no entry here are not extracted.
	const policies: table[string] of Policy = table() &redef;

	## Base directory that files are extracted to. A sub-directory
	## named after the current day (YYYY-MM-DD) is created beneath it.
	const base_dir = "/tmp" &redef;
}
# Normally, this section would go into local.bro
#
# For streaming HTTP content, the size of the file is not known until it is
# too late to make an extraction decision. We could extract by default in
# these cases and delete afterwards, at the cost of increased disk I/O.
# Alternatively, we can resize the beginning-of-file buffer so that this many
# bytes are reassembled in memory first, and only then make a decision.
# The buffer is set to 5 KB plus a bit, so that we can tell whether the file
# is exactly 5 KB or larger.
#
# Note: Bro ships with a default of 4 KB. Increasing this much further will
# have a performance impact.
redef default_file_bof_buffer_size = 5 * 1024 + 32;

redef policies += {
	["GET"] = [$mime_types=set("application/x-dosexec", "application/pdf"),
	           $limit=5 * 1024],
	["POST"] = [$mime_types=set("application/xml"),
	            $limit=5 * 1024],
};
# </local.bro section>
# Per-day extraction directories that the file_mime_type handler has already
# taken care of creating; consulted so mkdir() is not called again for every
# extracted file.
global day_dir_created_for: set[string] = set();
# Decides, once a file's MIME type is known, whether the file is extracted to
# disk, based on the per-HTTP-method entry in `policies`.
#
# f: The file under analysis. Only files whose source is "HTTP" and that
#    carry HTTP state with a request method are considered.
# mime_type: The MIME type reported for the file.
#
# Extracted files are written to <base_dir>/<YYYY-MM-DD>/<file id>.
event file_mime_type(f: fa_file, mime_type: string)
	{
	# HTTP?
	if ( f$source != "HTTP" || ! f?$http )
		return;

	# A method we care about?
	if ( ! f$http?$method || f$http$method !in policies )
		return;

	local policy = policies[f$http$method];

	# If the policy lists MIME types, this file's type must be among them.
	# If it doesn't list any, every type is eligible.
	if ( policy?$mime_types && mime_type !in policy$mime_types )
		return;

	# Make the size decision before touching the filesystem, so that no
	# day directory is created for a file that ends up not being extracted.
	# Without a limit, the whole file is extracted.
	if ( policy?$limit )
		{
		if ( f?$total_bytes && f$total_bytes > policy$limit )
			return;

		if ( f?$seen_bytes && f$seen_bytes > policy$limit )
			return;
		}

	local today = strftime("%Y-%m-%d", network_time());
	local today_dir = fmt("%s/%s", base_dir, today);

	if ( today_dir !in day_dir_created_for )
		{
		# Only remember the directory once mkdir() reports success;
		# otherwise a single failure would suppress every later attempt
		# for the day and all extractions into it would fail.
		if ( ! mkdir(today_dir) )
			{
			Reporter::warning(fmt("ExtractHTTP: cannot create directory %s; not extracting file %s",
			                      today_dir, f$id));
			return;
			}

		add day_dir_created_for[today_dir];
		}

	local extract_options = Files::AnalyzerArgs($extract_filename=cat(today_dir, "/", f$id));
	Files::add_analyzer(f, Files::ANALYZER_EXTRACT, extract_options);
	}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment