Skip to content

Instantly share code, notes, and snippets.

@scudette
Created June 1, 2023 09:43
Show Gist options
  • Save scudette/40f49fb64383eed489667ca9fade93f4 to your computer and use it in GitHub Desktop.
Save scudette/40f49fb64383eed489667ca9fade93f4 to your computer and use it in GitHub Desktop.
An artifact that can search a PDF
name: Generic.Search.PDF
description: |
This artifact searches PDF files for a keyword.
parameters:
- name: PDFGlob
default: /tmp/*.pdf
description: A glob to find PDF files
- name: YaraRule
type: yara
default: |
rule X {
strings:
$a = "SECRET" wide nocase
condition: any of them
}
column_types:
- name: CompressedStream
type: upload_preview
- name: TextStream
type: upload_preview
- name: DeflatedStream
type: upload_preview
- name: Upload
type: upload_preview
- name: Match
type: hex
sources:
- query: |
-- To decompress a deflate stream convert to a gzip file by
-- slapping a header on top and removing the deflate header.
LET Deflate(Stream) = read_file(
length=100000,
accessor="gzip",
filename=pathspec(
DelegateAccessor="data",
DelegatePath="\x1F\x8B\x08\x00\x00\x00\x00\x00\x00\x03" + Stream[2:]))
LET MatchRule(Data) = SELECT String.Data AS Hit,
String.HexData as HexHit,
String.Offset AS Offset
FROM yara(accessor="data",
files=Data,
number=1, context=50,
rules=YaraRule)
LIMIT 1
LET Parsed(Filename) = SELECT parse_string_with_regex(
string=Object,
regex='''(?sm)(?P<Header>(?P<ObjNumber>\d+ \d+) obj[\r\n]+.+?stream)\r?\n(?P<Stream>.+?)endstream''') AS Data
FROM parse_records_with_regex(
file=Filename,
regex='''(?sm)(?P<Object>\d+ \d+ obj[\n\r]+.+?[\r\n]+endobj[\r\n]+)''',
buffer_size=1000000)
WHERE Data.Header =~ "/FlateDecode"
AND log(message=format(format="%v: Inspecting object %v of size %v",
args=[Filename, Data.ObjNumber, len(list=Data.Stream)]))
LET Letters(Data) = SELECT X
FROM parse_records_with_regex(
accessor="data",
file=Data,
regex='''\((?P<X>[^/()]+?)\)''')
LET Text(Data) = join(sep="",array=Letters(Data=Data).X)
LET Decoded(Filename) = SELECT Data.ObjNumber AS ObjNumber,
Data.Header AS Header,
Data.Stream AS CompressedStream,
Deflate(Stream=Data.Stream) AS DeflatedStream,
Text(Data=Deflate(Stream=Data.Stream)) AS TextStream
FROM Parsed(Filename=Filename)
LET SearchFile(Filename) = SELECT ObjNumber, Header,
upload(file=Filename) AS Upload,
upload(accessor="data", file=CompressedStream,
name=Filename + ObjNumber + "Compressed") as CompressedStream,
upload(accessor="data", file=DeflatedStream,
name=Filename + ObjNumber + "Deflated") AS DeflatedStream,
upload(accessor="data", file=TextStream,
name=Filename + ObjNumber + "Text") AS TextStream,
MatchRule(Data=DeflatedStream + TextStream) AS Match
FROM Decoded(Filename=Filename)
WHERE Match
LIMIT 1
SELECT * FROM foreach(row={
SELECT * FROM glob(globs=PDFGlob)
}, query={
SELECT OSPath, ObjNumber, Header,
CompressedStream, DeflatedStream,
TextStream, Upload,
format(format="%02x", args=Match.Hit) AS Match,
Match[0].Offset AS MatchOffset
FROM SearchFile(Filename=OSPath)
})
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment