Created
August 22, 2023 06:42
-
-
Save zeddee/1108720fb7fe662db46c194a47decc34 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Run with pytest | |
""" | |
import csv | |
from typing import List, Iterable | |
from io import StringIO | |
from copy import copy | |
# CSV input that contains delimiters that are followed by a space (``, ``).
# The first line is a ``#``-prefixed header; the two data rows follow it.
# Note that the sha256_hash value of the first data row contains a comma.
TEST_DATA_WITH_SPACES = StringIO(
    """# "first_seen_utc","sha256_hash","md5_hash","sha1_hash","reporter","file_name","file_type_guess","mime_type","signature","clamav","vtpercent","imphash","ssdeep","tlsh"
"2023-08-21 22:53:38", "fd834695fc878b5ed5178dad8,6cf30130adf4a7ac88d5997f8eb7814ca41f211", "db896ff1c3a206de4da48748f0a4731d", "28e85fe7ff3e99fd2c629db98690ecf72194ba7c", "SquiblydooBlog", "installer-package.exe.zip", "zip", "application/zip", "n/a", "n/a", "n/a", "n/a", "24576:2KyTK3lpgjVaC5CAfmXAYpDRvGGOtL0DBlkyYXPGEIwP:WCPeVa4/fkAqD9U8BKJ+EI8", "T139F5EEE784C42CC293FF041525A3EF6032BE2584A138D24F7C591CDB688DD95AA5FBB9"
"2023-08-21 22:52:47", "b44f87ac8e73b8338f7f0c689f4782c7aff9ec9c569cd68b8c01c6b1ff65beb3", "a41f96e758448283ba25e0c2db3e07f0", "e57f811d752dcac7718e259624755edbf26477b1", "SquiblydooBlog", "LABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZPEwjVIODEQqXUZPKRVAUMUaE9kGQoXTetKGxWBJpAooXeTM3z0VhaHmZWr8.dll", "dll", "application/x-dosexec", "n/a", "n/a", "n/a", "dae02f32a21e03ce65412f6e56942daa", "12288:yzSzJpUTzucAp9dDONwdwRm5lyc9iJbXpf:Rpkg39KRmPsbXZ", "T1EDE4DE143BA4DC508B6C16E868DB97079B2356A7DEEFFF070AA291340A5B86347513CF"
"""
)

# CSV input that contains delimiters with no following space (``,``).
# Same header and data rows as above, without the space after each comma.
TEST_DATA_WITH_NO_SPACES = StringIO(
    """# "first_seen_utc","sha256_hash","md5_hash","sha1_hash","reporter","file_name","file_type_guess","mime_type","signature","clamav","vtpercent","imphash","ssdeep","tlsh"
"2023-08-21 22:53:38","fd834695fc878b5ed5178dad8,6cf30130adf4a7ac88d5997f8eb7814ca41f211","db896ff1c3a206de4da48748f0a4731d","28e85fe7ff3e99fd2c629db98690ecf72194ba7c","SquiblydooBlog","installer-package.exe.zip","zip","application/zip","n/a","n/a","n/a","n/a","24576:2KyTK3lpgjVaC5CAfmXAYpDRvGGOtL0DBlkyYXPGEIwP:WCPeVa4/fkAqD9U8BKJ+EI8","T139F5EEE784C42CC293FF041525A3EF6032BE2584A138D24F7C591CDB688DD95AA5FBB9"
"2023-08-21 22:52:47","b44f87ac8e73b8338f7f0c689f4782c7aff9ec9c569cd68b8c01c6b1ff65beb3","a41f96e758448283ba25e0c2db3e07f0","e57f811d752dcac7718e259624755edbf26477b1","SquiblydooBlog","LABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZPEwjVIODEQqXUZPKRVAUMUaE9kGQoXTetKGxWBJpAooXeTM3z0VhaHmZWr8.dll","dll","application/x-dosexec","n/a","n/a","n/a","dae02f32a21e03ce65412f6e56942daa","12288:yzSzJpUTzucAp9dDONwdwRm5lyc9iJbXpf:Rpkg39KRmPsbXZ","T1EDE4DE143BA4DC508B6C16E868DB97079B2356A7DEEFFF070AA291340A5B86347513CF"
"""
)
def read_csv(file_object: Iterable[str]) -> List[List[str]]:
    """Parse CSV text into a list of rows with a strict, comma-delimited reader.

    :param file_object: Any iterable of lines, such as a file object returned
        by ``open(filename)`` or a ``StringIO`` object.
    :return: One list of field strings per parsed row, in input order.
    """
    # ``skipinitialspace`` is left at its default here, so a space directly
    # after a delimiter is kept as part of the following field.
    reader = csv.reader(file_object, delimiter=',', quotechar='"', strict=True)
    return list(reader)
def read_csv_skipinitialspace(file_object: Iterable[str]) -> List[List[str]]:
    """Parse CSV text like ``read_csv``, but with ``skipinitialspace=True``.

    :param file_object: Any iterable of lines, such as a file object returned
        by ``open(filename)`` or a ``StringIO`` object.
    :return: One list of field strings per parsed row, in input order.
    """
    parsed_rows: List[List[str]] = []
    # With ``skipinitialspace=True`` the reader ignores spaces that
    # immediately follow a delimiter.
    parsed_rows.extend(
        csv.reader(
            file_object,
            delimiter=',',
            quotechar='"',
            skipinitialspace=True,
            strict=True,
        )
    )
    return parsed_rows
def test_read_with_spaces():
    """Read CSV input whose delimiters are followed by a space, using ``read_csv``.

    ``read_csv`` does not set ``skipinitialspace``, so the space after each
    comma keeps the following ``"`` from being treated as an opening quote.
    The comma inside the first data row's sha256_hash value therefore splits
    that value in two, and the two data rows end up with different lengths.
    """
    # Copy the module-level stream so this test reads from its own object.
    out = read_csv(copy(TEST_DATA_WITH_SPACES))
    # out[0] is the ``#``-prefixed header line; the data rows follow it.
    row1 = out[1]
    row2 = out[2]
    # The message is shown only when the assertion fails, i.e. when the
    # lengths turn out to be equal.
    assert len(row1) != len(row2), (
        f"\nRow lengths are unexpectedly equal\nRow 1:{len(row1)}\nRow 2:{len(row2)}"
    )
def test_skip_initial_space():
    """Read the space-after-delimiter input with ``read_csv_skipinitialspace``.

    With ``skipinitialspace=True`` the space after each comma is ignored, so
    quoted values (including the one containing a comma) are parsed as single
    fields and both data rows have the same length.
    """
    # Copy the module-level stream so this test reads from its own object.
    out = read_csv_skipinitialspace(copy(TEST_DATA_WITH_SPACES))
    # out[0] is the ``#``-prefixed header line; the data rows follow it.
    row1 = out[1]
    row2 = out[2]
    # The message is shown only when the assertion fails, i.e. when the
    # lengths differ.
    assert len(row1) == len(row2), (
        f"\nRow lengths are not equal\nRow 1:{len(row1)}\nRow 2:{len(row2)}"
    )
def test_read_with_no_spaces():
    """Read CSV input with no space after the delimiters, using ``read_csv``.

    Every quoted value starts directly after a comma, so quoting is honoured
    without ``skipinitialspace`` and both data rows have the same length.
    """
    # Copy the module-level stream so this test reads from its own object.
    out = read_csv(copy(TEST_DATA_WITH_NO_SPACES))
    # out[0] is the ``#``-prefixed header line; the data rows follow it.
    row1 = out[1]
    row2 = out[2]
    # The message is shown only when the assertion fails, i.e. when the
    # lengths differ.
    assert len(row1) == len(row2), (
        f"\nRow lengths are not equal\nRow 1:{len(row1)}\nRow 2:{len(row2)}"
    )
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment