Skip to content

Instantly share code, notes, and snippets.

@vralex
Created April 11, 2014 06:27
Show Gist options
  • Save vralex/10444040 to your computer and use it in GitHub Desktop.
Save vralex/10444040 to your computer and use it in GitHub Desktop.
__author__ = 'VladimirDel'
import sys
import os
import csv
# Directory that is listed for input files; every entry in it is opened and
# parsed as CSV by the splitting loop below.
CLASSES_DIR = "/Users/VladimirDel/Projects/java/diploma/dist/data/network/kddcup/classes/"
# Directory in which the two output datasets are created (overwritten on each run).
OUTPUT_DIR = "/Users/VladimirDel/Projects/java/diploma/dist/data/network/kddcup/output/"
# Output file receiving the rows of each input file that fall beyond its quota.
TEST_DATASET = "kddcup.tst"
# Output file receiving the first `quota` rows of each input file.
TRAIN_DATASET = "kddcup.tra"
# Name of the input file whose quota is `normal_count` (the command-line
# argument); every other input file uses `attack_count` instead.
NORMAL_FILENAME = "normal.data"
# Number of classes used when deriving `attack_count`: the non-normal share is
# divided (rounding up) by TOTAL_CLASSES - 1.
# NOTE(review): presumably "normal" plus 22 attack types - confirm against the data.
TOTAL_CLASSES = 23
# Fraction of the selected records that should be normal:
# total_count = normal_count / NORMAL_RATIO.
NORMAL_RATIO = 0.5
def append(writer, row):
    """Write one record to a CSV writer.

    Args:
        writer: Object exposing ``writerow`` (e.g. the result of
            ``csv.writer``).
        row: Sequence of field values forming a single record.
    """
    writer.writerow(row)
def plan_counts(normal_count, normal_ratio, total_classes):
    """Derive the total record count and the per-attack-class quota.

    Args:
        normal_count: Number of normal records requested.
        normal_ratio: Fraction of the total that the normal records represent.
        total_classes: Number of classes; the non-normal share is spread over
            ``total_classes - 1`` of them.

    Returns:
        Tuple ``(total_count, attack_count)`` where ``attack_count`` is the
        non-normal share divided by the number of attack classes, rounded up.
    """
    total_count = int(normal_count / normal_ratio)
    attack_classes = total_classes - 1
    # Adding (divisor - 1) before dividing rounds the quotient up.
    attack_count = int(
        (total_count - normal_count + attack_classes - 1) / attack_classes
    )
    return total_count, attack_count


def list_class_files(classes_dir):
    """Return the input files found directly inside ``classes_dir``.

    Sub-directories and hidden entries (names starting with ``.``) are
    skipped because they cannot be parsed as CSV class files. The result is
    sorted so that the output order does not depend on the file system.

    Args:
        classes_dir: Directory to list.

    Returns:
        List of paths, each ``classes_dir`` joined with an entry name.
    """
    paths = []
    for name in sorted(os.listdir(classes_dir)):
        path = os.path.join(classes_dir, name)
        if not name.startswith(".") and os.path.isfile(path):
            paths.append(path)
    return paths


def split_class_file(path, train_limit, train_writer, test_writer):
    """Distribute the rows of one CSV file between the two writers.

    The first ``train_limit`` non-empty rows go to ``train_writer``; all
    remaining rows go to ``test_writer``. The last character of the last
    field is dropped from every row before it is written.

    Args:
        path: CSV file to read.
        train_limit: Number of leading rows sent to ``train_writer``.
        train_writer: Object exposing ``writerow`` for training rows.
        test_writer: Object exposing ``writerow`` for test rows.

    Returns:
        Tuple ``(train_rows, test_rows)`` with the number of rows written to
        each writer.
    """
    train_rows = 0
    test_rows = 0
    with open(path, 'r', newline='') as source:
        for row in csv.reader(source):
            if not row:
                # csv.reader yields [] for blank lines; there is no last
                # field to trim, so skip them without using up quota.
                continue
            # NOTE(review): presumably strips the trailing "." that ends each
            # label in the KDD Cup files - confirm against the data.
            row[-1] = row[-1][:-1]
            if train_rows < train_limit:
                train_writer.writerow(row)
                train_rows += 1
            else:
                test_writer.writerow(row)
                test_rows += 1
    return train_rows, test_rows


def main(argv=None):
    """Split every class file into the train and test datasets.

    Args:
        argv: Command-line arguments without the program name; defaults to
            ``sys.argv[1:]``. The first argument is the number of normal
            records to put into the training set.

    Raises:
        SystemExit: If no argument is given.
        ValueError: If the first argument is not an integer.
    """
    args = sys.argv[1:] if argv is None else argv
    if not args:
        sys.exit("usage: <script> NORMAL_COUNT")

    normal_count = int(args[0])
    total_count, attack_count = plan_counts(
        normal_count, NORMAL_RATIO, TOTAL_CLASSES
    )
    print("Taking " + str(normal_count) + " normal records")
    print("Total records: " + str(total_count))
    print("Taking " + str(attack_count) + " records of each attack type")
    print()

    # List the inputs before touching the outputs so that a missing input
    # directory does not truncate existing datasets.
    files = list_class_files(CLASSES_DIR)

    test_path = os.path.join(OUTPUT_DIR, TEST_DATASET)
    train_path = os.path.join(OUTPUT_DIR, TRAIN_DATASET)
    # The with-statement closes both outputs even if reading an input fails.
    with open(test_path, mode="w+", newline='') as test_dataset, \
            open(train_path, mode="w+", newline='') as train_dataset:
        test_writer = csv.writer(test_dataset)
        train_writer = csv.writer(train_dataset)
        for path in files:
            is_normal = os.path.basename(path) == NORMAL_FILENAME
            quota = normal_count if is_normal else attack_count
            split_class_file(path, quota, train_writer, test_writer)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment