vralex · April 11, 2014 06:27
diff --git a/make_datasets.py b/make_datasets.py
 __author__ = 'VladimirDel'

 import sys
 import os
 import csv


 # Input directory: every entry listed in it is opened and parsed as a CSV file
 # (presumably one file per traffic class -- confirm against the data layout).
 CLASSES_DIR = "/Users/VladimirDel/Projects/java/diploma/dist/data/network/kddcup/classes/"
 # Directory in which the two generated dataset files are created.
 OUTPUT_DIR = "/Users/VladimirDel/Projects/java/diploma/dist/data/network/kddcup/output/"
 # Output file receiving every row beyond the per-file training quota.
 TEST_DATASET = "kddcup.tst"
 # Output file receiving the first rows of each input file, up to its quota.
 TRAIN_DATASET = "kddcup.tra"
 # Name of the input file whose training quota is the CLI-supplied normal count;
 # every other file uses the computed per-attack quota.
 NORMAL_FILENAME = "normal.data"
 # Class count used in the quota formula; the per-attack quota divides the
 # non-normal records by TOTAL_CLASSES - 1.
 TOTAL_CLASSES = 23
 # Fraction of the training total made up of normal records
 # (the total is computed as normal_count / NORMAL_RATIO).
 NORMAL_RATIO = 0.5


 def append(writer, row):
    """Emit one record through *writer*.

    Args:
        writer: object exposing ``writerow`` (the script passes
            ``csv.writer`` instances).
        row: sequence of field values written as a single record.
    """
    emit = writer.writerow
    emit(row)

 # The number of normal records for the training set is the first CLI argument;
 # fail with a usage message instead of a bare IndexError when it is missing.
 if len(sys.argv) < 2:
    sys.exit("usage: " + sys.argv[0] + " <normal_count>")

 normal_count = int(sys.argv[1])
 # Normal records make up NORMAL_RATIO of the training total.
 total_count = int(normal_count / NORMAL_RATIO)
 # Split the non-normal remainder evenly over the TOTAL_CLASSES - 1 attack
 # classes, rounding up (integer ceiling division, no float round-trip).
 attack_count = (total_count - normal_count + TOTAL_CLASSES - 2) // (TOTAL_CLASSES - 1)

 print("Taking " + str(normal_count) + " normal records")
 print("Total records: " + str(total_count))
 print("Taking " + str(attack_count) + " records of each attack type")
 print()

 files = [os.path.join(CLASSES_DIR, name) for name in os.listdir(CLASSES_DIR)]
 # Hoisted out of the loop: the one input file that uses the normal quota.
 normal_path = os.path.join(CLASSES_DIR, NORMAL_FILENAME)

 # Context managers guarantee both outputs are flushed and closed even when
 # reading one of the class files raises.
 with open(os.path.join(OUTPUT_DIR, TEST_DATASET), mode="w+", newline='') as test_dataset, \
        open(os.path.join(OUTPUT_DIR, TRAIN_DATASET), mode="w+", newline='') as train_dataset:
    test_writer = csv.writer(test_dataset)
    train_writer = csv.writer(train_dataset)

    for file in files:
        # Training quota for this file: normal_count for the normal file,
        # attack_count for every other file.
        check = normal_count if file == normal_path else attack_count
        with open(file, 'r', newline='') as attack:
            # csv.reader yields [] for blank lines; drop them so row[-1]
            # cannot raise and they do not consume quota.
            rows = (row for row in csv.reader(attack) if row)
            for row_count, row in enumerate(rows):
                # Drop the final character of the last field (presumably the
                # trailing '.' on KDD Cup labels -- confirm against the data).
                row[-1] = row[-1][:-1]
                if row_count < check:
                    append(train_writer, row)
                else:
                    append(test_writer, row)
	__author__ = 'VladimirDel'

	import sys
	import os
	import csv


	# Input directory: every entry listed in it is opened and parsed as a CSV file
	# (presumably one file per traffic class -- confirm against the data layout).
	CLASSES_DIR = "/Users/VladimirDel/Projects/java/diploma/dist/data/network/kddcup/classes/"
	# Directory in which the two generated dataset files are created.
	OUTPUT_DIR = "/Users/VladimirDel/Projects/java/diploma/dist/data/network/kddcup/output/"
	# Output file receiving every row beyond the per-file training quota.
	TEST_DATASET = "kddcup.tst"
	# Output file receiving the first rows of each input file, up to its quota.
	TRAIN_DATASET = "kddcup.tra"
	# Name of the input file whose training quota is the CLI-supplied normal count;
	# every other file uses the computed per-attack quota.
	NORMAL_FILENAME = "normal.data"
	# Class count used in the quota formula; the per-attack quota divides the
	# non-normal records by TOTAL_CLASSES - 1.
	TOTAL_CLASSES = 23
	# Fraction of the training total made up of normal records
	# (the total is computed as normal_count / NORMAL_RATIO).
	NORMAL_RATIO = 0.5


	def append(writer, row):
	    """Write one record through *writer*.

	    Args:
	        writer: object exposing ``writerow`` (the script passes
	            ``csv.writer`` instances).
	        row: sequence of field values written as a single record.
	    """
	    # The body must be indented under the def; the flattened form does not parse.
	    writer.writerow(row)

	# The number of normal records for the training set is the first CLI argument;
	# fail with a usage message instead of a bare IndexError when it is missing.
	if len(sys.argv) < 2:
	    sys.exit("usage: " + sys.argv[0] + " <normal_count>")

	normal_count = int(sys.argv[1])
	# Normal records make up NORMAL_RATIO of the training total.
	total_count = int(normal_count / NORMAL_RATIO)
	# Split the non-normal remainder evenly over the TOTAL_CLASSES - 1 attack
	# classes, rounding up (integer ceiling division, no float round-trip).
	attack_count = (total_count - normal_count + TOTAL_CLASSES - 2) // (TOTAL_CLASSES - 1)

	print("Taking " + str(normal_count) + " normal records")
	print("Total records: " + str(total_count))
	print("Taking " + str(attack_count) + " records of each attack type")
	print()

	files = [os.path.join(CLASSES_DIR, name) for name in os.listdir(CLASSES_DIR)]
	# Hoisted out of the loop: the one input file that uses the normal quota.
	normal_path = os.path.join(CLASSES_DIR, NORMAL_FILENAME)

	# Context managers guarantee both outputs are flushed and closed even when
	# reading one of the class files raises.
	with open(os.path.join(OUTPUT_DIR, TEST_DATASET), mode="w+", newline='') as test_dataset, \
	        open(os.path.join(OUTPUT_DIR, TRAIN_DATASET), mode="w+", newline='') as train_dataset:
	    test_writer = csv.writer(test_dataset)
	    train_writer = csv.writer(train_dataset)

	    for file in files:
	        # Training quota for this file: normal_count for the normal file,
	        # attack_count for every other file.
	        check = normal_count if file == normal_path else attack_count
	        with open(file, 'r', newline='') as attack:
	            # csv.reader yields [] for blank lines; drop them so row[-1]
	            # cannot raise and they do not consume quota.
	            rows = (row for row in csv.reader(attack) if row)
	            for row_count, row in enumerate(rows):
	                # Drop the final character of the last field (presumably the
	                # trailing '.' on KDD Cup labels -- confirm against the data).
	                row[-1] = row[-1][:-1]
	                if row_count < check:
	                    append(train_writer, row)
	                else:
	                    append(test_writer, row)
No results found