Created
September 3, 2023 16:43
-
-
Save amirih/8cc4c1d66040c5963c3f4eb3a489aff0 to your computer and use it in GitHub Desktop.
Source code to be reviewed as part of the BMI 500 homework.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Originated in a private repository: code/geolife-train-test.py
# Author: Hossein Amiri (haenter)
# All rights reserved by the author.
import pandas as pandas
import utils.utils as utils
import utils.files as files
import utils.geolife.geo_data as geo_data

# Script purpose: load the GeoLife records, split them into a train and a test
# DataFrame, inject "needle" agents into the test split through
# geo_data.add_needle, then save both splits and log the run parameters.

# --- Configuration -----------------------------------------------------------
number_of_needles = 20          # number of (replaced agent, needle) pairs
train_percentage = 0.9          # forwarded to geo_data.get_trainDataFrame
minimum_needle_records = 100    # forwarded to geo_data.get_filteredDataFrame

# --- Load, filter and split --------------------------------------------------
dataFrame = geo_data.get_dataFrame('origin-no-needle.tsv')
# NOTE(review): presumably drops agents with fewer than
# `minimum_needle_records` records -- confirm against geo_data.
dataFrame = geo_data.get_filteredDataFrame(dataFrame, minimum_needle_records)
eligible_agents = geo_data.get_agentsRecord(dataFrame)
total_agents = len(eligible_agents)

trainDataFrame = geo_data.get_trainDataFrame(dataFrame, train_percentage)
testDataFrame = geo_data.get_testDataFrame(dataFrame, trainDataFrame)

# --- Choose the needle pairs -------------------------------------------------
# One agent more than `number_of_needles` is taken so that the two ID lists
# below can be the same sequence offset by one position: the i-th replaced
# agent is paired with the (i+1)-th selected agent.
selected_agents = eligible_agents.head(number_of_needles + 1)
selected_agents = selected_agents.to_dict()
agent_ids = list(selected_agents.keys())

# Fail fast: with fewer than n+1 agents the two slices below would have
# different lengths, while the file names and log lines would still claim
# `number_of_needles` needles.
if len(agent_ids) < number_of_needles + 1:
    raise ValueError(
        f'need at least {number_of_needles + 1} eligible agents to build '
        f'{number_of_needles} needle pairs, found {len(agent_ids)}')

replaced_agent_ids = agent_ids[0:number_of_needles]
needle_ids = agent_ids[1:number_of_needles + 1]
swap_agents = (replaced_agent_ids, needle_ids)
testDataFrame = geo_data.add_needle(testDataFrame, swap_agents)

# --- Persist the splits ------------------------------------------------------
# Both output files share the same suffix; only the train/test prefix differs.
file_suffix = (
    f'{number_of_needles}-needles-{total_agents}-agents-'
    f'{train_percentage}-normal-portion.tsv')
train_file_name = f'train-{file_suffix}'
test_file_name = f'test-{file_suffix}'
files.save_dataFrame(trainDataFrame, train_file_name)
files.save_dataFrame(testDataFrame, test_file_name)

# --- Log the run parameters --------------------------------------------------
files.log(f'number of needles: {number_of_needles}')
files.log(f'total agents: {total_agents}')
files.log(f'normal data for each needle: {train_percentage*100}%')
# First the symbolic pattern, then the same pattern with this run's values.
files.log(
    'file name pattern: *-numberOfNeedles-needles-totalAgents-agents-trainPercentage-normal-portion.tsv')
files.log(f'file name pattern: *-{file_suffix}')
files.log(f'Needles: {needle_ids}')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment