Last active
February 21, 2025 22:18
-
-
Save njvack/f744fe439c175566b961259f904eaef9 to your computer and use it in GitHub Desktop.
Make files for REDCap-based multifactorial designs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# Copyright 2025 Board of Regents of the University of Wisconsin System | |
# Written by Nate Vack <[email protected]> at the Center for Healthy Minds | |
""" | |
Generates multi-factorial randomization lists for REDCap. | |
Takes three parameters: | |
* list_length (the approximate length of the list you want to generate)
* factor_count (the number of factors in your multifactorial design) | |
* file_prefix (the start of the generated filenames) | |
The actual list length is list_length rounded up to the next multiple of 2^factor_count.
The script will generate factor_count output files. Each output has two columns: | |
redcap_randomization_number (counts up from 1 to the actual list length) | |
redcap_randomization_group (1 or 0, depending on whether this factor is true | |
for this record) | |
""" | |
import numpy as np | |
import sys | |
def generate_shuffled_sequence(seq_range, reps, rng=None):
    """
    Generate a blocked, shuffled sequence of condition numbers.

    The result has seq_range * reps entries. It is made of reps consecutive
    blocks; each block contains every integer from 0 to seq_range - 1 exactly
    once, in an independently shuffled order.

    Parameters:
        seq_range: number of distinct values in each block (values are
            0 .. seq_range - 1).
        reps: number of shuffled blocks to concatenate.
        rng: optional seed or numpy.random.Generator. It is passed to
            numpy.random.default_rng, so None (the default) gives a fresh,
            unpredictably seeded generator, while an int seed or an existing
            Generator makes the output reproducible.

    Returns:
        A 1-D numpy array of big-endian 32-bit integers (dtype ">i4").
    """
    # default_rng returns an existing Generator unchanged and seeds a new one
    # from anything else, so all three forms of `rng` go through one call.
    generator = np.random.default_rng(rng)
    # One row per repetition; every row starts out as 0, 1, ..., seq_range - 1.
    blocks = np.tile(np.arange(seq_range, dtype=">i4"), (reps, 1))
    # axis=1 shuffles each row independently of the others.
    shuffled = generator.permuted(blocks, axis=1)
    return shuffled.flatten()
def bittify_sequence(seq):
    """
    Turn a sequence of integers into their bitwise representation.

    Each element becomes one row of 0/1 values, most significant bit first,
    so the last column is always the least significant bit. The number of
    columns is 8 times the item size of the input dtype (8 for uint8, 32 for
    a 32-bit integer).

    Parameters:
        seq: a numpy array (or array-like) of integers.

    Returns:
        A uint8 array of shape (len(seq), bits_per_element).
    """
    values = np.asarray(seq)
    # unpackbits works byte by byte in memory order, so force big-endian
    # storage; otherwise a little-endian input would yield scrambled columns.
    big_endian = np.ascontiguousarray(
        values.astype(values.dtype.newbyteorder(">"))
    )
    bits = np.unpackbits(big_endian.view(np.uint8))
    if bits.size == 0:
        # reshape(0, -1) cannot infer the column count for an empty array.
        return bits.reshape(0, values.dtype.itemsize * 8)
    return bits.reshape(len(values), -1)
def main(list_length, factor_count, file_prefix):
    """
    Build the randomization list and write one CSV file per factor.

    The list holds reps shuffled blocks of 2**factor_count condition numbers,
    where reps is list_length / 2**factor_count rounded up. Factor k
    (1-based) takes its 0/1 group from the k-th column counting back from the
    last column of the bit table, and is written to
    "<file_prefix>_<k>.csv" with a header row followed by one
    "number,group" row per record. Progress information is printed along
    the way.
    """
    condition_count = 2**factor_count
    print(f"sequence range: {condition_count}")

    block_count = int(np.ceil(list_length / condition_count))
    print(f"reps: {block_count}")

    conditions = generate_shuffled_sequence(condition_count, block_count)
    actual_list_length = len(conditions)
    print(f"actual list length: {actual_list_length}")

    bit_table = bittify_sequence(conditions)
    print(bit_table)

    # Record numbers are 1-based and shared by every factor file.
    record_numbers = np.arange(1, actual_list_length + 1)

    for factor in range(1, factor_count + 1):
        csv_name = f"{file_prefix}_{factor}.csv"
        # Negative index: factor 1 reads the last column, factor 2 the one
        # before it, and so on.
        groups = bit_table[:, -factor]
        print(groups)
        with open(csv_name, "w") as handle:
            print(f"Writing {csv_name}")
            handle.write("redcap_randomization_number,redcap_randomization_group\n")
            handle.writelines(
                f"{number},{group}\n"
                for number, group in zip(record_numbers, groups)
            )
if __name__ == "__main__":
    # Command-line entry point: list_length factor_count file_prefix.
    # Arguments beyond the third are ignored.
    if len(sys.argv) < 4:
        # sys.exit with a string prints it to stderr and exits with status 1,
        # replacing the bare traceback that missing arguments would cause.
        sys.exit(f"Usage: {sys.argv[0]} list_length factor_count file_prefix")
    try:
        list_length, factor_count = [int(s) for s in sys.argv[1:3]]
    except ValueError:
        sys.exit("list_length and factor_count must be integers")
    file_prefix = sys.argv[3]
    if factor_count < 1:
        print("Must generate at least one factor")
        sys.exit(1)
    main(list_length, factor_count, file_prefix)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment