a-agmon · January 15, 2020 19:34
diff --git a/seq2.py b/seq2.py
 # Character vocabulary used to turn sequences into numbers and back
 # (this char index was written by Jason Brownlee from Machine Learning Mastery).
 # The position of a character in char_index is its integer code.
 char_index = (
     '0abcdefghijklmnopqrstuvwxyz'
     'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
     '123456789'
     '().,-/+=&$?@#!*:;_[]|%⸏{}\"\''
     ' '
     '\\'
 )

 # Forward table: character -> integer code.
 char_to_int = {ch: idx for idx, ch in enumerate(char_index)}
 # Reverse table: integer code -> character.
 int_to_char = dict(enumerate(char_index))

 from keras.preprocessing.sequence import pad_sequences
 # Function that converts character sequences to integer sequences
 # (it does a little more, but let's leave that for now)
 def encode_sequence_list(seqs, feat_n=0):
     """Encode character sequences as integer rows and pad them.

     Every character of every sequence in ``seqs`` is looked up in the
     module-level ``char_to_int`` table, so a character that is missing
     from that table raises ``KeyError``. The encoded rows are then passed
     to ``pad_sequences`` with ``padding='post'``.

     Args:
         seqs: Iterable of sequences of characters (e.g. strings).
         feat_n: When greater than 0, a zero vector of this length is
             appended as one extra sequence before padding.
             NOTE(review): presumably this forces a minimum padded width;
             the extra all-zero entry is passed to ``pad_sequences`` too,
             so it likely appears as an additional last row -- confirm
             that callers expect it.

     Returns:
         The result of ``pad_sequences`` for the encoded rows.
     """
     rows = [[char_to_int[ch] for ch in seq] for seq in seqs]
     if feat_n > 0:
         rows.append(np.zeros(feat_n))
     return pad_sequences(rows, padding='post')

 def decode_sequence_list(seqs):
     """Translate sequences of integer codes back into characters.

     Each code is looked up in the module-level ``int_to_char`` table; a
     code that is not in the table raises ``KeyError``.

     Args:
         seqs: Iterable of sequences of integer codes.

     Returns:
         A list with one inner list of characters per input sequence
         (the characters are not joined into a string).
     """
     return [[int_to_char[code] for code in row] for row in seqs]
    
 # Using the char_index, the encode_sequence_list function
 # will turn a string like this EBCA0OXO
 # to an array like this [31 28 29 27  0 41 50 41]

 # Encode every string in random_sequences into a row of integer codes,
 # e.g. [[1, 5, 67], [45, 76, 7]]; encode_sequence_list pads the rows
 # at the end (padding='post').
 encoded_seqs = encode_sequence_list(random_sequences)
 # Mix everything up: shuffle the encoded rows in place.
 np.random.shuffle(encoded_seqs)
	# Build the char index that we will use to encode seqs to numbers
	# (this char index was written by Jason Brownlee from Machine Learning Mastery).
	# The position of a character in char_index is its integer code.
	char_index = '0abcdefghijklmnopqrstuvwxyz'
	char_index += 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
	char_index += '123456789'
	# '|' must be written unescaped: '\|' is an invalid escape sequence that
	# keeps the backslash, which would put '\\' into the index twice and
	# shift the code of every character after ']'.
	char_index += '().,-/+=&$?@#!*:;_[]|%⸏{}\"\'' + ' ' + '\\'

	# Forward table: character -> integer code.
	char_to_int = {c: i for i, c in enumerate(char_index)}
	# Reverse table: integer code -> character.
	int_to_char = dict(enumerate(char_index))

	from keras.preprocessing.sequence import pad_sequences
	# Function that converts character sequences to integer sequences
	# (it does a little more, but let's leave that for now)
	def encode_sequence_list(seqs, feat_n=0):
	    """Encode character sequences as integer rows and pad them.

	    Every character of every sequence in ``seqs`` is looked up in the
	    module-level ``char_to_int`` table, so a character that is missing
	    from that table raises ``KeyError``. The encoded rows are then passed
	    to ``pad_sequences`` with ``padding='post'``.

	    Args:
	        seqs: Iterable of sequences of characters (e.g. strings).
	        feat_n: When greater than 0, a zero vector of this length is
	            appended as one extra sequence before padding.
	            NOTE(review): presumably this forces a minimum padded width;
	            the extra all-zero entry is passed to ``pad_sequences`` too,
	            so it likely appears as an additional last row -- confirm
	            that callers expect it.

	    Returns:
	        The result of ``pad_sequences`` for the encoded rows.
	    """
	    encoded_seqs = []
	    for seq in seqs:
	        encoded_seq = [char_to_int[c] for c in seq]
	        encoded_seqs.append(encoded_seq)
	    # Runs once, after all sequences have been encoded.
	    if feat_n > 0:
	        encoded_seqs.append(np.zeros(feat_n))
	    return pad_sequences(encoded_seqs, padding='post')

	def decode_sequence_list(seqs):
	    """Translate sequences of integer codes back into characters.

	    Each code is looked up in the module-level ``int_to_char`` table; a
	    code that is not in the table raises ``KeyError``.

	    Args:
	        seqs: Iterable of sequences of integer codes.

	    Returns:
	        A list with one inner list of characters per input sequence
	        (the characters are not joined into a string).
	    """
	    decoded_seqs = []
	    for seq in seqs:
	        decoded_seq = [int_to_char[i] for i in seq]
	        decoded_seqs.append(decoded_seq)
	    return decoded_seqs

	# Using the char_index, the encode_sequence_list function
	# will turn a string like this EBCA0OXO
	# to an array like this [31 28 29 27 0 41 50 41]

	# Encode every string in random_sequences into a row of integer codes,
	# e.g. [[1, 5, 67], [45, 76, 7]]; encode_sequence_list pads the rows
	# at the end (padding='post').
	encoded_seqs = encode_sequence_list(random_sequences)
	# Mix everything up: shuffle the encoded rows in place.
	np.random.shuffle(encoded_seqs)