Feynman27 · March 23, 2017 14:32 · milinddeore · Jun 13, 2018 · aezco · Jul 16, 2019
diff --git a/VideoCNNLSTM_minimal.py b/VideoCNNLSTM_minimal.py
 import tensorflow as tf

 from keras import backend as K
 from keras.layers import Conv2D, MaxPooling2D, Flatten
 from keras.layers import Input, LSTM, Embedding, Dense
 from keras.models import Model, Sequential
 from keras.applications import InceptionV3, VGG19
 from keras.layers import TimeDistributed

 import numpy as np

 def main():
     """Build and smoke-test a CNN+LSTM video model on random data.

     Every frame is encoded by an ImageNet-pretrained InceptionV3 applied
     through ``TimeDistributed``; the resulting sequence of frame vectors is
     summarised by an LSTM and projected by a dense layer. The model is then
     fitted on one randomly generated clip to check that it trains end to end.
     Nothing is returned; the model summary and input shape are printed.
     """
     # include_top must be the boolean False. The string 'False' is non-empty
     # and therefore truthy, which keeps the classification head instead of
     # returning the globally pooled convolutional features.
     cnn = InceptionV3(weights='imagenet',
                       include_top=False,
                       pooling='avg')
     # VGG19 takes the same arguments and can be swapped in here.

     # Use the CNN as a fixed feature extractor.
     cnn.trainable = False

     # NOTE(review): InceptionV3's documented default resolution is 299x299;
     # 229 is kept from the original script -- confirm it is intended.
     H = W = 229
     C = 3
     n_outputs = 256

     # Leading None leaves the number of frames per clip unspecified.
     video_input = Input(shape=(None, H, W, C), name='video_input')

     # Sequence of per-frame feature vectors.
     encoded_frame_sequence = TimeDistributed(cnn)(video_input)

     # Single vector describing the whole clip.
     encoded_video = LSTM(256)(encoded_frame_sequence)

     output = Dense(n_outputs, activation='relu')(encoded_video)

     video_model = Model(inputs=[video_input], outputs=output)

     # summary() prints by itself; wrapping it in print() adds a stray "None".
     video_model.summary()

     video_model.compile(optimizer='adam', loss='mean_squared_error')

     n_samples = 1
     n_frames = 50

     # Integer bounds; the upper bound is exclusive, so 256 covers 0..255.
     frame_sequence = np.random.randint(
         0, 256, size=(n_samples, n_frames, H, W, C))

     # One target vector per sample, so the shapes follow n_samples.
     y = np.random.random(size=(n_samples, n_outputs))

     print(frame_sequence.shape)

     video_model.fit(frame_sequence, y, validation_split=0.0,
                     shuffle=False, batch_size=1)

 # Run the demo only when this file is executed as a script, not on import.
 if __name__ == '__main__':
     main()
	import tensorflow as tf

	from keras import backend as K
	from keras.layers import Conv2D, MaxPooling2D, Flatten
	from keras.layers import Input, LSTM, Embedding, Dense
	from keras.models import Model, Sequential
	from keras.applications import InceptionV3, VGG19
	from keras.layers import TimeDistributed

	import numpy as np

	def main():
	    """Build and smoke-test a CNN+LSTM video model on random data.

	    Every frame is encoded by an ImageNet-pretrained InceptionV3 applied
	    through ``TimeDistributed``; the resulting sequence of frame vectors is
	    summarised by an LSTM and projected by a dense layer. The model is then
	    fitted on one randomly generated clip to check that it trains end to
	    end. Nothing is returned; the model summary and input shape are printed.
	    """
	    # include_top must be the boolean False. The string 'False' is non-empty
	    # and therefore truthy, which keeps the classification head instead of
	    # returning the globally pooled convolutional features.
	    cnn = InceptionV3(weights='imagenet',
	                      include_top=False,
	                      pooling='avg')
	    # VGG19 takes the same arguments and can be swapped in here.

	    # Use the CNN as a fixed feature extractor.
	    cnn.trainable = False

	    # NOTE(review): InceptionV3's documented default resolution is 299x299;
	    # 229 is kept from the original script -- confirm it is intended.
	    H = W = 229
	    C = 3
	    n_outputs = 256

	    # Leading None leaves the number of frames per clip unspecified.
	    video_input = Input(shape=(None, H, W, C), name='video_input')

	    # Sequence of per-frame feature vectors.
	    encoded_frame_sequence = TimeDistributed(cnn)(video_input)

	    # Single vector describing the whole clip.
	    encoded_video = LSTM(256)(encoded_frame_sequence)

	    output = Dense(n_outputs, activation='relu')(encoded_video)

	    video_model = Model(inputs=[video_input], outputs=output)

	    # summary() prints by itself; wrapping it in print() adds a stray "None".
	    video_model.summary()

	    video_model.compile(optimizer='adam', loss='mean_squared_error')

	    n_samples = 1
	    n_frames = 50

	    # Integer bounds; the upper bound is exclusive, so 256 covers 0..255.
	    frame_sequence = np.random.randint(
	        0, 256, size=(n_samples, n_frames, H, W, C))

	    # One target vector per sample, so the shapes follow n_samples.
	    y = np.random.random(size=(n_samples, n_outputs))

	    print(frame_sequence.shape)

	    video_model.fit(frame_sequence, y, validation_split=0.0,
	                    shuffle=False, batch_size=1)

	# Run the demo only when this file is executed as a script, not on import.
	# The call is indented under the guard; the flattened original was an
	# IndentationError.
	if __name__ == '__main__':
	    main()