# note: this is not a working script, just some parts of the code
## 1. preprocessing and batching (using the pytorch Dataset class)
import glob

import numpy as np
import torch.utils.data as data
def raw_labels_to_sparseTriple(arr):
    """Convert a list of per-video label lists into an (indices, values, shape)
    triple suitable for feeding a tf.sparse_placeholder."""
    if len(arr) == 0:
        raise RuntimeError('empty arr')
    arr = [sorted(x) for x in arr]
    cols = np.concatenate(arr)
    vals = np.ones_like(cols, dtype=np.int8)  # int8 to match the sparse placeholder dtype below
    inds = np.zeros((len(cols), 2), dtype=np.int64)  # sparse indices are fed as int64
    inds[:, 1] = cols
    offset = 0
    for i, row in enumerate(arr):
        row_len = len(row)
        inds[offset:(offset + row_len), 0] = i  # row index = position within the chunk
        offset += row_len
    sp_shape = (len(arr), 4716)  # 4716 = number of YouTube-8M classes
    return (inds, vals, sp_shape)
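# Quick sanity check for the helper above (hypothetical example): two videos
# with label sets {1, 5} and {3}.
_inds, _vals, _shape = raw_labels_to_sparseTriple([[1, 5], [3]])
assert _shape == (2, 4716)
assert _inds.tolist() == [[0, 1], [0, 5], [1, 3]]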
def normalize_array(X):
    """L2-normalize each row (assumes no all-zero rows)."""
    norms = np.linalg.norm(X, axis=1)
    return X / norms[:, None]
class YTDataset(data.Dataset):
    n_scatter = 8

    def __init__(self, glob_pattern):
        self.pattern = glob_pattern
        self.files = glob.glob(glob_pattern)

    def __len__(self):
        return len(self.files)

    def __getitem__(self, index):
        # each npz file is one pre-batched chunk of videos
        npz_file = np.load(self.files[index])
        rgb_raw = normalize_array(npz_file['rgb'])
        audio_raw = normalize_array(npz_file['audio']) / 2.2  # to make ~ (-0.1, 0.1)
        labels_raw = npz_file['labels']
        inputs = np.hstack((rgb_raw, audio_raw))
        if len(labels_raw) == 0:
            targets = []
        else:
            targets = raw_labels_to_sparseTriple(labels_raw)
        return inputs, targets, npz_file['ids']
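# Hypothetical smoke test (paths are machine-specific): each item is one
# pre-batched chunk, so inputs has shape (n_videos_in_chunk, 1152).
#
#     ds = YTDataset('/ssd/yt8m/data_npz/train*.npz')
#     inputs, targets, ids = ds[0]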
## 2. train/valid split
ds_train = YTDataset('/ssd/yt8m/data_npz/train*.npz')
ds_train = data.DataLoader(ds_train, batch_size=1, shuffle=True, num_workers=5, collate_fn=lambda x: x)
ds_val = YTDataset('/ssd/yt8m/data_npz/valid*[a-c].npz')
ds_val = data.DataLoader(ds_val, batch_size=1, shuffle=False, num_workers=5, collate_fn=lambda x: x)
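# With batch_size=1 and the identity collate_fn, each DataLoader step yields a
# list holding a single (inputs, targets, ids) tuple, i.e. one whole npz chunk,
# so a loop would unpack it like this (hypothetical sketch):
#
#     for batch in ds_train:
#         inputs, targets, ids = batch[0]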
## 3. model description
import tensorflow as tf
import tensorflow.contrib.slim as slim
import losses  # losses.py from the google/youtube-8m starter code, assumed to be on the path

class CoreModel():
    n_features = 1152  # 1024 rgb + 128 audio
    n_classes = 4716

    def __init__(self, seed=None):
        self.graph = tf.Graph()
        self.graph.seed = seed
        with self.graph.as_default():
            # placeholders
            self.train_x = tf.placeholder(tf.float32, name='X', shape=[None, self.n_features])
            self.train_y = tf.sparse_placeholder(tf.int8, name='Y')
            self.labels = tf.sparse_tensor_to_dense(self.train_y)
            # wide part -- just an FC layer over the transformed (mirrored-ReLU) input
            self.X = tf.concat([tf.nn.relu(self.train_x), tf.nn.relu(-self.train_x)], axis=1)
            wide = slim.fully_connected(self.X, num_outputs=self.n_classes, activation_fn=None)
            # deep part -- MLP over the original input
            net = slim.fully_connected(self.train_x, num_outputs=1024, activation_fn=tf.sigmoid)
            net = slim.fully_connected(net, num_outputs=1024, activation_fn=tf.sigmoid)
            deep = slim.fully_connected(net, num_outputs=self.n_classes, activation_fn=None)
            # final output
            self.output = tf.sigmoid(deep + wide)
            # loss
            xent = losses.CrossEntropyLoss()
            self.target = xent.calculate_loss(self.output, self.labels)
            tf.summary.scalar('target', self.target)
            # additional nodes; the three trainers allow a manual learning-rate schedule
            self.trainer = tf.train.AdamOptimizer(0.01).minimize(self.target)
            self.trainer2 = tf.train.AdamOptimizer(0.001).minimize(self.target)
            self.trainer3 = tf.train.AdamOptimizer(0.0001).minimize(self.target)
            self.init_all_vars = tf.global_variables_initializer()
            self.summary_op = tf.summary.merge_all()
            self.saver = tf.train.Saver()
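# Hypothetical training-loop sketch (not part of the gist): wires the DataLoader
# to the placeholders and steps the learning rate down by switching between the
# three trainers; the epoch cut-offs are assumptions.
model = CoreModel(seed=42)
with tf.Session(graph=model.graph) as sess:
    sess.run(model.init_all_vars)
    for epoch in range(30):
        if epoch < 10:
            step_op = model.trainer    # lr 0.01
        elif epoch < 20:
            step_op = model.trainer2   # lr 0.001
        else:
            step_op = model.trainer3   # lr 0.0001
        for batch in ds_train:
            inputs, targets, ids = batch[0]  # one npz chunk per step
            if not targets:  # chunk without labels
                continue
            inds, vals, shape = targets
            feed = {model.train_x: inputs,
                    model.train_y: tf.SparseTensorValue(inds, vals, shape)}
            _, loss = sess.run([step_op, model.target], feed_dict=feed)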
## 4. Results
# On validation, this gets roughly the following after 30 epochs
# (learning-rate scheduling would probably help further):
# {
#     'gap': 0.80407694408983299,
#     'avg_hit_at_one': 0.85421149940896557,
#     'avg_loss': 5.4225551040023783,
#     'avg_perr': 0.72940944443862732
# }