-
-
Save jinyu121/e530dc9767d8f83c08f3582c71a5cbc8 to your computer and use it in GitHub Desktop.
| # -*- coding: utf-8 -*- | |
| from __future__ import absolute_import | |
| from __future__ import division | |
| from __future__ import print_function | |
| import argparse | |
| import numpy as np | |
| import os | |
| import random | |
| from tqdm import tqdm | |
| import sklearn.cluster as cluster | |
| def iou(x, centroids): | |
| dists = [] | |
| for centroid in centroids: | |
| c_w, c_h = centroid | |
| w, h = x | |
| if c_w >= w and c_h >= h: | |
| dist = w * h / (c_w * c_h) | |
| elif c_w >= w and c_h <= h: | |
| dist = w * c_h / (w * h + (c_w - w) * c_h) | |
| elif c_w <= w and c_h >= h: | |
| dist = c_w * h / (w * h + c_w * (c_h - h)) | |
| else: # means both w,h are bigger than c_w and c_h respectively | |
| dist = (c_w * c_h) / (w * h) | |
| dists.append(dist) | |
| return np.array(dists) | |
| def avg_iou(x, centroids): | |
| n, d = x.shape | |
| sums = 0. | |
| for i in range(x.shape[0]): | |
| # note IOU() will return array which contains IoU for each centroid and X[i] | |
| # slightly ineffective, but I am too lazy | |
| sums += max(iou(x[i], centroids)) | |
| return sums / n | |
| def write_anchors_to_file(centroids, distance, anchor_file): | |
| anchors = centroids * 416 / 32 # I do not know whi it is 416/32 | |
| anchors = [str(i) for i in anchors.ravel()] | |
| print( | |
| "\n", | |
| "Cluster Result:\n", | |
| "Clusters:", len(centroids), "\n", | |
| "Average IoU:", distance, "\n", | |
| "Anchors:\n", | |
| ", ".join(anchors) | |
| ) | |
| with open(anchor_file, 'w') as f: | |
| f.write(", ".join(anchors)) | |
| f.write('\n%f\n' % distance) | |
| def k_means(x, n_clusters, eps): | |
| init_index = [random.randrange(x.shape[0]) for _ in range(n_clusters)] | |
| centroids = x[init_index] | |
| d = old_d = [] | |
| iterations = 0 | |
| diff = 1e10 | |
| c, dim = centroids.shape | |
| while True: | |
| iterations += 1 | |
| d = np.array([1 - iou(i, centroids) for i in x]) | |
| if len(old_d) > 0: | |
| diff = np.sum(np.abs(d - old_d)) | |
| print('diff = %f' % diff) | |
| if diff < eps or iterations > 1000: | |
| print("Number of iterations took = %d" % iterations) | |
| print("Centroids = ", centroids) | |
| return centroids | |
| # assign samples to centroids | |
| belonging_centroids = np.argmin(d, axis=1) | |
| # calculate the new centroids | |
| centroid_sums = np.zeros((c, dim), np.float) | |
| for i in range(belonging_centroids.shape[0]): | |
| centroid_sums[belonging_centroids[i]] += x[i] | |
| for j in range(c): | |
| centroids[j] = centroid_sums[j] / np.sum(belonging_centroids == j) | |
| old_d = d.copy() | |
| def get_file_content(fnm): | |
| with open(fnm) as f: | |
| return [line.strip() for line in f] | |
| def main(args): | |
| print("Reading Data ...") | |
| file_list = [] | |
| for f in args.file_list: | |
| file_list.extend(get_file_content(f)) | |
| data = [] | |
| for one_file in tqdm(file_list): | |
| one_file = one_file.replace('images', 'labels') \ | |
| .replace('JPEGImages', 'labels') \ | |
| .replace('.png', '.txt') \ | |
| .replace('.jpg', '.txt') | |
| for line in get_file_content(one_file): | |
| clazz, xx, yy, w, h = line.split() | |
| data.append([float(w),float(h)]) | |
| data = np.array(data) | |
| if args.engine.startswith("sklearn"): | |
| if args.engine == "sklearn": | |
| km = cluster.KMeans(n_clusters=args.num_clusters, tol=args.tol, verbose=True) | |
| elif args.engine == "sklearn-mini": | |
| km = cluster.MiniBatchKMeans(n_clusters=args.num_clusters, tol=args.tol, verbose=True) | |
| km.fit(data) | |
| result = km.cluster_centers_ | |
| # distance = km.inertia_ / data.shape[0] | |
| distance = avg_iou(data, result) | |
| else: | |
| result = k_means(data, args.num_clusters, args.tol) | |
| distance = avg_iou(data, result) | |
| write_anchors_to_file(result, distance, args.output) | |
| if "__main__" == __name__: | |
| parser = argparse.ArgumentParser() | |
| parser.add_argument('file_list', nargs='+', help='TrainList') | |
| parser.add_argument('--num_clusters', '-n', default=5, type=int, help='Number of Clusters') | |
| parser.add_argument('--output', '-o', default='../results/anchor.txt', type=str, help='Result Output File') | |
| parser.add_argument('--tol', '-t', default=0.005, type=float, help='Tolerate') | |
| parser.add_argument('--engine', '-m', default='sklearn', type=str, | |
| choices=['original', 'sklearn', 'sklearn-mini'], help='Method to use') | |
| args = parser.parse_args() | |
| main(args) |
anchors差别可能还和数据有关,我在自己的标注图片上跑的,用同一种算法得到的结果差别不大(和随机初始值有关,但是结果也就差0.1不到),但是不同算法得到的就不一样了
| engine | result |
|---|---|
| sklearn | 11.22762995, 10.73226759, 10.68771405, 9.1692398, 7.452993003, 6.555998014, 6.299477413, 4.831884219, 3.57714225, 3.068625678 |
| original | 10.83653525, 10.64363488, 10.54793825, 7.023316928, 6.781054383, 6.024040211, 6.005671164, 4.968083672, 3.417490904, 1.448678908 |
另外 anchors = centroids * 416 / 32 这一行,是因为输入维度是416,里边有5次pooling,也就是缩小了32
Hi ,
this file generate 10 values of anchors , i have question about these values , as we have 5 anchors and this generator generate 10 values, more likely a first two of 10 values related to first anchor box , right ? if so , what are means of these two values ? W , H for first anchors for aspect ratio and scale for that anchor?
The 10 values can be grouped as 5 pairs. For example, 11.22762995, 10.73226759, 10.68771405, 9.1692398, 7.452993003, 6.555998014, 6.299477413, 4.831884219, 3.57714225, 3.068625678 means (11.22762995, 10.73226759), (10.68771405, 9.1692398), (7.452993003, 6.555998014), (6.299477413, 4.831884219), (3.57714225, 3.068625678)
In my view, the values is H and W in some scale. (So we can just multiply or add them to the output of the net)
大家谁能告诉我那个file_list的参数应该怎么写啊
The txt file can generated by this file. Each file contains multi lines, each line is a full path of one image.
For example, if you have the training list(s) like this:
001
002
003
and
101
102
103
After the processing by voc_label.py, you may get files like
train_part_1.txt
path_to_voc/VOC2007/JPEGImages/001.png
path_to_voc/VOC2007/JPEGImages/002.png
path_to_voc/VOC2007/JPEGImages/003.png
....
train_part_2.txt
path_to_voc/VOC2007/JPEGImages/101.png
path_to_voc/VOC2007/JPEGImages/102.png
path_to_voc/VOC2007/JPEGImages/103.png
....
Then, you can use python ./get_anchor.py train_part_1.txt train_part_2.txt to get anchors.
得到的这10个值,两两相除,得到的就是需要设置的ratios吗?
实际上这里“使用SkLearm来计算”是有问题的。应该是使用长宽比/IoU来进行计算的。
但是实验发现,两种方法生成的anchors差别不大,精度也大致相同。(原谅我对数字不太敏感),所以个人认为是可以通用的。