Last active
December 3, 2021 19:46
-
-
Save vkareh/6f2a015b02330c6d1eb8 to your computer and use it in GitHub Desktop.
MongoDB script that returns a flattened list of all fields ever used on a specific collection
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Return a flattened list of all fields ever used on a MongoDB collection.
// Usage: `mongo --quiet <database> dbFields.js`

// Name of the collection whose documents are scanned for field names.
var collection = 'users';
// Determines how to represent the keys of an array of objects.
// This allows for differentiating between nested objects and nested arrays of objects.
//
// e.g. for a document
//   { education: [
//       {university: 'umich', city: 'Ann Arbor'},
//       {university: 'mit', city: 'Cambridge'}
//   ]}
//
// using an array separator of "$" would represent the flattened keys as
// "education.$.university", "education.$.city"
var arraySeparator = '$'; // Set to '' to omit
// Flattens a document's fields into "dot-notation" keys for nested attributes.
//
// obj    - document (or nested object / array) to walk
// into   - optional accumulator object; every flattened key is written onto it
// prefix - optional dot-terminated key path of `obj` inside the root document
//
// Returns an array with every key currently stored on `into`.
//
// Notes:
// - Date and RegExp values are treated as leaf values, not as nested objects.
// - Elements of an array are collapsed under `arraySeparator` (or directly
//   under the array's own key when the separator is ''), so array indexes never
//   show up in the result.
// - Primitive array elements are skipped, which means a field holding only
//   primitive values (or an empty array / object) contributes no key at all.
var flatten = function(obj, into, prefix) {
  into = into || {};
  prefix = prefix || '';
  // Decide "is this an array element?" from the container itself. Looking at
  // whether the key is numeric misclassifies object keys such as "2020" or "".
  var inArray = Array.isArray(obj);
  for (var k in obj) {
    // Borrow hasOwnProperty from Object.prototype so a document field that is
    // itself named "hasOwnProperty" cannot shadow it.
    if (!Object.prototype.hasOwnProperty.call(obj, k)) {
      continue;
    }
    var prop = obj[k];
    if (prop && typeof prop === 'object' && !(prop instanceof Date || prop instanceof RegExp)) {
      if (inArray) {
        // Collapse arrays into a single object
        flatten(prop, into, prefix + (arraySeparator.length ? arraySeparator + '.' : ''));
      } else {
        flatten(prop, into, prefix + k + '.');
      }
    } else if (inArray) {
      // Skip arrays of values
      continue;
    } else if (k === 'str' && prefix.length) {
      // Flatten _id reference fields: a nested value exposing a primitive `str`
      // property is recorded under its parent's key (presumably this matches
      // shell ObjectId values -- verify against the shell in use). The prefix
      // guard keeps a top-level field named "str" from being stored under ''.
      into[prefix.slice(0, -1)] = prop;
    } else {
      into[prefix + k] = prop;
    }
  }
  return Object.keys(into);
};
// Return the distinct values of an array, keeping the first occurrence of each
// value in its original order. Values are compared the way
// Array.prototype.indexOf compares them (strict equality). The input array is
// not modified.
var unique = function(array) {
  // The accumulator doubles as the "already seen" list, so no second array is needed.
  return array.reduce(function(distinct, value) {
    if (distinct.indexOf(value) === -1) {
      distinct.push(value);
    }
    return distinct;
  }, []);
};
// Accumulated, de-duplicated list of flattened field names
var fields = [];
// Concatenate flattened fields for all objects into a single array,
// de-duplicating after every document so the list never holds repeats
db[collection].find().forEach(function(doc) {
  fields = unique(fields.concat(flatten(doc)));
});
print(fields);
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Similar to `dbFields.js`, but without recursive nesting.
// It only looks at the first level of an Array or Object.

// Name of the collection whose documents are scanned for field names.
var collection = 'conversations';
// Marker placed between an array field and the keys of its object elements,
// e.g. "education.$.city".
var arraySeparator = '$';
// Returns true when `obj` is an array, false for every other value.
var isArray = function(obj) {
  return Array.isArray(obj);
};
// Returns true when `obj` is a non-null value of type "object" that is not a
// Date. Note that arrays also satisfy this check.
var isObject = function(obj) {
  // typeof null is "object", so null has to be ruled out explicitly.
  return obj !== null && typeof obj === 'object' && !(obj instanceof Date);
};
// Return the distinct values of an array, keeping the first occurrence of each
// value in its original order. Values are compared the way
// Array.prototype.indexOf compares them (strict equality). The input array is
// not modified.
var unique = function(array) {
  // The accumulator doubles as the "already seen" list, so no second array is needed.
  return array.reduce(function(distinct, value) {
    if (distinct.indexOf(value) === -1) {
      distinct.push(value);
    }
    return distinct;
  }, []);
};
// Lists the own enumerable keys of `obj`, optionally qualified by a parent path.
//
// obj    - object whose keys are listed
// parent - optional key of the field that holds `obj`; when given (and
//          non-empty) every key is returned as "parent.key"
// array  - truthy when `obj` is an element of the array stored under `parent`;
//          keys are then returned as "parent.<arraySeparator>.key"
//
// Returns a new array of key strings in Object.keys order.
var getKeys = function(obj, parent, array) {
  var segments = [];
  if (parent) {
    segments.push(parent);
    // An empty separator is left out entirely instead of producing "parent..key".
    if (array && arraySeparator.length) {
      segments.push(arraySeparator);
    }
  }
  var prefix = segments.length ? segments.join('.') + '.' : '';
  return Object.keys(obj).map(function(key) {
    return prefix + key;
  });
};
// Lists the field names of a single document, one level deep.
//
// Every top-level key is reported. In addition:
// - for a field holding an array, the keys of each object element are reported
//   as "field.<arraySeparator>.key";
// - for a field holding an object, its keys are reported as "field.key",
//   unless the field name starts with "_" (presumably to leave values such as
//   `_id` unexpanded -- verify against the data).
//
// The result may contain duplicates (e.g. when several array elements share a
// key).
var getFields = function(doc) {
  var fields = [];
  getKeys(doc).forEach(function(key) {
    var value = doc[key];
    fields.push(key);
    if (isArray(value)) {
      value.forEach(function(subDoc) {
        // Only object elements carry field names. Arrays nested inside the
        // array are skipped so their indexes are not reported as fields.
        if (isObject(subDoc) && !isArray(subDoc)) {
          fields = fields.concat(getKeys(subDoc, key, true));
        }
      });
    } else if (isObject(value) && key.charAt(0) !== '_') {
      fields = fields.concat(getKeys(value, key));
    }
  });
  return fields;
};
// Accumulated, de-duplicated list of first-level field names
var fields = [];
// Merge the field names of every document in the collection, de-duplicating
// after each document so the list never holds repeats
db[collection].find().forEach(function(doc) {
  fields = unique(fields.concat(getFields(doc)));
});
print(fields);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment