Last active
May 30, 2021 13:52
-
-
Save karlkranich/aa2d21aad7edcfda1f802c7d172e6302 to your computer and use it in GitHub Desktop.
Find duplicates across Google Sheets
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** Multisheet Duplicate Finder ** | |
To use, paste this code into a Google Spreadsheets Apps Script (Tools - Script editor) | |
Loops through all of the sheets in the current spreadsheet. | |
Identifies duplicates in the chosen column. | |
Skips the chosen number of header rows. | |
Creates and alert box listing the duplicates, and colors the duplicate cells red. | |
The directive below limits the script to only be able to access this spreadsheet. | |
* @OnlyCurrentDoc | |
*/ | |
function findDuplicatesAcrossSheets() { | |
// Set the following variables to change the script's behavior | |
const COLUMN_TO_CHECK = 8; // A=1, B=2, etc. | |
const HEADER_ROWS = 2; // script will skip this number of rows | |
dupeList = []; // an array to fill with duplicates | |
urlLocs = {}; // track which sheet(s) contain a url | |
var ss = SpreadsheetApp.getActiveSpreadsheet(); | |
var sheets = ss.getSheets(); | |
for (var i = 0; i < sheets.length; i++) { | |
numRows = sheets[i].getLastRow(); | |
if (numRows > HEADER_ROWS) { | |
sheetName = sheets[i].getName(); | |
var data = sheets[i].getRange(HEADER_ROWS+1, COLUMN_TO_CHECK, numRows-HEADER_ROWS, 1).getValues(); | |
for (index in data) { | |
row = parseInt(index) + HEADER_ROWS + 1; | |
var url = data[index][0]; | |
if (url == "") {continue;} // ignore empty url cells | |
// We know we are looking at URLs, so we will do some cleanup specific to that type of data | |
url = url.toLowerCase(); // convert to lowercase | |
url = url.replace("https://", ""); // remove "https://" | |
url = url.replace("http://", ""); // remove "http://" | |
url = url.replace(/^www\./, ""); // remove leading "www." | |
url = url.replace(/\?.*/, ""); // remove "?" and anything following | |
url = url.replace(/\/$/, ""); // remove trailing "/" | |
if (urlLocs.hasOwnProperty(url)) { | |
dupeList.push("duplicate: " + url + " in sheet " + sheetName + " and sheet " + urlLocs[url].sheet); | |
sheets[i].getRange(row,COLUMN_TO_CHECK,1,1).setBackground("red"); | |
ss.getSheetByName(urlLocs[url].sheet).getRange(urlLocs[url].row,COLUMN_TO_CHECK,1,1).setBackground("red"); | |
} | |
urlLocs[url] = {sheet: sheetName, row: row}; | |
} | |
} | |
} | |
if (dupeList.length > 0) { | |
Browser.msgBox(dupeList.join("\\n")); | |
} else { | |
Browser.msgBox("No duplicates found") | |
} | |
} | |
/** | |
* Adds a custom menu to the active spreadsheet | |
*/ | |
function onOpen() { | |
var sheet = SpreadsheetApp.getActiveSpreadsheet(); | |
var entries = [{ | |
name : "Find Duplicates Across Sheets", | |
functionName : "findDuplicatesAcrossSheets" | |
}]; | |
sheet.addMenu("My Scripts", entries); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment