Created
December 15, 2021 20:31
-
-
Save vorotech/d50f82b3ffd2c1328e50e093e969351b to your computer and use it in GitHub Desktop.
Find duplicates in Google Drive with TypeScript
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// The original example was written with Next.js, but any framework can be used.
// The authorization part is skipped; see your framework's docs.
// Important notice: when authenticating with Google, the extended scope
// that includes 'https://www.googleapis.com/auth/drive' must be requested.
import { NextApiRequest, NextApiResponse } from 'next';
import jwt from 'next-auth/jwt';
import { drive_v3, google } from 'googleapis';
// Secret handed to `jwt.getToken` when decoding the session token;
// read from the NEXTAUTH_JWT_SECRET environment variable (undefined when unset).
const secret = process.env.NEXTAUTH_JWT_SECRET;
/** Shape the decoded session token is cast to by the API handler. */
type JwtToken = {
  name: string;
  /** Used as the owner e-mail when listing files. */
  email: string;
  /** Passed to the Google OAuth2 client as `access_token`. */
  accessToken: string;
};
/**
 * Subset of Google Drive file metadata used by this module.
 * Every field is optional: a field is only present when it was requested
 * and returned by the API.
 */
type File = {
  id?: string;
  name?: string;
  size?: string;
  /** Key used to group files as duplicates. */
  md5Checksum?: string;
  createdTime?: string;
  modifiedTime?: string;
  /** Parent folder ids; only the first entry is followed when building a path. */
  parents?: string[];
  ownedByMe?: boolean;
  /** Compared against the string "0" to skip files that use no quota. */
  quotaBytesUsed?: string;
  owners?: drive_v3.Schema$User[];
};
/** One ancestor folder in a file's path. */
type FileParent = {
  id: string;
  name: string;
  /** True when `id` is the id of the Drive root folder. */
  root: boolean;
};
/** A Drive file together with its resolved chain of ancestor folders. */
type FileInfo = {
  id: string;
  /** Ancestor folders, nearest parent first; empty until resolved. */
  parents: FileParent[];
  file: File;
};
/**
 * Collect every non-trashed file owned by `email` into `files`, following
 * `nextPageToken` until the listing is exhausted.
 *
 * @param email Owner user email; backslashes and single quotes are escaped
 *              before the value is embedded in the search query
 * @param drive Google Drive client
 * @param pageToken Page token to start from; an empty string starts at the first page
 * @param files Output array; each page's files are appended in the order
 *              returned (the request orders by `quotaBytesUsed`)
 */
async function listFiles(email: string, drive: drive_v3.Drive, pageToken: string, files: File[]): Promise<void> {
  // Query string literals are single-quoted, so `\` and `'` inside the
  // e-mail must be backslash-escaped or the query becomes malformed.
  const owner = email.replace(/[\\']/g, '\\$&');

  // Omit the token entirely on the first page instead of sending an empty value.
  let token: string | undefined = pageToken || undefined;

  do {
    const res = await drive.files.list({
      orderBy: 'quotaBytesUsed',
      pageSize: 1000,
      pageToken: token,
      // https://developers.google.com/drive/api/v3/search-files#node.js
      q: `not trashed and '${owner}' in owners`,
      spaces: 'drive',
      // https://developers.google.com/drive/api/v3/reference/files
      fields: 'nextPageToken, files(id, name, size, md5Checksum, createdTime, modifiedTime, parents, ownedByMe, quotaBytesUsed)',
    });

    if (res.data.files) {
      files.push(...res.data.files);
    }

    token = res.data.nextPageToken || undefined;
  } while (token);
}
/**
 * Build the chain of ancestor folders for a file, nearest parent first.
 *
 * Only the first entry of each `parents` list is followed. The walk stops when
 * a folder has no parent (the root folder, or a folder missing from
 * `filesMap`), when a folder already carries a resolved `parents` chain (which
 * is appended as-is), or when an id repeats.
 *
 * @param fileInfo File whose path is being resolved
 * @param filesMap All known files and folders keyed by id
 * @param rootFolder Metadata of the Drive root folder
 * @returns Ancestors from the immediate parent upwards; empty when the file has no parent
 */
function traverse(fileInfo: FileInfo, filesMap: Map<string, FileInfo>, rootFolder: File): FileParent[] {
  const chain: FileParent[] = [];
  // Guards against looping forever should the parent links ever form a cycle.
  const visited = new Set<string>();

  let currentId: string | undefined = fileInfo.file.parents?.[0];

  while (currentId !== undefined && !visited.has(currentId)) {
    const id: string = currentId;
    visited.add(id);

    const isRoot = id === rootFolder.id;
    const parent: FileInfo =
      filesMap.get(id) ??
      (isRoot
        ? // parent is the root folder
          { id, file: rootFolder, parents: [] }
        : // data about the parent was filtered out while performing the original query
          { id, file: { id, name: '???' }, parents: [] });

    chain.push({ id, name: parent.file.name ?? '???', root: isRoot });

    // An empty `parents` array means "not resolved yet", so keep walking
    // upwards; a non-empty one is an already resolved chain that can be reused.
    if ((parent.parents?.length ?? 0) > 0) {
      chain.push(...parent.parents);
      break;
    }

    currentId = parent.file.parents?.[0];
  }

  return chain;
}
/**
 * Translate a failure thrown by a Drive call into an HTTP response:
 * 401 when the message is "Invalid Credentials", otherwise 500 carrying the
 * message and the thrown value's `errors` property (if any).
 */
function sendDriveError(res: NextApiResponse, error: unknown) {
  // The catch variable is `unknown`, so narrow before reading properties.
  const source = (typeof error === 'object' && error !== null ? error : {}) as {
    message?: unknown;
    errors?: unknown;
  };
  const message = typeof source.message === 'string' ? source.message : String(error);

  if (message === 'Invalid Credentials') {
    return res.status(401).json({ error: 'Unauthorized' });
  }
  return res.status(500).json({ error: message, details: source.errors });
}

/**
 * API route: report groups of files in the caller's Google Drive that share
 * an MD5 checksum.
 *
 * Responds with 401 when there is no session token or Google rejects the
 * credentials, 500 on any other Drive failure, and 200 with the duplicate
 * groups otherwise. Each group is a list of `FileInfo` with `parents` resolved.
 */
export default async (req: NextApiRequest, res: NextApiResponse) => {
  const jwtToken = (await jwt.getToken({ req, secret, encryption: true })) as JwtToken;
  if (!jwtToken) {
    return res.status(401).json({ error: 'Unauthorized' });
  }

  const auth = new google.auth.OAuth2();
  auth.setCredentials({ access_token: jwtToken.accessToken });
  const drive = google.drive({ version: 'v3', auth });

  const files: File[] = [];
  let rootFolder: File;

  try {
    // Get root folder
    const rootRes = await drive.files.get({ fileId: 'root' });
    rootFolder = rootRes.data;

    // List files
    await listFiles(jwtToken.email, drive, '', files);
  } catch (error) {
    return sendDriveError(res, error);
  }

  // Map files by id with an empty (not yet resolved) path
  const filesMap = new Map<string, FileInfo>();
  for (const file of files) {
    if (file.id) {
      filesMap.set(file.id, { id: file.id, file, parents: [] });
    }
  }

  // Group files by md5 checksum
  const groups = new Map<string, FileInfo[]>();
  for (const info of filesMap.values()) {
    const checksum = info.file.md5Checksum;
    // Skip entries that use no quota, and entries without a checksum:
    // grouping those under a shared "undefined" key would report unrelated
    // files as duplicates of each other.
    if (info.file.quotaBytesUsed === '0' || !checksum) {
      continue;
    }

    info.parents = traverse(info, filesMap, rootFolder);

    const group = groups.get(checksum);
    if (group) {
      group.push(info);
    } else {
      groups.set(checksum, [info]);
    }
  }

  // Filter groups with more than one item
  const filteredGroups = Array.from(groups.values()).filter((g: FileInfo[]) => g.length > 1);

  // NOTE(review): the payload is stringified before being handed to `json()`,
  // so clients receive a JSON string that itself contains JSON. Kept as-is
  // because existing clients may rely on it — confirm before changing.
  return res.status(200).json(JSON.stringify(filteredGroups, null, 2));
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment