Last active
April 17, 2023 00:22
-
-
Save Rovsau/88654e9714d5818288faca43b7c95b32 to your computer and use it in GitHub Desktop.
Extract Named Functions (and Ranges) from all Google Spreadsheets accessible via the credentials.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.IO; | |
using System.Xml.Linq; | |
using System.Threading; | |
using System.IO.Compression; | |
using Google.Apis.Auth.OAuth2; | |
using Google.Apis.Services; | |
using Google.Apis.Sheets.v4; | |
using Google.Apis.Sheets.v4.Data; | |
using System.Collections.Generic; | |
using System.Threading.Tasks; | |
using Google.Apis.Drive.v3; | |
using Google.Apis.Drive.v3.Data; | |
using File = Google.Apis.Drive.v3.Data.File; | |
// Source: https://gist.github.com/tanaikech/9a9e571ed662e35eec0aa747bb4e025a | |
namespace Rovsau.Csharp.GoogleAPIGetAllNamedFunctions | |
{ | |
public static class GoogleSheetsNamedFunctionsExtractor | |
{ | |
private const string ApplicationName = "Google Sheets Named Functions and Range Extractor"; | |
private static readonly string[] Scopes = { DriveService.Scope.DriveReadonly }; | |
private const string ClientSecretsRelativePath = "Secrets/mydatabase-123456-a0123456789b.json"; | |
public static async Task Init() | |
{ | |
GoogleCredential credential = GetCredential(ClientSecretsRelativePath, Scopes); | |
DriveService drive = GetDriveService(credential, ApplicationName); | |
SheetsService sheets = GetSheetsService(credential, ApplicationName); | |
List<File> allAccessibleSheets = await GetAllSpreadsheets(drive); | |
Console.WriteLine("Accessible Spreadsheets: " + allAccessibleSheets.Count); | |
foreach (File spreadsheet in allAccessibleSheets) | |
{ | |
Console.WriteLine(spreadsheet.Name); | |
byte[] xlsx = await DownloadXlsxFromSheet(spreadsheet.Id, drive); | |
string[] namedFunctionsAndRanges = await ParseXlsxAndGetNamedFunctionsAsArray(xlsx); | |
foreach (string f in namedFunctionsAndRanges) | |
{ | |
Console.WriteLine(f); | |
} | |
} | |
} | |
public static GoogleCredential GetCredential(string clientSecretsFilePath, string[] scopes) | |
{ | |
return GoogleCredential.FromFile(clientSecretsFilePath).CreateScoped(scopes); | |
} | |
public static DriveService GetDriveService(GoogleCredential credential, string applicationName) | |
{ | |
try | |
{ | |
return | |
new DriveService( | |
new BaseClientService.Initializer() | |
{ | |
HttpClientInitializer = credential, | |
ApplicationName = applicationName | |
}); | |
} | |
catch (Exception ex) | |
{ | |
throw new Exception("Error initializing Drive services: " + ex); | |
} | |
} | |
public static SheetsService GetSheetsService(GoogleCredential credential, string applicationName) | |
{ | |
try | |
{ | |
return | |
new SheetsService( | |
new BaseClientService.Initializer() | |
{ | |
HttpClientInitializer = credential, | |
ApplicationName = applicationName | |
}); | |
} | |
catch (Exception ex) | |
{ | |
throw new Exception("Error initializing Sheets services: " + ex); | |
} | |
} | |
public static BaseClientService.Initializer GetBaseClientServiceInitializer(GoogleCredential credential, string applicationName) | |
{ | |
try | |
{ | |
return // Should I cache this, or a new one for each service? | |
new BaseClientService.Initializer() | |
{ | |
HttpClientInitializer = credential, | |
ApplicationName = applicationName | |
}; | |
} | |
catch (Exception ex) | |
{ | |
throw new Exception("Error initializing BaseClientService.Initializer: " + ex); | |
} | |
} | |
public static async Task<List<File>> GetAllSpreadsheets(DriveService driveService) | |
{ | |
try | |
{ | |
List<File> allSpreadsheets = new List<File>(); | |
string pageToken = null; | |
FilesResource.ListRequest request = driveService.Files.List(); | |
request.Q = "mimeType='application/vnd.google-apps.spreadsheet'"; //"mimeType='application/vnd.google-apps.spreadsheet' and trashed = false"; | |
request.Fields = "nextPageToken, files(id, name)"; //"nextPageToken, files(id, name, mimeType)"; | |
do | |
{ | |
FileList result = await request.ExecuteAsync(); | |
allSpreadsheets.AddRange(result.Files); | |
pageToken = result.NextPageToken; | |
} while (pageToken != null); | |
Console.WriteLine("Fetched all spreadsheets."); | |
return allSpreadsheets; | |
} | |
catch (Exception ex) | |
{ | |
throw new Exception("Error getting all Spreadsheets: " + ex); | |
} | |
} | |
// Not being used in this example. | |
public static async Task<string[]> GetAllNamedRanges(SheetsService sheetsService, string spreadsheetId) | |
{ | |
var namedRanges = new List<string>(); | |
// Get the named ranges from the Google Sheet | |
SpreadsheetsResource.GetRequest request = sheetsService.Spreadsheets.Get(spreadsheetId); | |
request.IncludeGridData = false; | |
Spreadsheet response = await request.ExecuteAsync(); | |
if (response.NamedRanges != null) | |
{ | |
foreach (NamedRange namedRange in response.NamedRanges) | |
{ | |
namedRanges.Add(namedRange.Name); | |
} | |
} | |
return namedRanges.ToArray(); | |
} | |
public static async Task<byte[]> DownloadXlsxFromSheet(string spreadsheetId, DriveService driveService) | |
{ | |
string mimeType = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"; | |
FilesResource.ExportRequest request = driveService.Files.Export(spreadsheetId, mimeType); | |
var stream = new MemoryStream(); | |
await request.DownloadAsync(stream); | |
return stream.ToArray(); | |
} | |
public static async Task<string[]> ParseXlsxAndGetNamedFunctionsAsArray(byte[] xlsxData) | |
{ | |
// XLSX files are just ZIP files. | |
using var stream = new MemoryStream(xlsxData); | |
using var archive = new ZipArchive(stream, ZipArchiveMode.Read); | |
// The named functions are inside this file. | |
ZipArchiveEntry workbookEntry = archive.GetEntry("xl/workbook.xml"); | |
if (workbookEntry == null) throw new Exception("No workbook entry found."); | |
// Load the file. | |
using Stream workbookStream = workbookEntry.Open(); | |
XDocument xDoc = await XDocument.LoadAsync(workbookStream, LoadOptions.None, CancellationToken.None); | |
XNamespace ns = "http://schemas.openxmlformats.org/spreadsheetml/2006/main"; | |
// Get the elements which include a definedName. This includes named ranges as well as named functions. | |
IEnumerable<XElement> definedNames = xDoc.Root.Element(ns + "definedNames")?.Elements(ns + "definedName"); | |
var namedFunctions = new List<string>(); | |
if (definedNames != null) | |
{ | |
foreach (XElement definedName in definedNames) | |
{ | |
string name = definedName.Attribute("name")?.Value; | |
string formula = definedName.Value; | |
if (name != null) | |
{ | |
namedFunctions.Add($"{name} = {formula}"); | |
} | |
} | |
} | |
return namedFunctions.ToArray(); | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment