Created
February 17, 2023 09:36
-
-
Save saagarjha/07e897fd72b370027ce280480761cf1e to your computer and use it in GitHub Desktop.
Apple's metadata extraction code for link previews in Messages, taken from macOS Ventura 13.3 Beta (22E5219e)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//
// LinkPresentation
// Copyright © 2015-2020 Apple Inc. All rights reserved.
//
// FIXME: Twitter equivalents?
(function () { | |
// Shared extraction state plus the DOM helpers used by the rest of this script.
const MetadataExtractor = {
    // Document to inspect; must be assigned before any helper that reads it runs.
    doc : undefined,
    // Base URL against which relative URLs are resolved.
    responseURL : undefined,
    // Extracted metadata, keyed by LPMetadata* name.
    result : {},

    // Returns URLString made absolute against responseURL. Empty or missing
    // input is returned unchanged. The URL constructor throws when the input
    // cannot be parsed against the base.
    resolveURL : function (URLString) {
        // Don't absolutize empty strings.
        if (!URLString)
            return URLString;
        return new window.URL(URLString, this.responseURL).toString();
    },

    // Returns the elements under the document head matching `selector`, as an
    // array ordered best-first: Apple touch icons before favicons, then wider
    // before narrower, then precomposed before non-precomposed.
    // Returns [] when the document has no head or nothing matches.
    extractAndSortIconsWithSelector : function (selector) {
        const head = this.doc.head;
        if (!head)
            return [];
        const extractedIcons = head.querySelectorAll(selector);
        if (!extractedIcons.length)
            return [];

        const IconTypeFavicon = 0;
        const IconTypeAppleTouchIcon = 1;
        const IconTypeAppleTouchIconPrecomposed = 2;
        const AppleTouchIconDefaultWidth = 60;

        function getIconType(icon) {
            // Compare case-insensitively so a differently-cased rel value is
            // not misclassified as a plain favicon.
            const rel = (icon.getAttribute("rel") || "").toLowerCase();
            if (rel === "apple-touch-icon")
                return IconTypeAppleTouchIcon;
            if (rel === "apple-touch-icon-precomposed")
                return IconTypeAppleTouchIconPrecomposed;
            return IconTypeFavicon;
        }

        function getIconWidth(icon, iconType) {
            const sizes = icon.getAttribute("sizes");
            if (sizes) {
                // Non-numeric values such as "any" parse to NaN; treat them as
                // width 0 so the comparator stays a consistent ordering.
                const width = parseInt(sizes, 10);
                return Number.isNaN(width) ? 0 : width;
            }
            // Touch icons without a sizes attribute get a default width.
            return iconType === IconTypeFavicon ? 0 : AppleTouchIconDefaultWidth;
        }

        function compareIconsDescending(icon1, icon2) {
            const icon1Type = getIconType(icon1);
            const icon2Type = getIconType(icon2);

            // Apple Touch icons always come first.
            const icon1IsFavicon = icon1Type === IconTypeFavicon;
            const icon2IsFavicon = icon2Type === IconTypeFavicon;
            if (icon1IsFavicon !== icon2IsFavicon)
                return icon1IsFavicon ? 1 : -1;

            const icon1Width = getIconWidth(icon1, icon1Type);
            const icon2Width = getIconWidth(icon2, icon2Type);
            if (icon2Width > icon1Width)
                return 1;
            if (icon2Width < icon1Width)
                return -1;

            // A Precomposed icon should come first if both icons have the same size.
            const icon1IsPrecomposed = icon1Type === IconTypeAppleTouchIconPrecomposed;
            const icon2IsPrecomposed = icon2Type === IconTypeAppleTouchIconPrecomposed;
            if (icon1IsPrecomposed !== icon2IsPrecomposed)
                return icon1IsPrecomposed ? -1 : 1;
            return 0;
        }

        return Array.from(extractedIcons).sort(compareIconsDescending);
    },

    // Returns the resolved href of every icon matching `selector`, in sorted
    // order, skipping icons whose href is empty.
    extractIconURLsWithSelector : function (selector) {
        return this.extractAndSortIconsWithSelector(selector)
            .map((icon) => icon.href)
            .filter((href) => !!href)
            .map((href) => this.resolveURL(href));
    },

    // Returns the URLs of the Apple touch icons, best first.
    extractTouchIconURLs : function () {
        return this.extractIconURLsWithSelector("link[rel=apple-touch-icon], link[rel=apple-touch-icon-precomposed]");
    },

    // Returns the URLs of the favicons, best first.
    extractFavIconURLs : function () {
        return this.extractIconURLsWithSelector("link[rel=icon], link[rel='shortcut icon']");
    },

    // Returns true when the head contains a link of type application/activity+json.
    usesActivityPub : function () {
        return !!this.doc.head?.querySelector("link[type='application/activity+json']");
    },

    // Parses "key=value, key=value" into an object with trimmed keys and
    // values. Each pair is split at its FIRST "=" only, so values that
    // themselves contain "=" (for example URLs with query strings) are kept
    // intact. Pairs without "=" are ignored. Empty input yields {}.
    parseKeyValueString : function (s) {
        const pairs = {};
        if (!s)
            return pairs;
        for (const pair of s.split(",")) {
            const separatorIndex = pair.indexOf("=");
            if (separatorIndex === -1)
                continue;
            pairs[pair.substring(0, separatorIndex).trim()] = pair.substring(separatorIndex + 1).trim();
        }
        return pairs;
    },
};
// Transformation that absolutizes a URL value via MetadataExtractor.resolveURL.
const resolveURLValue = (val) => MetadataExtractor.resolveURL(val);
// Transformation that parses a base-10 integer; yields NaN for non-numeric input.
const parseIntegerValue = (val) => parseInt(val, 10);

// Maps each LPMetadata* result key to the meta-tag property names that can
// supply it. Key order matters: lookups and resolution walk the entries in
// declaration order.
//
// Fields used by the code in this file:
//   properties          - property names to read, in priority order.
//   transformationFunction - optional; receives the chosen value (possibly
//                         undefined) and returns the value to store.
//   stripLeadingAndTrailingWhitespace - trim string values before storing.
//   decodeHTMLEntities  - decode HTML entities in the raw content.
//   repeatProperties    - marks a repeating group; lists the properties that
//                         can start a new item of the group.
//   children            - per-item definitions of a repeating group.
//   equivalencyProperty - child key used to detect duplicate items.
const propertiesDefinitions = {
    LPMetadataURL : {
        properties : ["og:url"],
        // Falls back to the response URL when the page declares no URL.
        transformationFunction : (val) => (val ? MetadataExtractor.resolveURL(val) : MetadataExtractor.responseURL),
    },
    LPMetadataTitle : {
        properties : ["apple:title", "og:title", "twitter:title"],
        // Falls back to the document title when no title property is present.
        transformationFunction : (val) => (val ? val : MetadataExtractor.doc.title),
        stripLeadingAndTrailingWhitespace : true,
        decodeHTMLEntities : true,
    },
    LPMetadataDescription : {
        properties : ["og:description", "twitter:description", "Description", "description"],
        stripLeadingAndTrailingWhitespace : true,
        decodeHTMLEntities : true,
    },
    LPMetadataAppleDescription : {
        properties : ["apple:description"],
        stripLeadingAndTrailingWhitespace : true,
        decodeHTMLEntities : true,
    },
    LPMetadataSiteName : {
        properties : ["og:site_name"],
        stripLeadingAndTrailingWhitespace : true,
        decodeHTMLEntities : true,
    },
    LPMetadataItemType : {
        properties : ["og:type"],
        stripLeadingAndTrailingWhitespace : true,
        decodeHTMLEntities : true,
    },
    LPMetadataRelatedURL : {
        properties : ["og:see_also"],
        transformationFunction : resolveURLValue,
    },
    LPMetadataCreator : {
        properties : ["author"],
        stripLeadingAndTrailingWhitespace : true,
        decodeHTMLEntities : true,
    },
    LPMetadataCreatorFacebookProfile : {
        properties : ["article:author"],
        stripLeadingAndTrailingWhitespace : true,
        decodeHTMLEntities : true,
    },
    LPMetadataCreatorTwitterUsername : {
        properties : ["twitter:creator", "twitter:creator:id"],
        stripLeadingAndTrailingWhitespace : true,
        decodeHTMLEntities : true,
    },
    LPMetadataTwitterCard : {
        properties : ["twitter:card"],
        stripLeadingAndTrailingWhitespace : true,
        decodeHTMLEntities : true,
    },
    LPMetadataAppleContentID : {
        properties : ["apple:content_id"],
    },
    LPMetadataImages : {
        repeatProperties : ["og:image:secure_url", "og:image", "og:image:url", "twitter:image", "image"],
        properties : ["og:image:secure_url", "og:image", "og:image:url", "og:image:width", "og:image:height", "og:image:type", "twitter:image:alt", "twitter:image", "image"],
        children : {
            LPMetadataImageURL : {
                properties : ["og:image:secure_url", "og:image", "og:image:url", "twitter:image", "image"],
                transformationFunction : resolveURLValue,
            },
            LPMetadataImageWidth : {
                properties : ["og:image:width"],
                transformationFunction : parseIntegerValue,
            },
            LPMetadataImageHeight : {
                properties : ["og:image:height"],
                transformationFunction : parseIntegerValue,
            },
            LPMetadataImageType : {
                properties : ["og:image:type"],
            },
            LPMetadataImageAccessibilityText : {
                properties : ["twitter:image:alt"],
            },
        },
        equivalencyProperty : "LPMetadataImageURL",
    },
    LPMetadataVideos : {
        repeatProperties : ["og:video:secure_url", "og:video", "og:video:url"],
        properties : ["og:video:secure_url", "og:video", "og:video:url", "og:video:width", "og:video:height", "og:video:type"],
        children : {
            LPMetadataVideoURL : {
                properties : ["og:video:secure_url", "og:video", "og:video:url"],
                transformationFunction : resolveURLValue,
            },
            LPMetadataVideoWidth : {
                properties : ["og:video:width"],
                transformationFunction : parseIntegerValue,
            },
            LPMetadataVideoHeight : {
                properties : ["og:video:height"],
                transformationFunction : parseIntegerValue,
            },
            LPMetadataVideoType : {
                properties : ["og:video:type"],
            },
        },
        equivalencyProperty : "LPMetadataVideoURL",
    },
    LPMetadataStreamingVideos : {
        repeatProperties : ["twitter:player:stream"],
        properties : ["twitter:player:stream", "twitter:player:stream:content_type"],
        children : {
            LPMetadataVideoURL : {
                properties : ["twitter:player:stream"],
                transformationFunction : resolveURLValue,
            },
            LPMetadataVideoType : {
                properties : ["twitter:player:stream:content_type"],
            },
        },
        equivalencyProperty : "LPMetadataVideoURL",
    },
    LPMetadataAudios : {
        repeatProperties : ["og:audio:secure_url", "og:audio", "og:audio:url"],
        properties : ["og:audio:secure_url", "og:audio", "og:audio:url", "og:audio:type"],
        children : {
            LPMetadataAudioURL : {
                properties : ["og:audio:secure_url", "og:audio", "og:audio:url"],
                transformationFunction : resolveURLValue,
            },
            LPMetadataAudioType : {
                properties : ["og:audio:type"],
            },
        },
        equivalencyProperty : "LPMetadataAudioURL",
    },
    LPMetadataAssociatedApplication : {
        properties : ["apple-itunes-app"],
        transformationFunction : (val) => MetadataExtractor.parseKeyValueString(val),
    },
};
// Returns the name of the first entry in propertiesDefinitions whose
// `properties` list contains `key`, or undefined when no entry lists it.
function propertyDefinitionNameForProperty(key)
{
    return Object.keys(propertiesDefinitions).find(
        (propertyDefinitionName) => propertiesDefinitions[propertyDefinitionName].properties.includes(key)
    );
}
// Returns the lower-cased text before the first ":" in `key` (for example
// "og" for "og:image"). Returns "" when `key` contains no colon or is not a
// string, so a caller that passes the first key of an empty object does not
// throw.
function namespaceForMetadataKey(key)
{
    if (typeof key !== "string")
        return "";
    const firstColonIndex = key.indexOf(":");
    if (firstColonIndex === -1)
        return "";
    return key.substring(0, firstColonIndex).toLowerCase();
}
// Resolves one definition against `values` and stores the outcome in
// result[propertyDefinitionName].
//
// The first truthy values[name] among propertyDefinition.properties is
// chosen. The optional transformationFunction is then applied (it is called
// even when no value was found), string values are trimmed when
// stripLeadingAndTrailingWhitespace is set, and the final value is stored
// only when it is truthy.
function resolvePropertyWithDefinition(result, values, propertyDefinition, propertyDefinitionName)
{
    let value;
    for (const propertyName of propertyDefinition.properties) {
        value = values[propertyName];
        if (value)
            break;
    }

    const { transformationFunction } = propertyDefinition;
    if (transformationFunction)
        value = transformationFunction(value);

    if (propertyDefinition.stripLeadingAndTrailingWhitespace && typeof value === "string")
        value = value.trim();

    if (value)
        result[propertyDefinitionName] = value;
}
// Resolves every definition in `definitions` against `collectedValues`,
// writing the outcomes into `result`. Falsy definition entries are skipped.
function resolveProperties(result, collectedValues, definitions)
{
    // for...in is used deliberately: it is a no-op when `definitions` is
    // undefined or null.
    for (const propertyDefinitionName in definitions) {
        const propertyDefinition = definitions[propertyDefinitionName];
        if (propertyDefinition)
            resolvePropertyWithDefinition(result, collectedValues, propertyDefinition, propertyDefinitionName);
    }
}
// Builds one item of the repeating group `repeatedPropertyName` from
// `collectedValues` and records it in MetadataExtractor.result.
//
// The item is resolved from the group's `children` definitions and tagged
// with "LPMetadataNamespace", derived from the first collected key. When the
// group declares an equivalencyProperty and an already-recorded item has the
// same value for it, the new item is not appended; instead, any child
// properties the existing item lacks are copied over from the new one.
function resolveRepeatedProperty(collectedValues, repeatedPropertyName)
{
    const propertyDefinition = propertiesDefinitions[repeatedPropertyName];

    const result = {};
    resolveProperties(result, collectedValues, propertyDefinition.children);
    result["LPMetadataNamespace"] = namespaceForMetadataKey(Object.keys(collectedValues)[0]);

    if (!MetadataExtractor.result.hasOwnProperty(repeatedPropertyName))
        MetadataExtractor.result[repeatedPropertyName] = [];
    const existingResults = MetadataExtractor.result[repeatedPropertyName];

    if (propertyDefinition.hasOwnProperty("equivalencyProperty")) {
        const equivalencyProperty = propertyDefinition["equivalencyProperty"];
        // Without the equivalency value on the new item nothing can match.
        if (result.hasOwnProperty(equivalencyProperty)) {
            const equivalentResult = existingResults.find((otherResult) =>
                otherResult.hasOwnProperty(equivalencyProperty) && otherResult[equivalencyProperty] === result[equivalencyProperty]
            );
            if (equivalentResult) {
                // Merge missing child properties into the existing item.
                for (const childPropertyName of Object.keys(propertyDefinition.children)) {
                    if (!equivalentResult.hasOwnProperty(childPropertyName) && result.hasOwnProperty(childPropertyName))
                        equivalentResult[childPropertyName] = result[childPropertyName];
                }
                return;
            }
        }
    }

    existingResults.push(result);
}
// Constructing this class yields a Proxy over a fresh object: reading a
// property that the object does not own first stores `new type()` under that
// property, then returns it. Only reads are intercepted; writes and key
// enumeration go straight to the underlying object.
class ObjectWithDefaultValues
{
    constructor(type) {
        return new Proxy({}, {
            get(object, property) {
                // Call through Object.prototype so that a stored key named
                // "hasOwnProperty" cannot shadow the check itself.
                if (!Object.prototype.hasOwnProperty.call(object, property))
                    object[property] = new type();
                return object[property];
            },
        });
    }
}
// For every entry currently in MetadataExtractor.result, groups its items by
// their "LPMetadataNamespace" value and replaces the entry with only the
// items of the namespace that has the most items. On a tie the namespace
// seen first wins. An entry with no items becomes an empty array.
//
// Every entry of MetadataExtractor.result is iterated as a list of items.
function removeRepeatedPropertiesFromLessPopulousNamespaces()
{
    for (const [rootProperty, children] of Object.entries(MetadataExtractor.result)) {
        const childrenByNamespace = new Map();
        for (const child of children) {
            const namespace = child["LPMetadataNamespace"];
            if (!childrenByNamespace.has(namespace))
                childrenByNamespace.set(namespace, []);
            childrenByNamespace.get(namespace).push(child);
        }

        // Track the winning group itself rather than its name: the namespace
        // may legitimately be the empty string, which must not be mistaken
        // for "nothing selected yet".
        let mostPopulousChildren = null;
        for (const childrenOfNamespace of childrenByNamespace.values()) {
            if (mostPopulousChildren === null || childrenOfNamespace.length > mostPopulousChildren.length)
                mostPopulousChildren = childrenOfNamespace;
        }

        MetadataExtractor.result[rootProperty] = mostPopulousChildren ?? [];
    }
}
// Returns true when `dict` has at least one enumerable key that is not an
// element of `arr`; returns false otherwise, including for an empty `dict`.
function hasAnyKeysNotInArray(dict, arr)
{
    for (const key in dict) {
        if (!arr.includes(key))
            return true;
    }
    return false;
}
// Decodes HTML entities in `rawString` by assigning it as the innerHTML of a
// detached textarea element and reading the element's value back.
// NOTE(review): this relies on the browser treating textarea content as text
// rather than markup, so the input is not expected to create elements —
// confirm before reusing with a different scratch element.
function decodeHTMLEntities(rawString)
{
    const scratchElement = document.createElement("textarea");
    scratchElement.innerHTML = rawString;
    return scratchElement.value;
}
// Walks every meta element of MetadataExtractor.doc and fills
// MetadataExtractor.result.
//
// The property name of an element is its "property", "name" or "itemprop"
// attribute (first non-empty one); elements without a name, without content,
// or whose name no definition lists are skipped.
//
// Non-repeating properties keep the first value seen and are resolved at the
// end. Properties of a repeating group are accumulated into the group's
// current item; the item is flushed through resolveRepeatedProperty when a
// new item starts, and once more for whatever remains after the walk.
function parseMetaTags()
{
    const allMetaElements = MetadataExtractor.doc.getElementsByTagName("meta");
    const rootCollectedValues = {};
    // Values of the item currently being collected, per repeating group.
    const childCollectedValues = new ObjectWithDefaultValues(Object);

    for (let i = 0; i < allMetaElements.length; i++) {
        const el = allMetaElements[i];
        const property = el.getAttribute("property") || el.getAttribute("name") || el.getAttribute("itemprop");
        let content = el.content;
        if (!property || !content)
            continue;

        const propertyDefinitionName = propertyDefinitionNameForProperty(property);
        if (!propertyDefinitionName)
            continue;
        const propertyDefinition = propertiesDefinitions[propertyDefinitionName];
        if (!propertyDefinition)
            continue;

        if (propertyDefinition.decodeHTMLEntities)
            content = decodeHTMLEntities(content);

        if (!propertyDefinition.hasOwnProperty("repeatProperties")) {
            // Not a repeating property, so take the first value.
            if (!rootCollectedValues.hasOwnProperty(property))
                rootCollectedValues[property] = content;
            continue;
        }

        // A repeating property.
        if (propertyDefinition.repeatProperties.includes(property)) {
            const collectedValuesForProperty = childCollectedValues[propertyDefinitionName];
            // A new item starts when the current one already holds a
            // non-repeat key (such as a width) or already holds this very
            // property; flush the current item and begin a fresh one.
            if (collectedValuesForProperty && (hasAnyKeysNotInArray(collectedValuesForProperty, propertyDefinition.repeatProperties) || Object.keys(collectedValuesForProperty).includes(property))) {
                resolveRepeatedProperty(collectedValuesForProperty, propertyDefinitionName);
                childCollectedValues[propertyDefinitionName] = {};
            }
        }
        childCollectedValues[propertyDefinitionName][property] = content;
    }

    // Flush the item still being collected for each repeating group.
    for (const pendingDefinitionName in childCollectedValues)
        resolveRepeatedProperty(childCollectedValues[pendingDefinitionName], pendingDefinitionName);

    removeRepeatedPropertiesFromLessPopulousNamespaces();
    resolveProperties(MetadataExtractor.result, rootCollectedValues, propertiesDefinitions);
}
MetadataExtractor.responseURL = window.location.href; | |
MetadataExtractor.doc = document; | |
try { | |
parseMetaTags(); | |
const touchIcons = MetadataExtractor.extractTouchIconURLs(); | |
const favIcons = MetadataExtractor.extractFavIconURLs(); | |
const usesActivityPub = MetadataExtractor.usesActivityPub(); | |
MetadataExtractor.result["LPMetadataIcons"] = touchIcons.concat(favIcons); | |
MetadataExtractor.result["LPMetadataSelectedText"] = window.getSelection().toString(); | |
MetadataExtractor.result["LPMetadataUsesActivityPub"] = usesActivityPub; | |
return MetadataExtractor.result; | |
} catch(e) { | |
return { error: "Exception: " + e }; | |
} | |
})(); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment