Created
June 27, 2021 00:37
-
-
Save whizzter/58135363ef611ef1a45dcd06ecfc9019 to your computer and use it in GitHub Desktop.
Esprima.NET SourceMap adjustment
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.Collections.Generic; | |
using System.Text.RegularExpressions; | |
using Esprima; | |
using Esprima.Ast; | |
// The 2-clause BSD licence applies.
namespace EsprimaSourceMap | |
{ | |
// partial impl for https://sourcemaps.info/spec.html | |
public class EsprimaSourceMapTransformer | |
{ | |
// Matches a "//# sourceMappingURL=<uri>" comment line anywhere in a JS file.
// Capture group 1 holds everything after the '=' up to the end of that line.
private static readonly Regex sourceMappingUrlRe =
    new Regex(@"^\s*//#\s*sourceMappingURL=(.+)$", RegexOptions.Multiline);
/// <summary>
/// Public entry point: parses <paramref name="sourceData"/> as a script and, when the text
/// contains a <c>//# sourceMappingURL=</c> comment, rewrites the node locations of the
/// resulting tree through that source map.
/// </summary>
/// <param name="sourceData">The JavaScript source text to parse.</param>
/// <param name="sourceName">Source name handed to <c>ParserOptions</c>.</param>
/// <param name="externalSourceMapLoader">
/// Optional callback that receives the source-map URI and returns the source-map JSON text.
/// It is only consulted for URIs that are not embedded <c>data:</c> URIs.
/// </param>
/// <returns>The parsed script tree (with remapped locations when a source map was applied).</returns>
public static Esprima.Ast.Script ParseScriptWithSourcemap(string sourceData,string sourceName, Func<string, string> externalSourceMapLoader = null)
{
    // Comment should be set to true below once JavaScriptParser supports comments.
    var parseTimer = System.Diagnostics.Stopwatch.StartNew();
    var scriptTree = new Esprima.JavaScriptParser(sourceData, new ParserOptions(sourceName) { Comment = false }).ParseScript();
    parseTimer.Stop();
    System.Console.WriteLine("Parsing application time:" + parseTimer.Elapsed.TotalMilliseconds);

    // Temporary hack to get the sourcemap (should be moved to comment extraction inside ApplySourceMap).
    // Regex.Match never returns null, so Success is the only thing to check.
    var sourceMapMatch = sourceMappingUrlRe.Match(sourceData);
    if (!sourceMapMatch.Success)
    {
        return scriptTree;
    }

    // With RegexOptions.Multiline '$' matches just before '\n', so for CRLF files the capture
    // ends with a stray '\r' (and possibly trailing blanks); trim so the URI is usable.
    string sourceMapURI = sourceMapMatch.Groups[1].Value.Trim();

    var remapTimer = System.Diagnostics.Stopwatch.StartNew();
    ApplySourceMap(scriptTree, sourceMapURI, externalSourceMapLoader); // Uri handling should be handled by reading the fileTree
    remapTimer.Stop();
    System.Console.WriteLine("Source-map application time:" + remapTimer.Elapsed.TotalMilliseconds);

    return scriptTree;
}
/// <summary>
/// Loads the source map referenced by <paramref name="sourceMapURI"/>, decodes its
/// <c>mappings</c> field and rewrites the <c>Location</c> of every node reachable from
/// <paramref name="userScript"/> so that it points into the original source file.
/// </summary>
/// <remarks>
/// This method should eventually be public and not take the URI as an argument, but since
/// Esprima seems to discard comments right now it stays hidden and takes the URI instead.
/// If no source-map text can be obtained, or it contains no mappings, the tree is left untouched.
/// </remarks>
/// <param name="userScript">Root of the tree whose locations are rewritten in place.</param>
/// <param name="sourceMapURI">Either an embedded <c>data:</c> URI or a URI for the loader.</param>
/// <param name="externalSourceMapLoader">Optional callback returning the source-map JSON for a non-embedded URI.</param>
/// <exception cref="FormatException">The <c>data:</c> URI has no payload separator or invalid base64.</exception>
/// <exception cref="ArgumentException">The mappings string could not be decoded.</exception>
private static void ApplySourceMap(Esprima.Ast.Script userScript, string sourceMapURI, Func<string, string> externalSourceMapLoader = null)
{
    string sourceMapData = null;
    if (sourceMapURI.StartsWith("data:", StringComparison.Ordinal))
    {
        // data-protocol, embedded sourcemap: "data:<header>,<payload>".
        int separatorIndex = sourceMapURI.IndexOf(',');
        if (separatorIndex < 0)
        {
            throw new FormatException("Malformed data: URI in sourceMappingURL (missing ',').");
        }
        string header = sourceMapURI.Substring(0, separatorIndex);
        string payload = sourceMapURI.Substring(separatorIndex + 1);
        // Only base64-decode when the header says so; otherwise the payload is percent-encoded text.
        sourceMapData = header.IndexOf(";base64", StringComparison.OrdinalIgnoreCase) >= 0
            ? System.Text.Encoding.UTF8.GetString(Convert.FromBase64String(payload))
            : Uri.UnescapeDataString(payload);
    }
    else if (externalSourceMapLoader != null)
    {
        // not embedded, so pass the loading to the loader.
        sourceMapData = externalSourceMapLoader(sourceMapURI);
    }
    if (sourceMapData == null)
    {
        return; // could not apply it! (hard-fail here?)
    }

    // We use the Esprima JS parser to parse the JSON (since it's available)
    var esprimaJSON = new Esprima.JavaScriptParser(sourceMapData).ParseExpression();
    SourceMapData sourceMapObject = ToObject(esprimaJSON, typeof(SourceMapData)) as SourceMapData;
    if (sourceMapObject == null || string.IsNullOrEmpty(sourceMapObject.Mappings))
    {
        return; // nothing to remap with
    }

    // for the mapping data we keep the text and an index into it as the state.
    var mappingDataText = sourceMapObject.Mappings;
    int mappingDataIndex = 0;
    // current location (most of the source-mapping data is relative to previous entries)
    var currentRemappingEntry = new SourceMapRemappingEntry { Line = 0, Column = 0, Source = 0, SourceLine = 0, SourceColumn = 0 };
    // copies are pushed onto the remapping list.
    var remappings = new List<SourceMapRemappingEntry>();

    // Called after a field has been decoded: if the next character ends the segment (',') or the
    // group (';', also assumed at end-of-string) the current entry is recorded and the separator consumed.
    bool processedSegmentOrGroup()
    {
        char tok = mappingDataIndex >= mappingDataText.Length ? ';' : mappingDataText[mappingDataIndex];
        if (tok != ',' && tok != ';')
        {
            return false;
        }
        remappings.Add(currentRemappingEntry);
        if (tok == ';')
        {
            // a new group starts a new generated line, columns restart at zero.
            currentRemappingEntry.Line++;
            currentRemappingEntry.Column = 0;
        }
        mappingDataIndex++;
        return true;
    }

    // main loop of reading mapping-data
    while (mappingDataIndex < mappingDataText.Length)
    {
        if (mappingDataText[mappingDataIndex] == ';')
        {
            // Empty group: this generated line has no segments. Advance the line WITHOUT recording
            // an entry, otherwise the line would appear to map to the previous (stale) source position.
            currentRemappingEntry.Line++;
            currentRemappingEntry.Column = 0;
            mappingDataIndex++;
            continue;
        }
        currentRemappingEntry.Column += base64VLQDecode(mappingDataText, ref mappingDataIndex);
        if (processedSegmentOrGroup())
        {
            // what was parsed was a simple column-adjusting entry
            // NOTE(review): such an entry is recorded with the previous source position; confirm
            // against the spec whether one-field segments should be treated as unmapped instead.
            continue;
        }
        currentRemappingEntry.Source += base64VLQDecode(mappingDataText, ref mappingDataIndex);
        currentRemappingEntry.SourceLine += base64VLQDecode(mappingDataText, ref mappingDataIndex);
        currentRemappingEntry.SourceColumn += base64VLQDecode(mappingDataText, ref mappingDataIndex);
        if (processedSegmentOrGroup())
        {
            continue; // a more regular entry that can update more than just the column was found.
        }
        // TODO: handling this value would enable name-demangling; for now it is decoded and dropped.
        base64VLQDecode(mappingDataText, ref mappingDataIndex);
        if (processedSegmentOrGroup())
        {
            continue; // a name-remapping entry was found (we don't do anything with these right now)
        }
        throw new ArgumentException("Decoding problem"); // should this be a soft-fault?
    }

    if (remappings.Count == 0)
    {
        return; // no usable entries: leave every location as it is (also keeps getMapping in bounds)
    }

    // This function locates the appropriate remapping entry for a Position token.
    SourceMapRemappingEntry? getMapping(Position esPos)
    {
        int mappingLine = esPos.Line - 1; // the source-map format is 0-indexed for lines
        int mappingCol = esPos.Column; // both Esprima and the source-map format is 0-indexed for columns
        // do a binary search in our data to find a starting point for our entries
        int idx = remappings.BinarySearch(new SourceMapRemappingEntry { Line = mappingLine, Column = mappingCol }, lineColumnComparer);
        if (idx >= 0)
        {
            // exact hit on an entry's start position.
            return remappings[idx];
        }
        // A negative result is the bitwise complement of the index of the first larger entry.
        idx = ~idx;
        // If it's out of bounds (end of file) then just pick the last entry
        if (idx >= remappings.Count)
        {
            idx = remappings.Count - 1;
        }
        // step back until the entry starts at or before the requested position.
        while (idx > 0 && (remappings[idx].Line > mappingLine || (remappings[idx].Line == mappingLine && remappings[idx].Column > mappingCol)))
        {
            idx--;
        }
        // however some tokens might be generated and not match up to real source positions.
        if (remappings[idx].Line != mappingLine || remappings[idx].Column > mappingCol)
        {
            return null;
        }
        // this will be the closest preceding entry compared to the tokens position
        return remappings[idx];
    }

    // Projects a generated position into the original file using the entry that covers it.
    Position project(Position pos, SourceMapRemappingEntry entry)
    {
        int insideMappingOffset = pos.Column - entry.Column;
        return new Position(entry.SourceLine + 1, entry.SourceColumn + insideMappingOffset);
    }

    Position repositionPosition(Position pos)
    {
        var mapping = getMapping(pos);
        return mapping.HasValue ? project(pos, mapping.Value) : pos;
    }

    Location repositionLocation(Location esLoc)
    {
        var mapping = getMapping(esLoc.Start);
        if (!mapping.HasValue)
        {
            return esLoc;
        }
        // A malformed map may reference a source that is not listed; keep the location unchanged then.
        var sources = sourceMapObject.Sources;
        int sourceIndex = mapping.Value.Source;
        if (sources == null || sourceIndex < 0 || sourceIndex >= sources.Count)
        {
            return esLoc;
        }
        return new Location(project(esLoc.Start, mapping.Value), repositionPosition(esLoc.End), sources[sourceIndex]);
    }

    // this is a recursive node visitor that goes through the source-tree and relocates the source locations.
    void relocate(Node node)
    {
        if (node == null)
        {
            return;
        }
        node.Location = repositionLocation(node.Location);
        foreach (var child in node.ChildNodes)
        {
            relocate(child);
        }
    }

    // Finally initiate the tree-relocation process.
    relocate(userScript);
}
/// <summary>
/// Plain data holder mirroring the top-level JSON object of a source map.
/// Property names correspond to the JSON keys (matched case-insensitively when de-serialized).
/// </summary>
public class SourceMapData
{
    /// <summary>Value of the <c>version</c> key.</summary>
    public double Version { get; set; }

    /// <summary>Value of the <c>file</c> key.</summary>
    public string File { get; set; }

    /// <summary>Value of the <c>sourceRoot</c> key.</summary>
    public string SourceRoot { get; set; }

    /// <summary>Value of the <c>sources</c> key; mapping entries refer to these names by index.</summary>
    public List<string> Sources { get; set; }

    /// <summary>Value of the <c>sourcesContent</c> key.</summary>
    public List<string> SourcesContent { get; set; }

    /// <summary>Value of the <c>names</c> key.</summary>
    public List<string> Names { get; set; }

    /// <summary>Value of the <c>mappings</c> key: the Base64-VLQ encoded mapping text.</summary>
    public string Mappings { get; set; }
}
// One decoded segment of the source map's "mappings" field: a start position in the mapped
// (generated) file together with the position in the original source it corresponds to.
// The fields are deliberately mutable; the decoder keeps one running entry and stores copies.
private struct SourceMapRemappingEntry
{
    // Start of the segment in the mapped file (line, then column).
    public int Line, Column;

    // Index of the original source file name this entry refers to.
    public int Source;

    // Corresponding position in the original source file (line, then column).
    public int SourceLine, SourceColumn;
}
// When searching the remapping entries only the line/column part of the data is of interest:
// entries are ordered by Line first and by Column within the same line.
// CompareTo is used rather than subtraction so that extreme values cannot overflow and flip the sign.
private static readonly Comparer<SourceMapRemappingEntry> lineColumnComparer =
    Comparer<SourceMapRemappingEntry>.Create((a, b) =>
        a.Line != b.Line ? a.Line.CompareTo(b.Line) : a.Column.CompareTo(b.Column));
/// <summary>
/// Decodes one Base64-VLQ number from <paramref name="src"/> starting at <paramref name="idx"/>
/// and advances <paramref name="idx"/> past the characters that were consumed.
/// A useful description of the format: https://medium.com/@trungutt/yet-another-explanation-on-sourcemap-669797e418ce
/// </summary>
/// <param name="src">Text containing the encoded number.</param>
/// <param name="idx">Index of the first character to read; updated to the first unread character.</param>
/// <returns>The decoded signed value.</returns>
/// <exception cref="ArgumentException">
/// The input ends in the middle of a number, contains a character outside the Base64 alphabet,
/// or encodes a value that does not fit in 32 bits.
/// </exception>
private static int base64VLQDecode(string src, ref int idx)
{
    uint tempValue = 0;
    // data is read from least significant bits to most, shift keeps track of it.
    int shift = 0;
    bool continuation;
    do
    {
        if (idx >= src.Length)
        {
            throw new ArgumentException("Unexpected end of data in B64-VLQ");
        }
        // we get a value between 0-63 here.
        uint decodedCharVal = b64v(src[idx++]);
        // bit 5 is the continuation flag
        continuation = 0 != (decodedCharVal & 0x20);
        // bits 0-4 is the actual data that is shifted into place
        uint payload = decodedCharVal & 0x1f;
        // C# masks shift counts to 5 bits, so an over-long number would silently wrap around
        // and corrupt the result; reject anything whose bits would fall outside 32 bits.
        if (payload != 0 && (shift >= 32 || ((payload << shift) >> shift) != payload))
        {
            throw new ArgumentException("B64-VLQ value does not fit in 32 bits");
        }
        if (shift < 32)
        {
            tempValue |= payload << shift;
        }
        shift += 5;
    }
    while (continuation);

    // the least-significant bit carries the sign, the remaining bits the magnitude.
    int magnitude = (int)(tempValue >> 1);
    return 0 != (tempValue & 1) ? -magnitude : magnitude;
}
/// <summary>
/// Maps a character of the Base64 alphabet to its 6-bit value
/// (A-Z: 0-25, a-z: 26-51, 0-9: 52-61, '+': 62, '/': 63).
/// </summary>
/// <exception cref="ArgumentException">The character is not part of the Base64 alphabet.</exception>
private static uint b64v(char c)
{
    if (c >= 'A' && c <= 'Z')
    {
        return (uint)(c - 'A');
    }
    if (c >= 'a' && c <= 'z')
    {
        return (uint)(c - 'a' + 26);
    }
    if (c >= '0' && c <= '9')
    {
        return (uint)(c - '0' + 52);
    }
    if (c == '+')
    {
        return 62;
    }
    if (c == '/')
    {
        return 63;
    }
    throw new ArgumentException("Invalid data in B64-VLQ " + c);
}
/// <summary>
/// A small routine to de-serialize JSON data, parsed into an Esprima AST, into "regular" .NET objects.
/// </summary>
/// <remarks>
/// Object expressions become an instance of <paramref name="target"/> (public instance properties
/// are matched to keys case-insensitively) or a <c>Dictionary&lt;string, object&gt;</c> when no target
/// is given. Array expressions become an instance of <paramref name="target"/> (which must implement
/// <c>IList</c>) or a <c>List&lt;object&gt;</c>. Literals yield their value.
/// JSON keys without a matching writable property on the target are ignored, so documents carrying
/// additional or extension fields can still be read.
/// </remarks>
/// <param name="expr">The expression to convert; a null expression converts to null.</param>
/// <param name="target">Optional type guiding the conversion of objects and arrays.</param>
/// <returns>The converted value.</returns>
/// <exception cref="ArgumentException">
/// The expression (or a property/key inside it) is of a kind that cannot be converted, or an array
/// is to be stored in a target type that is not a list.
/// </exception>
private static object ToObject(Esprima.Ast.Expression expr, Type target = null)
{
    if (expr == null)
    {
        return null;
    }
    if (expr is Esprima.Ast.ObjectExpression objectExpression)
    {
        var reflectedOutputValue = target != null ? Activator.CreateInstance(target) : null;
        var dictionaryOutputValue = target != null ? null : new Dictionary<string, object>();
        foreach (var testProperty in objectExpression.Properties)
        {
            if (!(testProperty is Esprima.Ast.Property prop))
            {
                throw new ArgumentException(testProperty.GetType().Name);
            }
            // JSON keys are string literals; anything else cannot be used as a key here.
            var key = (prop.Key as Esprima.Ast.Literal)?.StringValue;
            if (key == null)
            {
                throw new ArgumentException(prop.Key.GetType().Name);
            }
            if (target == null)
            {
                dictionaryOutputValue[key] = ToObject(prop.Value);
                continue;
            }
            // get the target property
            var objProperty = target.GetProperty(key, System.Reflection.BindingFlags.Public | System.Reflection.BindingFlags.IgnoreCase | System.Reflection.BindingFlags.Instance);
            if (objProperty == null || !objProperty.CanWrite)
            {
                continue; // key unknown to the target type: skip it instead of failing
            }
            // and set the property via reflection to a value generated by a guided ToObject invocation.
            objProperty.SetValue(reflectedOutputValue, ToObject(prop.Value, objProperty.PropertyType));
        }
        return target != null ? reflectedOutputValue : dictionaryOutputValue;
    }
    else if (expr is Esprima.Ast.ArrayExpression arrayExpression)
    {
        // for a generic list target the first type argument guides the element conversion.
        var listElementType = target != null && target.IsGenericType ? target.GenericTypeArguments[0] : null;
        var outputList = (target != null ? Activator.CreateInstance(target) : new List<object>()) as System.Collections.IList;
        if (outputList == null)
        {
            throw new ArgumentException("Cannot store a JSON array in " + target.Name);
        }
        foreach (var iexp in arrayExpression.Elements)
        {
            outputList.Add(ToObject(iexp, listElementType));
        }
        return outputList;
    }
    else if (expr is Esprima.Ast.Literal literalExpression)
    {
        return literalExpression.Value;
    }
    else
    {
        throw new ArgumentException(expr.GetType().Name);
    }
}
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment