Skip to content

Instantly share code, notes, and snippets.

@arrieta
Last active February 21, 2018 16:24
Show Gist options
  • Save arrieta/845646d71c6beee5c5f7cff95acca382 to your computer and use it in GitHub Desktop.
Save arrieta/845646d71c6beee5c5f7cff95acca382 to your computer and use it in GitHub Desktop.
Lexer for the Planetary Data System (PDS) Object Description Language (ODL)
/*
This program implements a lexer for the Object Description Language (ODL), a legacy metadata format
used by NASA's Planetary Data System (PDS).
As of this writing, the ODL version is 2.1, and the specification can be found in:
https://pds.jpl.nasa.gov/documents/sr/Chapter12.pdf
This lexer simply emits the tokens needed by an ODL parser, which is provided as a separate program.
(C) Nabla Zero Labs, 2018
*/
"use strict";
// Names of every token kind the lexer can emit. A name's position in this list
// is its numeric token code, so entries must never be reordered. The list is
// frozen (like the Token table derived from it) so names and codes cannot
// drift apart at runtime.
const TOKEN_NAMES = Object.freeze([
    "NEWLINE", "TAB", "SPACE",
    "COLON", "COMMA", "PERIOD",
    "LPAR", "RPAR", "LCURLY", "RCURLY", "LSQUARE", "RSQUARE",
    "LT", "GT", "EQUAL",
    "PLUS", "DASH", "ASTERISK", "SLASH",
    "CIRCUMFLEX", "AT", "HASH", "AMPERSAND", "DOLLAR",
    "SQUOTE", "DQUOTE",
    "INTEGER", "IDENTIFIER", "COMMENT",
    "STRING", "SYMBOL"
]);

// Builds the frozen name -> numeric code table from TOKEN_NAMES, so that
// TOKEN_NAMES[Token.X] === "X" for every token kind X.
const make_tokens = function () {
    const tokens = {};
    TOKEN_NAMES.forEach(function (name, index) {
        tokens[name] = index;
    });
    return Object.freeze(tokens);
};

// Enumeration of token codes, e.g. Token.IDENTIFIER.
const Token = make_tokens();
// Single-character lexemes mapped to their token codes. The table is frozen,
// like Token, because it is shared read-only state.
// Note: the lexer skips whitespace before consulting this table, so the
// "\n", "\t" and " " entries are never matched by `lex`.
const ATOMS = Object.freeze({
    "\n": Token.NEWLINE,
    "\t": Token.TAB,
    " ": Token.SPACE,
    ":": Token.COLON,
    ",": Token.COMMA,
    ".": Token.PERIOD,
    "(": Token.LPAR,
    ")": Token.RPAR,
    "{": Token.LCURLY,
    "}": Token.RCURLY,
    "[": Token.LSQUARE,
    "]": Token.RSQUARE,
    "<": Token.LT,
    ">": Token.GT,
    "=": Token.EQUAL,
    "+": Token.PLUS,
    "-": Token.DASH,
    "*": Token.ASTERISK,
    "/": Token.SLASH,
    "^": Token.CIRCUMFLEX,
    "@": Token.AT,
    "#": Token.HASH,
    "&": Token.AMPERSAND,
    "$": Token.DOLLAR,
    "'": Token.SQUOTE,
    "\"": Token.DQUOTE
});
// Character-class predicates used by the lexer. Each one coerces its argument
// to a string first, so a non-character such as `undefined` (what the lexer
// sees at end of input) is simply classified as "not in the class".

// True when `c` is exactly one ASCII letter.
const is_alpha = function (c) {
    const text = `${c}`;
    if (text.length !== 1) {
        return false;
    }
    return (text >= "A" && text <= "Z") || (text >= "a" && text <= "z");
};

// True when `c` is exactly one ASCII decimal digit.
const is_digit = function (c) {
    const text = `${c}`;
    if (text.length !== 1) {
        return false;
    }
    return text >= "0" && text <= "9";
};

// True when `c` is exactly one ASCII letter, decimal digit or underscore.
const is_identifier_char = function (c) {
    const text = `${c}`;
    if (text.length !== 1) {
        return false;
    }
    return (
        text === "_" ||
        (text >= "0" && text <= "9") ||
        (text >= "A" && text <= "Z") ||
        (text >= "a" && text <= "z")
    );
};

// True when `c` is a space, tab, line feed or carriage return (no coercion).
const is_space = function (c) {
    switch (c) {
    case " ":
    case "\t":
    case "\n":
    case "\r":
        return true;
    default:
        return false;
    }
};
/**
 * Creates a lexer over ODL source text.
 *
 * Whitespace (as classified by `is_space`) between tokens is skipped, so the
 * NEWLINE, TAB and SPACE atoms are never emitted. A comment is emitted as one
 * COMMENT token whose lexeme is the trimmed text between the comment markers.
 * Quoted text is emitted as STRING (double quotes) or SYMBOL (single quotes)
 * with the quotes removed; it may span several lines. Numbers are emitted only
 * as runs of digits (INTEGER); signs, periods and exponents are separate tokens.
 *
 * @param {string} s - Text to tokenize.
 * @returns {{next: Function}} Object whose `next()` returns
 *     `{token, lexeme, done: false}` for each token in order and then
 *     `{token: undefined, lexeme: undefined, done: true}` on every call made
 *     once the input is exhausted.
 * @throws {SyntaxError} Raised from `next()` on an unexpected character, an
 *     identifier ending in an underscore, or a comment or quoted text that is
 *     never closed. The error also carries `what` (always "SyntaxError") and
 *     `at` (the cursor offset into `s` when the problem was detected).
 */
const lex = function (s) {
    let at = 0; // offset of the next unread character
    const len = s.length;

    const peek = () => s[at];

    const get = function () {
        const c = s[at];
        at += 1;
        return c;
    };

    // Raises a real Error (so a stack trace is available) that still exposes
    // the `what`, `at` and `message` fields.
    const error = function (m) {
        const failure = new SyntaxError(m);
        failure.what = "SyntaxError";
        failure.at = at;
        throw failure;
    };

    // Called with the cursor on the `*` of an opening `/*`.
    const comment = function () {
        get(); // eat leading `*`
        // Searching for the closing marker as a unit handles text such as
        // `**/` and detects a comment that runs off the end of the input.
        const end = s.indexOf("*/", at);
        if (end === -1) {
            error("unterminated comment");
        }
        const lexeme = s.slice(at, end).trim(); // remove whitespace padding
        at = end + 2; // step over the closing `*/`
        return {token: Token.COMMENT, lexeme: lexeme, done: false};
    };

    // Called with the cursor just past the opening quote character.
    const delimited = function (delimiter) {
        let kind;
        if (delimiter === "'") {
            kind = Token.SYMBOL;
        } else if (delimiter === "\"") {
            kind = Token.STRING;
        } else {
            error("unknown delimiter syntax");
        }
        const end = s.indexOf(delimiter, at);
        if (end === -1) {
            error(`unterminated quoted text: missing closing ${delimiter}`);
        }
        const lexeme = s.slice(at, end);
        at = end + 1; // eat trailing delimiter
        return {token: kind, lexeme: lexeme, done: false};
    };

    const next = function () {
        while (is_space(peek())) {
            get();
        }
        if (at >= len) {
            // iterator sentinel
            return {token: undefined, lexeme: undefined, done: true};
        }

        const start = at;
        const c = get();
        const t = ATOMS[c];
        if (t !== undefined) {
            if ((c === "\"") || (c === "'")) {
                return delimited(c);
            }
            if ((c === "/") && (peek() === "*")) {
                return comment();
            }
            return {token: t, lexeme: c, done: false};
        }

        if (is_alpha(c)) {
            while (is_identifier_char(peek())) {
                get();
            }
            const lexeme = s.slice(start, at);
            if (lexeme.endsWith("_")) {
                error("Identifiers cannot end with an underscore.");
            }
            return {token: Token.IDENTIFIER, lexeme: lexeme, done: false};
        }

        if (is_digit(c)) {
            while (is_digit(peek())) {
                get();
            }
            return {token: Token.INTEGER, lexeme: s.slice(start, at), done: false};
        }

        return error(`unexpected character ${c}`);
    };

    return {next: next};
};
@arrieta
Copy link
Author

arrieta commented Feb 21, 2018

Given the input:

PDS_VERSION_ID                  = PDS3
RECORD_TYPE                     = FIXED_LENGTH
RECORD_BYTES                    = 1600
FILE_RECORDS                    = 802
^VICAR_HEADER                   = ("C1044259_CALIB.IMG", 1)
^IMAGE                          = ("C1044259_CALIB.IMG", 2)
^VICAR_EXTENSION_HEADER         = ("C1044259_CALIB.IMG", 802)

DATA_SET_ID                     = "VG2-N-ISS-2/3/4/6-PROCESSED-V1.0"
PRODUCT_ID                      = "C1044259_CALIB.IMG"
PRODUCT_CREATION_TIME           = 2013-10-23T16:00:00
SOURCE_PRODUCT_ID               = ("C1044259_CLEANED.IMG",
                                   "DC2_NA_11_X100_15.IMG",
                                   "FICOR77_VG2_NA_CLEAR.DAT",
                                   "VGRSCF.DAT")
PRODUCT_TYPE                    = CALIBRATED_IMAGE

/* Image Description  */

INSTRUMENT_HOST_NAME            = "VOYAGER 2"
INSTRUMENT_HOST_ID              = VG2
INSTRUMENT_NAME                 = "IMAGING SCIENCE SUBSYSTEM - NARROW ANGLE"
INSTRUMENT_ID                   = "ISSN"
MISSION_PHASE_NAME              = "NEPTUNE ENCOUNTER"
TARGET_NAME                     = "NEREID"
IMAGE_ID                        = "0758N2-032"
IMAGE_NUMBER                    = "10442.59"
IMAGE_TIME                      = 1989-07-24T14:05:36.00
EARTH_RECEIVED_TIME             = UNK
SCAN_MODE_ID                    = "1:1"
SHUTTER_MODE_ID                 = "NAONLY"
GAIN_MODE_ID                    = "LOW"
EDIT_MODE_ID                    = "1:1"
FILTER_NAME                     = "CLEAR"
FILTER_NUMBER                   = "0"
EXPOSURE_DURATION               = 15.3600 <SECOND>
START_TIME                      = 1989-07-24T14:05:20.64
STOP_TIME                       = 1989-07-24T14:05:36.00
SPACECRAFT_CLOCK_START_COUNT    = "10442:58:544"
SPACECRAFT_CLOCK_STOP_COUNT     = "10442:59:001"

NOTE                            =
"OPTICAL NAVIGATION."

DESCRIPTION                     = "This image is the result of calibrating the
corresponding CLEANED image (C1044259_CLEANED.IMG). It was created using the
VICAR software package on a Compaq Alpha running OpenVMS. Routine FICOR77
subtracted a background 'dark current' image (DC2_NA_11_X100_15.IMG) and
converted from dimensionless raw numbers to two-byte integers that are
proportional to I/F. The REFLECTANCE_SCALING_FACTOR value listed below defines
the conversion factor.

Note that this image contains geometric distortion, so the values for the
horizontal and vertical pixel fields of view listed below are only
approximate. See the corresponding GEOMED file (C1044259_GEOMED.IMG) for a
geometrically corrected version. See file DOCUMENT/PROCESSING.TXT for more
information about the image processing."

OBJECT                          = VICAR_HEADER
  HEADER_TYPE                   = VICAR
  BYTES                         = 1600
  RECORDS                       = 1
  INTERCHANGE_FORMAT            = ASCII
  DESCRIPTION                   = "VICAR format label for the image."
END_OBJECT                      = VICAR_HEADER

OBJECT                          = IMAGE
  LINES                         = 800
  LINE_SAMPLES                  = 800
  SAMPLE_TYPE                   = LSB_INTEGER
  SAMPLE_BITS                   = 16
  SAMPLE_DISPLAY_DIRECTION      = RIGHT
  LINE_DISPLAY_DIRECTION        = DOWN
  HORIZONTAL_PIXEL_FOV          = 5.2200E-04 <DEGREE> /* approximate */
  VERTICAL_PIXEL_FOV            = 5.2200E-04 <DEGREE> /* approximate */
  HORIZONTAL_FOV                = 0.4176     <DEGREE> /* approximate */
  VERTICAL_FOV                  = 0.4176     <DEGREE> /* approximate */
  REFLECTANCE_SCALING_FACTOR    = 1.0000E-04
END_OBJECT                      = IMAGE

OBJECT                          = VICAR_EXTENSION_HEADER
  HEADER_TYPE                   = VICAR
  BYTES                         = 1600
  RECORDS                       = 1
  INTERCHANGE_FORMAT            = ASCII
  DESCRIPTION                   = "Continuation of the VICAR header."
END_OBJECT                      = VICAR_EXTENSION_HEADER

END

The program

// Drain the lexer, printing one "NAME: lexeme" line per token until the
// end-of-input sentinel (done === true) is returned.
const tokens = lex(s); // s is the input
for (let t = tokens.next(); !t.done; t = tokens.next()) {
    console.log(`${TOKEN_NAMES[t.token]}: ${t.lexeme}`);
}

produces the output

IDENTIFIER: PDS_VERSION_ID
EQUAL: =
IDENTIFIER: PDS3
IDENTIFIER: RECORD_TYPE
EQUAL: =
IDENTIFIER: FIXED_LENGTH
IDENTIFIER: RECORD_BYTES
EQUAL: =
INTEGER: 1600
IDENTIFIER: FILE_RECORDS
EQUAL: =
INTEGER: 802
CIRCUMFLEX: ^
IDENTIFIER: VICAR_HEADER
EQUAL: =
LPAR: (
STRING: C1044259_CALIB.IMG
COMMA: ,
INTEGER: 1
RPAR: )
CIRCUMFLEX: ^
IDENTIFIER: IMAGE
EQUAL: =
LPAR: (
STRING: C1044259_CALIB.IMG
COMMA: ,
INTEGER: 2
RPAR: )
CIRCUMFLEX: ^
IDENTIFIER: VICAR_EXTENSION_HEADER
EQUAL: =
LPAR: (
STRING: C1044259_CALIB.IMG
COMMA: ,
INTEGER: 802
RPAR: )
IDENTIFIER: DATA_SET_ID
EQUAL: =
STRING: VG2-N-ISS-2/3/4/6-PROCESSED-V1.0
IDENTIFIER: PRODUCT_ID
EQUAL: =
STRING: C1044259_CALIB.IMG
IDENTIFIER: PRODUCT_CREATION_TIME
EQUAL: =
INTEGER: 2013
DASH: -
INTEGER: 10
DASH: -
INTEGER: 23
IDENTIFIER: T16
COLON: :
INTEGER: 00
COLON: :
INTEGER: 00
IDENTIFIER: SOURCE_PRODUCT_ID
EQUAL: =
LPAR: (
STRING: C1044259_CLEANED.IMG
COMMA: ,
STRING: DC2_NA_11_X100_15.IMG
COMMA: ,
STRING: FICOR77_VG2_NA_CLEAR.DAT
COMMA: ,
STRING: VGRSCF.DAT
RPAR: )
IDENTIFIER: PRODUCT_TYPE
EQUAL: =
IDENTIFIER: CALIBRATED_IMAGE
COMMENT: Image Description
IDENTIFIER: INSTRUMENT_HOST_NAME
EQUAL: =
STRING: VOYAGER 2
IDENTIFIER: INSTRUMENT_HOST_ID
EQUAL: =
IDENTIFIER: VG2
IDENTIFIER: INSTRUMENT_NAME
EQUAL: =
STRING: IMAGING SCIENCE SUBSYSTEM - NARROW ANGLE
IDENTIFIER: INSTRUMENT_ID
EQUAL: =
STRING: ISSN
IDENTIFIER: MISSION_PHASE_NAME
EQUAL: =
STRING: NEPTUNE ENCOUNTER
IDENTIFIER: TARGET_NAME
EQUAL: =
STRING: NEREID
IDENTIFIER: IMAGE_ID
EQUAL: =
STRING: 0758N2-032
IDENTIFIER: IMAGE_NUMBER
EQUAL: =
STRING: 10442.59
IDENTIFIER: IMAGE_TIME
EQUAL: =
INTEGER: 1989
DASH: -
INTEGER: 07
DASH: -
INTEGER: 24
IDENTIFIER: T14
COLON: :
INTEGER: 05
COLON: :
INTEGER: 36
PERIOD: .
INTEGER: 00
IDENTIFIER: EARTH_RECEIVED_TIME
EQUAL: =
IDENTIFIER: UNK
IDENTIFIER: SCAN_MODE_ID
EQUAL: =
STRING: 1:1
IDENTIFIER: SHUTTER_MODE_ID
EQUAL: =
STRING: NAONLY
IDENTIFIER: GAIN_MODE_ID
EQUAL: =
STRING: LOW
IDENTIFIER: EDIT_MODE_ID
EQUAL: =
STRING: 1:1
IDENTIFIER: FILTER_NAME
EQUAL: =
STRING: CLEAR
IDENTIFIER: FILTER_NUMBER
EQUAL: =
STRING: 0
IDENTIFIER: EXPOSURE_DURATION
EQUAL: =
INTEGER: 15
PERIOD: .
INTEGER: 3600
LT: <
IDENTIFIER: SECOND
GT: >
IDENTIFIER: START_TIME
EQUAL: =
INTEGER: 1989
DASH: -
INTEGER: 07
DASH: -
INTEGER: 24
IDENTIFIER: T14
COLON: :
INTEGER: 05
COLON: :
INTEGER: 20
PERIOD: .
INTEGER: 64
IDENTIFIER: STOP_TIME
EQUAL: =
INTEGER: 1989
DASH: -
INTEGER: 07
DASH: -
INTEGER: 24
IDENTIFIER: T14
COLON: :
INTEGER: 05
COLON: :
INTEGER: 36
PERIOD: .
INTEGER: 00
IDENTIFIER: SPACECRAFT_CLOCK_START_COUNT
EQUAL: =
STRING: 10442:58:544
IDENTIFIER: SPACECRAFT_CLOCK_STOP_COUNT
EQUAL: =
STRING: 10442:59:001
IDENTIFIER: NOTE
EQUAL: =
STRING: OPTICAL NAVIGATION.
IDENTIFIER: DESCRIPTION
EQUAL: =
STRING: This image is the result of calibrating the
corresponding CLEANED image (C1044259_CLEANED.IMG). It was created using the
VICAR software package on a Compaq Alpha running OpenVMS. Routine FICOR77
subtracted a background 'dark current' image (DC2_NA_11_X100_15.IMG) and
converted from dimensionless raw numbers to two-byte integers that are
proportional to I/F. The REFLECTANCE_SCALING_FACTOR value listed below defines
the conversion factor.

Note that this image contains geometric distortion, so the values for the
horizontal and vertical pixel fields of view listed below are only
approximate. See the corresponding GEOMED file (C1044259_GEOMED.IMG) for a
geometrically corrected version. See file DOCUMENT/PROCESSING.TXT for more
information about the image processing.
IDENTIFIER: OBJECT
EQUAL: =
IDENTIFIER: VICAR_HEADER
IDENTIFIER: HEADER_TYPE
EQUAL: =
IDENTIFIER: VICAR
IDENTIFIER: BYTES
EQUAL: =
INTEGER: 1600
IDENTIFIER: RECORDS
EQUAL: =
INTEGER: 1
IDENTIFIER: INTERCHANGE_FORMAT
EQUAL: =
IDENTIFIER: ASCII
IDENTIFIER: DESCRIPTION
EQUAL: =
STRING: VICAR format label for the image.
IDENTIFIER: END_OBJECT
EQUAL: =
IDENTIFIER: VICAR_HEADER
IDENTIFIER: OBJECT
EQUAL: =
IDENTIFIER: IMAGE
IDENTIFIER: LINES
EQUAL: =
INTEGER: 800
IDENTIFIER: LINE_SAMPLES
EQUAL: =
INTEGER: 800
IDENTIFIER: SAMPLE_TYPE
EQUAL: =
IDENTIFIER: LSB_INTEGER
IDENTIFIER: SAMPLE_BITS
EQUAL: =
INTEGER: 16
IDENTIFIER: SAMPLE_DISPLAY_DIRECTION
EQUAL: =
IDENTIFIER: RIGHT
IDENTIFIER: LINE_DISPLAY_DIRECTION
EQUAL: =
IDENTIFIER: DOWN
IDENTIFIER: HORIZONTAL_PIXEL_FOV
EQUAL: =
INTEGER: 5
PERIOD: .
INTEGER: 2200
IDENTIFIER: E
DASH: -
INTEGER: 04
LT: <
IDENTIFIER: DEGREE
GT: >
COMMENT: approximate
IDENTIFIER: VERTICAL_PIXEL_FOV
EQUAL: =
INTEGER: 5
PERIOD: .
INTEGER: 2200
IDENTIFIER: E
DASH: -
INTEGER: 04
LT: <
IDENTIFIER: DEGREE
GT: >
COMMENT: approximate
IDENTIFIER: HORIZONTAL_FOV
EQUAL: =
INTEGER: 0
PERIOD: .
INTEGER: 4176
LT: <
IDENTIFIER: DEGREE
GT: >
COMMENT: approximate
IDENTIFIER: VERTICAL_FOV
EQUAL: =
INTEGER: 0
PERIOD: .
INTEGER: 4176
LT: <
IDENTIFIER: DEGREE
GT: >
COMMENT: approximate
IDENTIFIER: REFLECTANCE_SCALING_FACTOR
EQUAL: =
INTEGER: 1
PERIOD: .
INTEGER: 0000
IDENTIFIER: E
DASH: -
INTEGER: 04
IDENTIFIER: END_OBJECT
EQUAL: =
IDENTIFIER: IMAGE
IDENTIFIER: OBJECT
EQUAL: =
IDENTIFIER: VICAR_EXTENSION_HEADER
IDENTIFIER: HEADER_TYPE
EQUAL: =
IDENTIFIER: VICAR
IDENTIFIER: BYTES
EQUAL: =
INTEGER: 1600
IDENTIFIER: RECORDS
EQUAL: =
INTEGER: 1
IDENTIFIER: INTERCHANGE_FORMAT
EQUAL: =
IDENTIFIER: ASCII
IDENTIFIER: DESCRIPTION
EQUAL: =
STRING: Continuation of the VICAR header.
IDENTIFIER: END_OBJECT
EQUAL: =
IDENTIFIER: VICAR_EXTENSION_HEADER
IDENTIFIER: END

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment