Created
February 13, 2025 20:11
-
-
Save TimelessP/1a307f9b23fe0fa83dde581f8fa80c12 to your computer and use it in GitHub Desktop.
larking about
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from lark import Lark
from lark.exceptions import LarkError
# Lark grammar describing the coarse structure of a COBOL program: the four
# divisions, optional sections, paragraphs and period-terminated sentences.
# It is written for the Earley parser, because headings, sentences and
# division headers overlap lexically and only the parser can tell them apart.
COBOL_GRAMMAR = r"""
    // Lark grammar for COBOL DIVISIONS with USING/RETURNING in PROCEDURE DIVISION
    ?start: divisions
    divisions: division+

    // A division consists of a header followed by a body.
    division: division_header division_body

    // A division header is one of the standard headers.
    division_header: identification_division
        | environment_division
        | data_division
        | procedure_division
    identification_division: IDENTIFICATION_DIVISION
    environment_division: ENVIRONMENT_DIVISION
    data_division: DATA_DIVISION

    // The PROCEDURE DIVISION header may include optional USING and RETURNING clauses.
    procedure_division: procedure_division_header
    procedure_division_header: PROCEDURE_DIVISION_KEYWORD using_clause? returning_clause? "."
    PROCEDURE_DIVISION_KEYWORD: /(?i)PROCEDURE\s+DIVISION/

    // The clause keywords are case-insensitive, like every other keyword terminal here.
    using_clause: /\s+USING\s+/i parameter_list
    returning_clause: /\s+RETURNING\s+/i identifier

    // A parameter list is a comma-separated list of identifiers.
    parameter_list: identifier (COMMA identifier)*
    COMMA: ","

    // The division body may contain both sections and loose paragraphs.
    division_body: (section | paragraph)*

    // A section starts with a section header and must contain at least one paragraph.
    section: section_header paragraph+
    section_header: /(?i)[A-Z][A-Z0-9\-]*\s+SECTION\./

    // A paragraph may have an optional heading (a label ending with a period)
    // followed by one or more sentences.
    paragraph: paragraph_heading? sentence+
    paragraph_heading: identifier "."

    // A sentence is a block of text terminated by a period. Quoted literals are
    // consumed whole so that a period inside one does not end the sentence.
    sentence: sentence_body "."
    sentence_body: /(?:[^.'"]|'[^']*'|"[^"]*")+/

    // Identifiers: assume an alphanumeric token starting with a letter (and may include dashes).
    identifier: IDENTIFIER
    IDENTIFIER: /[A-Z][A-Z0-9-]*/i

    // Standard COBOL division headers.
    IDENTIFICATION_DIVISION: /(?i)IDENTIFICATION\s+DIVISION\./
    ENVIRONMENT_DIVISION: /(?i)ENVIRONMENT\s+DIVISION\./
    DATA_DIVISION: /(?i)DATA\s+DIVISION\./

    %import common.WS
    %ignore WS
"""
def main():
    """Parse a built-in sample COBOL program and print its parse tree.

    The sample exercises all four divisions, including a PROCEDURE DIVISION
    header with USING and RETURNING clauses.

    Returns:
        0 if the sample was parsed and printed, 1 if Lark rejected it.
    """
    # Extended COBOL code sample that includes all division types.
    sample_code = r"""
    IDENTIFICATION DIVISION.
    PROGRAM-ID. HELLO-WORLD.
    AUTHOR. JOHNDOE.
    ENVIRONMENT DIVISION.
    CONFIGURATION SECTION.
    SOURCE-COMPUTER. IBM-370.
    OBJECT-COMPUTER. IBM-370.
    INPUT-OUTPUT SECTION.
    FILE-CONTROL.
    SELECT INFILE ASSIGN TO 'INPUT.DAT'.
    SELECT OUTFILE ASSIGN TO 'OUTPUT.DAT'.
    DATA DIVISION.
    WORKING-STORAGE SECTION.
    WS-VAR.
    PIC 9(4) VALUE 0.
    LINKAGE SECTION.
    LS-VAR.
    PIC X(10).
    PROCEDURE DIVISION USING ARG1, ARG2 RETURNING RESULT.
    MAIN-PARAGRAPH.
    DISPLAY "Hello, World".
    PERFORM PROCESS-DATA.
    STOP RUN.
    PROCESS-DATA.
    DISPLAY "Processing Data".
    """

    # Create a Lark parser instance using the COBOL grammar. This stays outside
    # the try block: a grammar that fails to load is a programming error and
    # should surface with its full traceback.
    parser = Lark(COBOL_GRAMMAR, start='start', parser='earley')

    # Parse the extended COBOL code. Only Lark's own errors are reported as a
    # parse failure; anything else is a bug and is allowed to propagate.
    try:
        tree = parser.parse(sample_code)
    except LarkError as e:
        print("Parsing failed:")
        print(e)
        return 1
    else:
        print("Parse tree:")
        print(tree.pretty())
        return 0
# Run the demo only when executed as a script. The value returned by main()
# becomes the process exit status (a None result is treated as success).
if __name__ == "__main__":
    raise SystemExit(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment