1. /*
  2.  
  3.     Copyright 2012 Robert Pinchbeck
  4.   
  5.     This file is part of AbnfToAntlr.
  6.  
  7.     AbnfToAntlr is free software: you can redistribute it and/or modify
  8.     it under the terms of the GNU General Public License as published by
  9.     the Free Software Foundation, either version 3 of the License, or
  10.     (at your option) any later version.
  11.  
  12.     AbnfToAntlr is distributed in the hope that it will be useful,
  13.     but WITHOUT ANY WARRANTY; without even the implied warranty of
  14.     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15.     GNU General Public License for more details.
  16.  
  17.     You should have received a copy of the GNU General Public License
  18.     along with AbnfToAntlr.  If not, see <http://www.gnu.org/licenses/>.
  19.   
  20. // --------------------------------------------------------------------------------
  21. // ANTLR grammar for constructing Abstract Syntax Trees (AST) of ABNF grammars
  22. //
  23. // Based on RFC 5234 (Augmented BNF for Syntax Specifications)...
  24. // http://tools.ietf.org/html/rfc5234
  25. // --------------------------------------------------------------------------------
  26. */
  27.  
  28. grammar AbnfAst;            // combined grammar: parser rules (lower case) and lexer rules (UPPER CASE) follow
  29.  
  30. options                     // grammar-wide code generation settings
  31. {
  32.     language=CSharp3;       // target used for the generated lexer/parser
  33.     output=AST;             // parser rules build trees; see the "->" rewrites in the rules below
  34.     ASTLabelType=CommonTree; // tree type used for labels such as $min / $max
  35. }
  36.  
  37. // Imaginary tokens: never matched by the lexer, they only label the AST nodes built by the rewrites below
  38. tokens
  39. {
  40.     RULE_LIST_NODE;         // root built by rulelist; its children are the rule trees
  41.     RULE_NODE;              // one rule: rulename followed by elements
  42.     RULE_NAME_NODE;         // the character tokens of a rule name
  43.  
  44.     ALTERNATION_NODE;       // concatenations separated by '/'
  45.     CONCATENATION_NODE;     // repetitions separated by white space
  46.     REPETITION_NODE;        // an element plus two repeat-count children
  47.     NUMBER_NODE;            // the digit tokens of a repeat count
  48.     GROUP_NODE;             // '(' alternation ')'
  49.     OPTION_NODE;            // '[' alternation ']'
  50.  
  51.     REPEAT_NODE;            // NOTE(review): not referenced by any rule visible in this file
  52.     ONE_OCCURENCE;          // repeat-count markers emitted by the repetition rule.
  53.     ZERO_OCCURENCES;        //  NOTE(review): "OCCURENCE" is misspelled, but the spelling is
  54.     ORMORE_OCCURENCES;      //  part of the token names; renaming would change the names that
  55.     EXACT_OCCURENCES;       //  code outside this file may refer to.
  56.  
  57.     CHAR_VAL_NODE;          // quoted string literal
  58.     BIN_VAL_NODE;           // single binary value
  59.     DEC_VAL_NODE;           // single decimal value
  60.     HEX_VAL_NODE;           // single hexadecimal value
  61.     PROSE_VAL_NODE;         // <bracketed prose>
  62.  
  63.     BIN_VAL_RANGE_NODE;     // binary  min-max range
  64.     BIN_VAL_CONCAT_NODE;    // binary  values joined by '.'
  65.     BIN_VAL_NUMBER_NODE;    // digit tokens of one binary number
  66.  
  67.     DEC_VAL_RANGE_NODE;     // decimal min-max range
  68.     DEC_VAL_CONCAT_NODE;    // decimal values joined by '.'
  69.     DEC_VAL_NUMBER_NODE;    // digit tokens of one decimal number
  70.  
  71.     HEX_VAL_RANGE_NODE;     // hex     min-max range
  72.     HEX_VAL_CONCAT_NODE;    // hex     values joined by '.'
  73.     HEX_VAL_NUMBER_NODE;    // digit tokens of one hexadecimal number
  74.  
  75. }
  76.  
  77. public start                // entry rule of the parser (declared public for the CSharp3 target)
  78.     :
  79.         rulelist EOF!       // require end of input so unparsed trailing text is reported instead of silently ignored; '!' keeps EOF out of the AST
  80.         ;
  81.  
  82. rulelist                    // RFC 5234: rulelist = 1*( rule / (*c-wsp c-nl) ); only the rules are kept in the tree
  83.     :
  84.         ( rule | (c_wsp* (c_nl)=>c_nl) )+ -> ^(RULE_LIST_NODE rule*)
  85.         ;                   // rule* (not rule+): input made only of comments/blank lines gives an empty RULE_LIST_NODE instead of a rewrite exception
  86.  
  87. rule                        // RFC 5234: rule = rulename defined-as elements c-nl
  88.     :
  89.         rulename defined_as elements c_nl -> ^(RULE_NODE rulename elements)
  90.         // defined_as and the closing c_nl are matched but dropped, so '=' and '=/' look the same in the tree.
  91.         // A rule continues on the next line if that line starts with white space (see c_wsp).
  92.         ;
  93.  
  94. rulename                    // a rule name; its character tokens become the children of a RULE_NAME_NODE
  95.     :
  96.         rulechars -> ^(RULE_NAME_NODE rulechars)
  97.         ;
  98.  
  99. rulechars                   // RFC 5234: rulename = ALPHA *(ALPHA / DIGIT / "-"); the lexer emits one token per character
  100.     :
  101.         ( HEX_ALPHA | OTHER_ALPHA ) ( HEX_ALPHA | OTHER_ALPHA | ZERO | ONE | OTHER_DIGIT | DASH )*
  102.         // kept as a separate rule so that rulename can rewrite the whole character sequence as one unit
  103.         ;
  104.  
  105. defined_as                  // RFC 5234: defined-as = *c-wsp ("=" / "=/") *c-wsp
  106.     :
  107.         c_wsp* ( '=' | '=/' ) c_wsp*
  108.         // '='  : basic rule definition
  109.         // '=/' : incremental alternatives added to a rule
  110.         ;
  111.  
  112. elements                    // RFC 5234: elements = alternation *c-wsp; trailing white space is dropped from the tree
  113.     :
  114.         alternation ((c_wsp)=>c_wsp)* -> alternation
  115.         ;                   // predicate: a c_nl may start a c_wsp continuation or end the enclosing rule, so c_wsp is taken only when it matches in full
  116.  
  117. c_wsp                       // RFC 5234: c-wsp = WSP / (c-nl WSP)
  118.     :
  119.         WSP | ( c_nl WSP )  // white space, or a comment/newline followed by white space (line continuation)
  120.         ;
  121.  
  122. c_nl                        // RFC 5234: c-nl = comment / CRLF
  123.     :
  124.         comment | CRLF
  125.         // a comment (the COMMENT token already includes its terminating CRLF) or a bare newline
  126.         ;
  127.  
  128. comment                     // parser-level wrapper around the COMMENT token
  129.     :
  130.         COMMENT
  131.         ;
  132.  
  133. alternation                 // RFC 5234: alternation = concatenation *(*c-wsp "/" *c-wsp concatenation)
  134.     :
  135.         concatenation ( c_wsp* '/' c_wsp* concatenation )* -> ^(ALTERNATION_NODE concatenation+)
  136.         ;                   // an ALTERNATION_NODE is emitted even when there is only one concatenation
  137.  
  138. concatenation               // RFC 5234: concatenation = repetition *(1*c-wsp repetition)
  139.     :
  140.         repetition ( c_wsp+ repetition )* -> ^(CONCATENATION_NODE repetition+)
  141.         ;                   // a CONCATENATION_NODE is emitted even when there is only one repetition
  142.  
  143. repetition                  // optional repeat prefix + element; tree is ^(REPETITION_NODE element <count child> <count child>)
  144.     :
  145.         ASTERISK number element                  -> ^(REPETITION_NODE element ZERO_OCCURENCES number)             // "*max"
  146.         | min=number ASTERISK max=number element -> ^(REPETITION_NODE element $min $max)                          // "min*max"
  147.         | number ASTERISK element                -> ^(REPETITION_NODE element number ORMORE_OCCURENCES)           // "min*"
  148.         | ASTERISK element                       -> ^(REPETITION_NODE element ZERO_OCCURENCES ORMORE_OCCURENCES)  // "*"
  149.         | number element                         -> ^(REPETITION_NODE element number EXACT_OCCURENCES)            // "n" (exact count)
  150.         | element                                -> ^(REPETITION_NODE element ONE_OCCURENCE EXACT_OCCURENCES)     // no repeat prefix
  151.         ;
  152.  
  153. number                      // a repeat count; its digit tokens become the children of a NUMBER_NODE
  154.     :
  155.         number_val -> ^(NUMBER_NODE number_val)
  156.         ;
  157.  
  158. number_val                  // one or more decimal digit tokens (one token per digit)
  159.     :
  160.         ( ZERO | ONE | OTHER_DIGIT )+
  161.         // kept as a separate rule so that number can rewrite the whole digit sequence as one unit
  162.         ;
  163.  
  164. element                     // RFC 5234: element = rulename / group / option / char-val / num-val / prose-val
  165.     :
  166.         rulename            // reference to another rule
  167.         | group             // ( ... )
  168.         | option            // [ ... ]
  169.         | char_val          // "quoted string"
  170.         | num_val           // %b / %d / %x numeric value
  171.         | prose_val         // <prose description>
  172.         ;                   // no rewrite here: the matched alternative's own tree is used
  173.  
  174. group                       // RFC 5234: group = "(" *c-wsp alternation *c-wsp ")"; parentheses and white space are dropped
  175.     :
  176.         '(' c_wsp* alternation c_wsp* ')' -> ^(GROUP_NODE alternation)
  177.         ;
  178.  
  179. option                      // RFC 5234: option = "[" *c-wsp alternation *c-wsp "]"; brackets and white space are dropped
  180.     :
  181.         '[' c_wsp* alternation c_wsp* ']' -> ^(OPTION_NODE alternation)
  182.         ;
  183.  
  184. num_val                     // a numeric value in binary, decimal or hexadecimal notation
  185.     :
  186.         ( bin_val | dec_val | hex_val ) // each alternative starts with its own prefix token
  187.         ;
  188.  
  189. char_val                    // a quoted string literal, wrapped in a CHAR_VAL_NODE
  190.     :
  191.         CHAR_VAL -> ^(CHAR_VAL_NODE CHAR_VAL)
  192.         // CHAR_VAL is a quoted string of SP and VCHAR without DQUOTE;
  193.         // the token matched by the lexer includes the surrounding quotes
  194.         ;
  195.  
  196. bin_val                     // binary numeric value; the prefix token is dropped from every tree
  197.     :
  198.         BIN_VAL_PREFIX min=bin_val_number DASH max=bin_val_number -> ^(BIN_VAL_RANGE_NODE $min $max)            // range:  min-max
  199.         | BIN_VAL_PREFIX bin_val_number ('.' bin_val_number)+     -> ^(BIN_VAL_CONCAT_NODE bin_val_number+)     // series: n.n.n
  200.         | BIN_VAL_PREFIX bin_val_number                           -> ^(BIN_VAL_NODE bin_val_number)             // single value
  201.         // i.e. a series of concatenated bit values, a single ONEOF range, or one value;
  202.         // all alternatives share the same prefix, so the choice depends on what follows the first number
  203.         ;
  204.  
  205. bin_val_number              // one binary number; its bit tokens become the children of a BIN_VAL_NUMBER_NODE
  206.     :
  207.         bin_number -> ^(BIN_VAL_NUMBER_NODE bin_number)
  208.         ;
  209.  
  210. bin_number                  // one or more bit tokens (ZERO / ONE only)
  211.     :
  212.         ( ZERO | ONE )+
  213.         // this extra rule allows easier rewriting in the bin_val_number rule during AST construction
  214.         ;
  215.  
  216. dec_val                     // decimal numeric value; the prefix token is dropped from every tree
  217.     :
  218.         DEC_VAL_PREFIX min=dec_val_number DASH max=dec_val_number -> ^(DEC_VAL_RANGE_NODE $min $max)            // range:  min-max
  219.         | DEC_VAL_PREFIX dec_val_number ('.' dec_val_number)+     -> ^(DEC_VAL_CONCAT_NODE dec_val_number+)     // series: n.n.n
  220.         | DEC_VAL_PREFIX dec_val_number                           -> ^(DEC_VAL_NODE dec_val_number)             // single value
  221.         ;
  222.  
  223. dec_val_number              // one decimal number; its digit tokens become the children of a DEC_VAL_NUMBER_NODE
  224.     :
  225.         dec_number -> ^(DEC_VAL_NUMBER_NODE dec_number)
  226.         ;
  227.  
  228. dec_number                  // one or more decimal digit tokens
  229.     :
  230.         (ZERO | ONE | OTHER_DIGIT)+
  231.         // this extra rule allows easier rewriting in the dec_val_number rule during AST construction
  232.         ;
  233.  
  234. hex_val                     // hexadecimal numeric value; the prefix token is dropped from every tree
  235.     :
  236.         HEX_VAL_PREFIX min=hex_val_number DASH max=hex_val_number -> ^(HEX_VAL_RANGE_NODE $min $max)            // range:  min-max
  237.         | HEX_VAL_PREFIX hex_val_number ('.' hex_val_number)+     -> ^(HEX_VAL_CONCAT_NODE hex_val_number+)     // series: n.n.n
  238.         | HEX_VAL_PREFIX hex_val_number                           -> ^(HEX_VAL_NODE hex_val_number)             // single value
  239.         ;
  240.  
  241. hex_val_number              // one hexadecimal number; its digit tokens become the children of a HEX_VAL_NUMBER_NODE
  242.     :
  243.         hex_number -> ^(HEX_VAL_NUMBER_NODE hex_number)
  244.         ;
  245.  
  246. hex_number                  // one or more hexadecimal digit tokens: decimal digits plus whatever letters the HEX_ALPHA lexer rule accepts
  247.     :
  248.         (ZERO | ONE | OTHER_DIGIT | HEX_ALPHA)+
  249.         // this extra rule allows easier rewriting in the hex_val_number rule during AST construction
  250.         ;
  251.  
  252. prose_val                   // a prose description, wrapped in a PROSE_VAL_NODE
  253.     :
  254.         PROSE_VAL -> ^(PROSE_VAL_NODE PROSE_VAL)
  255.         // PROSE_VAL is a bracketed string of SP and VCHAR
  256.         // without angle brackets inside; the token includes the enclosing '<' and '>'.
  257.         // It is a prose description, to be used as a
  258.         // last resort.
  259.         ;
  260.  
  261. COMMENT                     // RFC 5234: comment = ";" *(WSP / VCHAR) CRLF
  262.     :
  263.         ';' ( WSP | VCHAR )* CRLF   // the terminating CRLF is part of the token, so a comment also ends the line
  264.         ;
  265.  
  266. CHAR_VAL                    // RFC 5234: char-val = DQUOTE *(%x20-21 / %x23-7E) DQUOTE
  267.     :
  268.         DQUOTE ( '\u0020'..'\u0021' | '\u0023'..'\u007E' )* DQUOTE   // space and printable ASCII except the double quote (\u0022); both quotes are part of the token
  269.         ;
  270.  
  271. BIN_VAL_PREFIX              // introduces a binary numeric value (see bin_val)
  272.     :
  273.         '%' ( 'b' | 'B' )   // ABNF literals are case-insensitive (RFC 5234), so "%B" is accepted as well as "%b"
  274.         ;
  275.  
  276. DEC_VAL_PREFIX              // introduces a decimal numeric value (see dec_val)
  277.     :
  278.         '%' ( 'd' | 'D' )   // ABNF literals are case-insensitive (RFC 5234), so "%D" is accepted as well as "%d"
  279.         ;
  280.  
  281. HEX_VAL_PREFIX              // introduces a hexadecimal numeric value (see hex_val)
  282.     :
  283.         '%' ( 'x' | 'X' )   // ABNF literals are case-insensitive (RFC 5234), so "%X" is accepted as well as "%x"
  284.         ;
  285.  
  286. PROSE_VAL                   // RFC 5234: prose-val = "<" *(%x20-3D / %x3F-7E) ">"
  287.     :
  288.         '<' ( '\u0020'..'\u003D' | '\u003F'..'\u007E' )* '>'   // space and printable ASCII except '>' (\u003E); both angle brackets are part of the token
  289.         ;
  290.  
  291. HEX_ALPHA                   // letters that are also hexadecimal digits (used by hex_number and rulechars)
  292.     :
  293.         'A'..'F' | 'a'..'f' // ABNF literals are case-insensitive (RFC 5234 section 2.3), so a-f are hex digits too, e.g. %x0d
  294.         ;
  295.  
  296. OTHER_ALPHA                 // every other ASCII letter; kept disjoint from HEX_ALPHA so each letter lexes to exactly one token type
  297.     :
  298.         'G'..'Z' | 'g'..'z'
  299.         ;
  300.  
  301. ASTERISK                    // repeat operator used by the repetition rule
  302.     :
  303.         '*'
  304.         ;
  305.  
  306. DASH                        // used inside rule names (rulechars) and as the min-max separator of value ranges
  307.     :
  308.         '-'
  309.         ;
  310.  
  311. fragment CR                 // fragment: only usable inside other lexer rules (here: CRLF)
  312.     :
  313.         '\u000D'
  314.         // carriage return
  315.         ;
  316.  
  317. CRLF                        // line terminator token; requires CR immediately followed by LF
  318.     :
  319.         CR LF
  320.         // Internet standard newline. NOTE(review): a lone LF or lone CR does not match this rule - confirm callers supply CRLF line endings
  321.         ;
  322.  
  323. ZERO                        // separate token so that bin_number can accept only the bits 0 and 1
  324.     :
  325.         '0'
  326.         ;
  327.  
  328. ONE                         // separate token so that bin_number can accept only the bits 0 and 1
  329.     :
  330.         '1'
  331.         ;
  332.  
  333. OTHER_DIGIT                 // the decimal digits that are not binary digits
  334.     :
  335.         '2'..'9'
  336.         ;
  337.  
  338. fragment DQUOTE             // fragment: only usable inside other lexer rules (here: CHAR_VAL)
  339.     :
  340.         '\u0022'
  341.         // " (Double Quote)
  342.         ;
  343.  
  344. fragment HTAB               // fragment: only usable inside other lexer rules (here: WSP)
  345.     :
  346.         '\u0009'
  347.         // horizontal tab
  348.         ;
  349.  
  350. fragment LF                 // fragment: only usable inside other lexer rules (here: CRLF)
  351.     :
  352.         '\u000A'
  353.         // linefeed
  354.         ;
  355.  
  356. fragment SP                 // fragment: only usable inside other lexer rules (here: WSP)
  357.     :
  358.         '\u0020'
  359.         // Space
  360.         ;
  361.  
  362. fragment VCHAR              // fragment: only usable inside other lexer rules (here: COMMENT)
  363.     :
  364.         '\u0021'..'\u007E'
  365.         // visible (printing) characters
  366.         ;
  367.  
  368. WSP                         // a whole run of spaces/tabs forms ONE token (RFC 5234 defines WSP as a single SP or HTAB)
  369.     :
  370.         (SP | HTAB)+
  371.         // white space; it is a regular token (not hidden or skipped), so parser rules must match it explicitly
  372.         ;