From 13a5ba652f5eac9fad85c693de21a49259c2f443 Mon Sep 17 00:00:00 2001 From: Scott Roland Date: Wed, 9 Jan 2013 08:28:59 -0500 Subject: [PATCH 1/9] Add systemverilog scanner --- lib/coderay/helpers/file_type.rb | 3 + lib/coderay/scanners/_map.rb | 31 ++-- lib/coderay/scanners/systemverilog.rb | 207 ++++++++++++++++++++++++++ 3 files changed, 226 insertions(+), 15 deletions(-) create mode 100644 lib/coderay/scanners/systemverilog.rb diff --git a/lib/coderay/helpers/file_type.rb b/lib/coderay/helpers/file_type.rb index 637001b8..12d41bc9 100644 --- a/lib/coderay/helpers/file_type.rb +++ b/lib/coderay/helpers/file_type.rb @@ -118,8 +118,11 @@ def shebang filename 'rxml' => :ruby, # 'sch' => :scheme, 'sql' => :sql, + 'sv' => :systemverilog, + 'svh' => :systemverilog, # 'ss' => :scheme, 'tmproj' => :xml, + 'v' => :systemverilog, 'xhtml' => :html, 'xml' => :xml, 'yaml' => :yaml, diff --git a/lib/coderay/scanners/_map.rb b/lib/coderay/scanners/_map.rb index a240298d..a3c420b2 100644 --- a/lib/coderay/scanners/_map.rb +++ b/lib/coderay/scanners/_map.rb @@ -2,21 +2,22 @@ module CodeRay module Scanners map \ - :'c++' => :cpp, - :cplusplus => :cpp, - :ecmascript => :java_script, - :ecma_script => :java_script, - :rhtml => :erb, - :eruby => :erb, - :irb => :ruby, - :javascript => :java_script, - :js => :java_script, - :pascal => :delphi, - :patch => :diff, - :plain => :text, - :plaintext => :text, - :xhtml => :html, - :yml => :yaml + :'c++' => :cpp, + :cplusplus => :cpp, + :ecmascript => :java_script, + :ecma_script => :java_script, + :rhtml => :erb, + :eruby => :erb, + :irb => :ruby, + :javascript => :java_script, + :js => :java_script, + :pascal => :delphi, + :patch => :diff, + :plain => :text, + :plaintext => :text, + :verilog => :systemverilog, + :xhtml => :html, + :yml => :yaml default :text diff --git a/lib/coderay/scanners/systemverilog.rb b/lib/coderay/scanners/systemverilog.rb new file mode 100644 index 00000000..f35ce3eb --- /dev/null +++ b/lib/coderay/scanners/systemverilog.rb @@ -0,0 +1,207 @@ +module CodeRay +module Scanners + + # TODO: Review. Most of this code is copied from the C++ scanner and not + # for applicability for SystemVerilog. + + # Scanner for SystemVerilog + # + # Aliases: +verilog+ + class SystemVerilog < Scanner + + register_for :systemverilog + file_extension 'sv' + title 'SystemVerilog' + + + KEYWORDS = [ + 'accept_on', 'alias', 'always', 'always_comb', 'always_ff', 'always_latch', 'and', 'assert', 'assign', 'assume', 'before', 'begin', 'bind', 'bins', 'binsof', 'break', 'buf', 'bufif0', 'bufif1', 'byte', 'case', 'casex', 'casez', 'cell', 'chandle', 'checker', 'class', 'clocking', 'cmos', 'config', 'const', 'constraint', 'context', 'continue', 'cover', 'covergroup', 'coverpoint', 'cross', 'deassign', 'default', 'defparam', 'design', 'disable', 'dist', 'do', 'edge', 'else', 'end', 'endcase', 'endchecker', 'endclass', 'endclocking', 'endconfig', 'endfunction', 'endgenerate', 'endgroup', 'endinterface', 'endmodule', 'endpackage', 'endprimitive', 'endprogram', 'endproperty', 'endsequence', 'endspecify', 'endtable', 'endtask', 'enum', 'event', 'eventually', 'expect', 'export', 'extends', 'extern', 'final', 'first_match', 'for', 'force', 'foreach', 'forever', 'fork', 'forkjoin', 'function', 'generate', 'genvar', 'global', 'highz0', 'highz1', 'if', 'iff', 'ifnone', 'ignore_bins', 'illegal_bins', 'implies', 'import', 'incdir', 'include', 'initial', 'inout', 'input', 'inside', 'instance', 'interface', 'intersect', 'join', 'join_any', 'join_none', 'large', 'let', 'liblist', 'library', 'local', 'localparam', 'macromodule', 'matches', 'medium', 'modport', 'module', 'nand', 'negedge', 'new', 'nexttime', 'nmos', 'nor', 'noshowcancelled', 'not', 'notif0', 'notif1', 'or', 'output', 'package', 'packed', 'parameter', 'pmos', 'posedge', 'primitive', 'priority', 'program', 'property', 'protected', 'pull0', 'pull1', 'pulldown', 'pullup', 'pulsestyle_ondetect', 'pulsestyle_onevent', 'pure', 'rand', 'randc', 'randcase', 'randsequence', 'rcmos', 'real', 'realtime', 'ref', 'reg', 'reject_on', 'release', 'repeat', 'restrict', 'return', 'rnmos', 'rpmos', 'rtran', 'rtranif0', 'rtranif1', 's_always', 's_eventually', 's_nexttime', 's_until', 's_until_with', 'scalared', 'sequence', 'shortint', 'shortreal', 'showcancelled', 'signed', 'small', 'solve', 'specify', 'specparam', 'static', 'strong', 'strong0', 'strong1', 'struct', 'super', 'supply0', 'supply1', 'sync_accept_on', 'sync_reject_on', 'table', 'tagged', 'task', 'throughout', 'time', 'timeprecision', 'timeunit', 'tran', 'tranif0', 'tranif1', 'tri', 'tri0', 'tri1', 'triand', 'trior', 'trireg', 'type', 'typedef', 'union', 'unique', 'unique0', 'unsigned', 'until', 'until_with', 'untyped', 'use', 'uwire', 'var', 'vectored', 'wait', 'wait_order', 'wand', 'weak', 'weak0', 'weak1', 'while', 'wildcard', 'with', 'within', 'wor', 'xnor', 'xor' + ] # :nodoc: + + PREDEFINED_TYPES = [ + 'bit', 'int', 'integer', 'logic', 'longint', 'string', 'wire' + ] # :nodoc: + PREDEFINED_CONSTANTS = [ + 'null' + ] # :nodoc: + PREDEFINED_VARIABLES = [ + 'this', + ] # :nodoc: + DIRECTIVES = [ + 'automatic', 'virtual', 'void' + ] # :nodoc: + + IDENT_KIND = WordList.new(:ident). + add(KEYWORDS, :keyword). + add(PREDEFINED_TYPES, :predefined_type). + add(PREDEFINED_VARIABLES, :local_variable). + add(DIRECTIVES, :directive). + add(PREDEFINED_CONSTANTS, :predefined_constant) # :nodoc: + + ESCAPE = / [rbfntv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x # :nodoc: + UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x # :nodoc: + + protected + + def scan_tokens encoder, options + + state = :initial + label_expected = true + case_expected = false + label_expected_before_preproc_line = nil + in_preproc_line = false + + until eos? + + case state + + when :initial + + if match = scan(/ \s+ | \\\n /x) + if in_preproc_line && match != "\\\n" && match.index(?\n) + in_preproc_line = false + label_expected = label_expected_before_preproc_line + end + encoder.text_token match, :space + + elsif match = scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx) + encoder.text_token match, :comment + + elsif match = scan(/ \# \s* if \s* 0 /x) + match << scan_until(/ ^\# (?:elif|else|endif) .*? $ | \z /xm) unless eos? + encoder.text_token match, :comment + + elsif match = scan(/ [-+*=<>?:;,!&^|()\[\]{}~%]+ | \/=? | \.(?!\d) /x) + label_expected = match =~ /[;\{\}]/ + if case_expected + label_expected = true if match == ':' + case_expected = false + end + encoder.text_token match, :operator + + elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x) + kind = IDENT_KIND[match] + if kind == :ident && label_expected && !in_preproc_line && scan(/:(?!:)/) + kind = :label + match << matched + else + label_expected = false + if kind == :keyword + case match + when 'class' + state = :class_name_expected + when 'case', 'default' + case_expected = true + end + end + end + encoder.text_token match, kind + + elsif match = scan(/\$/) + encoder.text_token match, :ident + + elsif match = scan(/L?"/) + encoder.begin_group :string + if match[0] == ?L + encoder.text_token match, 'L', :modifier + match = '"' + end + state = :string + encoder.text_token match, :delimiter + + elsif match = scan(/`[ \t]*(\w*)/) + encoder.text_token match, :preprocessor + in_preproc_line = true + label_expected_before_preproc_line = label_expected + state = :include_expected if self[1] == 'include' + + elsif match = scan(/ L?' (?: [^\'\n\\] | \\ #{ESCAPE} )? '? /ox) + label_expected = false + encoder.text_token match, :char + + elsif match = scan(/0[xX][0-9A-Fa-f]+/) + label_expected = false + encoder.text_token match, :hex + + elsif match = scan(/(?:0[0-7]+)(?![89.eEfF])/) + label_expected = false + encoder.text_token match, :octal + + elsif match = scan(/(?:\d+)(?![.eEfF])L?L?/) + label_expected = false + encoder.text_token match, :integer + + elsif match = scan(/\d[fF]?|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/) + label_expected = false + encoder.text_token match, :float + + else + encoder.text_token getch, :error + + end + + when :string + if match = scan(/[^\\"]+/) + encoder.text_token match, :content + elsif match = scan(/"/) + encoder.text_token match, :delimiter + encoder.end_group :string + state = :initial + label_expected = false + elsif match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox) + encoder.text_token match, :char + elsif match = scan(/ \\ | $ /x) + encoder.end_group :string + encoder.text_token match, :error + state = :initial + label_expected = false + else + raise_inspect "else case \" reached; %p not handled." % peek(1), encoder + end + + when :include_expected + if match = scan(/<[^>\n]+>?|"[^"\n\\]*(?:\\.[^"\n\\]*)*"?/) + encoder.text_token match, :include + state = :initial + + elsif match = scan(/\s+/) + encoder.text_token match, :space + state = :initial if match.index ?\n + + else + state = :initial + + end + + when :class_name_expected + if match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x) + encoder.text_token match, :class + state = :initial + + elsif match = scan(/\s+/) + encoder.text_token match, :space + + else + encoder.text_token getch, :error + state = :initial + + end + + else + raise_inspect 'Unknown state', encoder + + end + + end + + if state == :string + encoder.end_group :string + end + + encoder + end + + end + +end +end From fad9c71fbd0cd7de284c74ad3862d3518ad42575 Mon Sep 17 00:00:00 2001 From: Scott Roland Date: Thu, 10 Jan 2013 15:00:46 +0100 Subject: [PATCH 2/9] Add primitive e and VHDL scanners --- lib/coderay/helpers/file_type.rb | 3 + lib/coderay/scanners/_map.rb | 1 + lib/coderay/scanners/e.rb | 205 +++++++++++++++++++++++++++++++ lib/coderay/scanners/vhdl.rb | 201 ++++++++++++++++++++++++++++++ 4 files changed, 410 insertions(+) create mode 100644 lib/coderay/scanners/e.rb create mode 100644 lib/coderay/scanners/vhdl.rb diff --git a/lib/coderay/helpers/file_type.rb b/lib/coderay/helpers/file_type.rb index 12d41bc9..39b741a7 100644 --- a/lib/coderay/helpers/file_type.rb +++ b/lib/coderay/helpers/file_type.rb @@ -84,6 +84,7 @@ def shebang filename 'css' => :css, 'diff' => :diff, 'dpr' => :delphi, + 'e' => :e, 'erb' => :erb, 'gemspec' => :ruby, 'groovy' => :groovy, @@ -123,6 +124,8 @@ def shebang filename # 'ss' => :scheme, 'tmproj' => :xml, 'v' => :systemverilog, + 'vhd' => :vhdl, + 'vhdl' => :vhdl, 'xhtml' => :html, 'xml' => :xml, 'yaml' => :yaml, diff --git a/lib/coderay/scanners/_map.rb b/lib/coderay/scanners/_map.rb index a3c420b2..60c011b8 100644 --- a/lib/coderay/scanners/_map.rb +++ b/lib/coderay/scanners/_map.rb @@ -15,6 +15,7 @@ module Scanners :patch => :diff, :plain => :text, :plaintext => :text, + :specman => :e, :verilog => :systemverilog, :xhtml => :html, :yml => :yaml diff --git a/lib/coderay/scanners/e.rb b/lib/coderay/scanners/e.rb new file mode 100644 index 00000000..c4553b9b --- /dev/null +++ b/lib/coderay/scanners/e.rb @@ -0,0 +1,205 @@ +module CodeRay +module Scanners + + # TODO: Review. Most of this code is copied from the C++ scanner and not + # for applicability for Specman e. + + # Scanner for Specman e + # + # Aliases: +specmen+ + class E < Scanner + + register_for :e + file_extension 'e' + title 'e' + + + KEYWORDS = [ + 'also', 'bits', 'cover', 'each', 'empty', 'extend', 'first', 'for', 'ignore', 'illegal', 'in', 'is', 'item', 'keep', 'like', 'list', 'not', 'only', 'package', 'per_instance', 'radix', 'return', 'soft', 'unit', 'using', 'when' + ] # :nodoc: + + PREDEFINED_TYPES = [ + 'bool', 'bit', 'event', 'string', 'uint' + ] # :nodoc: + PREDEFINED_CONSTANTS = [ + 'FALSE', 'TRUE' + ] # :nodoc: + PREDEFINED_VARIABLES = [ + ] # :nodoc: + DIRECTIVES = [ + ] # :nodoc: + + IDENT_KIND = WordList.new(:ident). + add(KEYWORDS, :keyword). + add(PREDEFINED_TYPES, :predefined_type). + add(PREDEFINED_VARIABLES, :local_variable). + add(DIRECTIVES, :directive). + add(PREDEFINED_CONSTANTS, :predefined_constant) # :nodoc: + + ESCAPE = / [rbfntv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x # :nodoc: + UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x # :nodoc: + + protected + + def scan_tokens encoder, options + + state = :initial + label_expected = true + case_expected = false + label_expected_before_preproc_line = nil + in_preproc_line = false + + until eos? + + case state + + when :initial + + if match = scan(/ \s+ | \\\n /x) + if in_preproc_line && match != "\\\n" && match.index(?\n) + in_preproc_line = false + label_expected = label_expected_before_preproc_line + end + encoder.text_token match, :space + + elsif match = scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx) + encoder.text_token match, :comment + + elsif match = scan(/ \# \s* if \s* 0 /x) + match << scan_until(/ ^\# (?:elif|else|endif) .*? $ | \z /xm) unless eos? + encoder.text_token match, :comment + + elsif match = scan(/ [-+*=<>?:;,!&^|()\[\]{}~%]+ | \/=? | \.(?!\d) /x) + label_expected = match =~ /[;\{\}]/ + if case_expected + label_expected = true if match == ':' + case_expected = false + end + encoder.text_token match, :operator + + elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x) + kind = IDENT_KIND[match] + if kind == :ident && label_expected && !in_preproc_line && scan(/:(?!:)/) + kind = :label + match << matched + else + label_expected = false + if kind == :keyword + case match + when 'class' + state = :class_name_expected + when 'case', 'default' + case_expected = true + end + end + end + encoder.text_token match, kind + + elsif match = scan(/\$/) + encoder.text_token match, :ident + + elsif match = scan(/L?"/) + encoder.begin_group :string + if match[0] == ?L + encoder.text_token match, 'L', :modifier + match = '"' + end + state = :string + encoder.text_token match, :delimiter + + elsif match = scan(/`[ \t]*(\w*)/) + encoder.text_token match, :preprocessor + in_preproc_line = true + label_expected_before_preproc_line = label_expected + state = :include_expected if self[1] == 'include' + + elsif match = scan(/ L?' (?: [^\'\n\\] | \\ #{ESCAPE} )? '? /ox) + label_expected = false + encoder.text_token match, :char + + elsif match = scan(/0[xX][0-9A-Fa-f]+/) + label_expected = false + encoder.text_token match, :hex + + elsif match = scan(/(?:0[0-7]+)(?![89.eEfF])/) + label_expected = false + encoder.text_token match, :octal + + elsif match = scan(/(?:\d+)(?![.eEfF])L?L?/) + label_expected = false + encoder.text_token match, :integer + + elsif match = scan(/\d[fF]?|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/) + label_expected = false + encoder.text_token match, :float + + else + encoder.text_token getch, :error + + end + + when :string + if match = scan(/[^\\"]+/) + encoder.text_token match, :content + elsif match = scan(/"/) + encoder.text_token match, :delimiter + encoder.end_group :string + state = :initial + label_expected = false + elsif match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox) + encoder.text_token match, :char + elsif match = scan(/ \\ | $ /x) + encoder.end_group :string + encoder.text_token match, :error + state = :initial + label_expected = false + else + raise_inspect "else case \" reached; %p not handled." % peek(1), encoder + end + + when :include_expected + if match = scan(/<[^>\n]+>?|"[^"\n\\]*(?:\\.[^"\n\\]*)*"?/) + encoder.text_token match, :include + state = :initial + + elsif match = scan(/\s+/) + encoder.text_token match, :space + state = :initial if match.index ?\n + + else + state = :initial + + end + + when :class_name_expected + if match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x) + encoder.text_token match, :class + state = :initial + + elsif match = scan(/\s+/) + encoder.text_token match, :space + + else + encoder.text_token getch, :error + state = :initial + + end + + else + raise_inspect 'Unknown state', encoder + + end + + end + + if state == :string + encoder.end_group :string + end + + encoder + end + + end + +end +end diff --git a/lib/coderay/scanners/vhdl.rb b/lib/coderay/scanners/vhdl.rb new file mode 100644 index 00000000..4975c721 --- /dev/null +++ b/lib/coderay/scanners/vhdl.rb @@ -0,0 +1,201 @@ +module CodeRay +module Scanners + + # TODO: Review. Most of this code is copied from the C++ scanner and not + # for applicability for VHDL. + + # Scanner for VHDL + # + class VHDL < Scanner + + register_for :vhdl + file_extension 'vhd' + title 'VHDL' + + # From IEEE Std 1076-2002: Section 13.9 Reserved Words + KEYWORDS = [ +'abs', 'access', 'after', 'alias', 'all', 'and', 'architecture', 'array', 'assert', 'attribute', 'begin', 'block', 'body', 'buffer', 'bus', 'case', 'component', 'configuration', 'constant', 'disconnect', 'downto', 'else', 'elsif', 'end', 'entity', 'exit', 'file', 'for', 'function', 'generate', 'generic', 'group', 'guarded', 'if', 'impure', 'in', 'inertial', 'inout', 'is', 'label', 'library', 'linkage', 'literal', 'loop', 'map', 'mod', 'nand', 'new', 'next', 'nor', 'not', 'null', 'of', 'on', 'open', 'or', 'others', 'out', 'package', 'port', 'postponed', 'procedural', 'procedure', 'process', 'protected', 'pure', 'range', 'record', 'reference', 'register', 'reject', 'rem', 'report', 'return', 'rol', 'ror', 'select', 'severity', 'shared', 'signal', 'sla', 'sll', 'sra', 'srl', 'subtype', 'then', 'to', 'transport', 'type', 'unaffected', 'units', 'until', 'use', 'variable', 'wait', 'when', 'while', 'with', 'xnor', 'xor' ] # :nodoc: + + PREDEFINED_TYPES = [ + ] # :nodoc: + PREDEFINED_CONSTANTS = [ + ] # :nodoc: + PREDEFINED_VARIABLES = [ + ] # :nodoc: + DIRECTIVES = [ + ] # :nodoc: + + IDENT_KIND = WordList.new(:ident). + add(KEYWORDS, :keyword). + add(PREDEFINED_TYPES, :predefined_type). + add(PREDEFINED_VARIABLES, :local_variable). + add(DIRECTIVES, :directive). + add(PREDEFINED_CONSTANTS, :predefined_constant) # :nodoc: + + ESCAPE = / [rbfntv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x # :nodoc: + UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x # :nodoc: + + protected + + def scan_tokens encoder, options + + state = :initial + label_expected = true + case_expected = false + label_expected_before_preproc_line = nil + in_preproc_line = false + + until eos? + + case state + + when :initial + + if match = scan(/ \s+ | \\\n /x) + if in_preproc_line && match != "\\\n" && match.index(?\n) + in_preproc_line = false + label_expected = label_expected_before_preproc_line + end + encoder.text_token match, :space + + elsif match = scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx) + encoder.text_token match, :comment + + elsif match = scan(/ \# \s* if \s* 0 /x) + match << scan_until(/ ^\# (?:elif|else|endif) .*? $ | \z /xm) unless eos? + encoder.text_token match, :comment + + elsif match = scan(/ [-+*=<>?:;,!&^|()\[\]{}~%]+ | \/=? | \.(?!\d) /x) + label_expected = match =~ /[;\{\}]/ + if case_expected + label_expected = true if match == ':' + case_expected = false + end + encoder.text_token match, :operator + + elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x) + kind = IDENT_KIND[match] + if kind == :ident && label_expected && !in_preproc_line && scan(/:(?!:)/) + kind = :label + match << matched + else + label_expected = false + if kind == :keyword + case match + when 'class' + state = :class_name_expected + when 'case', 'default' + case_expected = true + end + end + end + encoder.text_token match, kind + + elsif match = scan(/\$/) + encoder.text_token match, :ident + + elsif match = scan(/L?"/) + encoder.begin_group :string + if match[0] == ?L + encoder.text_token match, 'L', :modifier + match = '"' + end + state = :string + encoder.text_token match, :delimiter + + elsif match = scan(/`[ \t]*(\w*)/) + encoder.text_token match, :preprocessor + in_preproc_line = true + label_expected_before_preproc_line = label_expected + state = :include_expected if self[1] == 'include' + + elsif match = scan(/ L?' (?: [^\'\n\\] | \\ #{ESCAPE} )? '? /ox) + label_expected = false + encoder.text_token match, :char + + elsif match = scan(/0[xX][0-9A-Fa-f]+/) + label_expected = false + encoder.text_token match, :hex + + elsif match = scan(/(?:0[0-7]+)(?![89.eEfF])/) + label_expected = false + encoder.text_token match, :octal + + elsif match = scan(/(?:\d+)(?![.eEfF])L?L?/) + label_expected = false + encoder.text_token match, :integer + + elsif match = scan(/\d[fF]?|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/) + label_expected = false + encoder.text_token match, :float + + else + encoder.text_token getch, :error + + end + + when :string + if match = scan(/[^\\"]+/) + encoder.text_token match, :content + elsif match = scan(/"/) + encoder.text_token match, :delimiter + encoder.end_group :string + state = :initial + label_expected = false + elsif match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox) + encoder.text_token match, :char + elsif match = scan(/ \\ | $ /x) + encoder.end_group :string + encoder.text_token match, :error + state = :initial + label_expected = false + else + raise_inspect "else case \" reached; %p not handled." % peek(1), encoder + end + + when :include_expected + if match = scan(/<[^>\n]+>?|"[^"\n\\]*(?:\\.[^"\n\\]*)*"?/) + encoder.text_token match, :include + state = :initial + + elsif match = scan(/\s+/) + encoder.text_token match, :space + state = :initial if match.index ?\n + + else + state = :initial + + end + + when :class_name_expected + if match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x) + encoder.text_token match, :class + state = :initial + + elsif match = scan(/\s+/) + encoder.text_token match, :space + + else + encoder.text_token getch, :error + state = :initial + + end + + else + raise_inspect 'Unknown state', encoder + + end + + end + + if state == :string + encoder.end_group :string + end + + encoder + end + + end + +end +end From 498512faf1eefbc2dfa488ebb5f8e34c11c8cd1b Mon Sep 17 00:00:00 2001 From: Scott Roland Date: Sat, 12 Jan 2013 08:17:26 +0100 Subject: [PATCH 3/9] Add more e keywords and type. Also some TODOs. --- lib/coderay/scanners/e.rb | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/lib/coderay/scanners/e.rb b/lib/coderay/scanners/e.rb index c4553b9b..5403cf3f 100644 --- a/lib/coderay/scanners/e.rb +++ b/lib/coderay/scanners/e.rb @@ -13,13 +13,17 @@ class E < Scanner file_extension 'e' title 'e' - + # TODO: 0b0101 + # TODO: -- I am a comment + # TODO: 0x0000_0FFF + # TODO: DUT'unique_name + KEYWORDS = [ - 'also', 'bits', 'cover', 'each', 'empty', 'extend', 'first', 'for', 'ignore', 'illegal', 'in', 'is', 'item', 'keep', 'like', 'list', 'not', 'only', 'package', 'per_instance', 'radix', 'return', 'soft', 'unit', 'using', 'when' + 'a', 'also', 'bits', 'cover', 'each', 'else', 'empty', 'extend', 'first', 'for', 'ignore', 'illegal', 'if', 'in', 'is', 'item', 'keep', 'like', 'list', 'not', 'only', 'package', 'per_instance', 'radix', 'result', 'return', 'rf_manager', 'soft', 'sys', 'type', 'unit', 'using', 'var', 'when' ] # :nodoc: PREDEFINED_TYPES = [ - 'bool', 'bit', 'event', 'string', 'uint' + 'bool', 'bit', 'event', 'rf_scalar', 'rf_type', 'string', 'uint' ] # :nodoc: PREDEFINED_CONSTANTS = [ 'FALSE', 'TRUE' From 5124241b1c1aa53b9c42345b257b96b1117d2bb0 Mon Sep 17 00:00:00 2001 From: Scott Roland Date: Sat, 12 Jan 2013 15:56:02 +0100 Subject: [PATCH 4/9] Some keyword sorting. Fixed comment parsing --- lib/coderay/scanners/vhdl.rb | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lib/coderay/scanners/vhdl.rb b/lib/coderay/scanners/vhdl.rb index 4975c721..3848a42c 100644 --- a/lib/coderay/scanners/vhdl.rb +++ b/lib/coderay/scanners/vhdl.rb @@ -14,7 +14,7 @@ class VHDL < Scanner # From IEEE Std 1076-2002: Section 13.9 Reserved Words KEYWORDS = [ -'abs', 'access', 'after', 'alias', 'all', 'and', 'architecture', 'array', 'assert', 'attribute', 'begin', 'block', 'body', 'buffer', 'bus', 'case', 'component', 'configuration', 'constant', 'disconnect', 'downto', 'else', 'elsif', 'end', 'entity', 'exit', 'file', 'for', 'function', 'generate', 'generic', 'group', 'guarded', 'if', 'impure', 'in', 'inertial', 'inout', 'is', 'label', 'library', 'linkage', 'literal', 'loop', 'map', 'mod', 'nand', 'new', 'next', 'nor', 'not', 'null', 'of', 'on', 'open', 'or', 'others', 'out', 'package', 'port', 'postponed', 'procedural', 'procedure', 'process', 'protected', 'pure', 'range', 'record', 'reference', 'register', 'reject', 'rem', 'report', 'return', 'rol', 'ror', 'select', 'severity', 'shared', 'signal', 'sla', 'sll', 'sra', 'srl', 'subtype', 'then', 'to', 'transport', 'type', 'unaffected', 'units', 'until', 'use', 'variable', 'wait', 'when', 'while', 'with', 'xnor', 'xor' ] # :nodoc: +'abs', 'access', 'after', 'alias', 'all', 'and', 'architecture', 'array', 'assert', 'attribute', 'begin', 'block', 'body', 'case', 'component', 'configuration', 'disconnect', 'downto', 'else', 'elsif', 'end', 'entity', 'exit', 'file', 'for', 'function', 'generate', 'generic', 'group', 'guarded', 'if', 'inertial', 'is', 'label', 'library', 'literal', 'loop', 'map', 'mod', 'nand', 'new', 'next', 'nor', 'not', 'null', 'of', 'on', 'open', 'or', 'others', 'package', 'port', 'postponed', 'procedural', 'procedure', 'process', 'protected', 'range', 'record', 'reference', 'reject', 'rem', 'report', 'return', 'rol', 'ror', 'select', 'severity', 'signal', 'sla', 'sll', 'sra', 'srl', 'subtype', 'then', 'to', 'transport', 'type', 'unaffected', 'units', 'until', 'use', 'wait', 'when', 'while', 'with', 'xnor', 'xor' ] # :nodoc: PREDEFINED_TYPES = [ ] # :nodoc: @@ -23,6 +23,7 @@ class VHDL < Scanner PREDEFINED_VARIABLES = [ ] # :nodoc: DIRECTIVES = [ + 'buffer', 'bus', 'constant', 'impure', 'in', 'inout', 'linkage', 'out', 'pure', 'register', 'shared', 'variable' ] # :nodoc: IDENT_KIND = WordList.new(:ident). @@ -58,7 +59,7 @@ def scan_tokens encoder, options end encoder.text_token match, :space - elsif match = scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx) + elsif match = scan(%r! -- [^\n\\]* (?: \\. [^\n\\]* )* !mx) encoder.text_token match, :comment elsif match = scan(/ \# \s* if \s* 0 /x) From 13ac0632a1424f31a807c4f194fadbdedea027dd Mon Sep 17 00:00:00 2001 From: Scott Roland Date: Sat, 12 Jan 2013 16:21:08 +0100 Subject: [PATCH 5/9] Fix comments. Underscores in hex. Added binary. --- lib/coderay/scanners/e.rb | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/lib/coderay/scanners/e.rb b/lib/coderay/scanners/e.rb index 5403cf3f..a56e3c73 100644 --- a/lib/coderay/scanners/e.rb +++ b/lib/coderay/scanners/e.rb @@ -13,9 +13,6 @@ class E < Scanner file_extension 'e' title 'e' - # TODO: 0b0101 - # TODO: -- I am a comment - # TODO: 0x0000_0FFF # TODO: DUT'unique_name KEYWORDS = [ @@ -68,6 +65,8 @@ def scan_tokens encoder, options elsif match = scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx) encoder.text_token match, :comment + elsif match = scan(%r! -- [^\n\\]* (?: \\. [^\n\\]* )* !mx) + encoder.text_token match, :comment elsif match = scan(/ \# \s* if \s* 0 /x) match << scan_until(/ ^\# (?:elif|else|endif) .*? $ | \z /xm) unless eos? @@ -121,7 +120,7 @@ def scan_tokens encoder, options label_expected = false encoder.text_token match, :char - elsif match = scan(/0[xX][0-9A-Fa-f]+/) + elsif match = scan(/0[xX][0-9_A-Fa-f]+/) label_expected = false encoder.text_token match, :hex @@ -129,6 +128,10 @@ def scan_tokens encoder, options label_expected = false encoder.text_token match, :octal + elsif match = scan(/0[b][0-9_]+/) + label_expected = false + encoder.text_token match, :binary + elsif match = scan(/(?:\d+)(?![.eEfF])L?L?/) label_expected = false encoder.text_token match, :integer From 53295434dc504f5824ac60f70ed80f51da126459 Mon Sep 17 00:00:00 2001 From: Scott Roland Date: Sat, 12 Jan 2013 16:31:40 +0100 Subject: [PATCH 6/9] Fix comment language. Better identify language. --- lib/coderay/scanners/systemverilog.rb | 4 ++-- lib/coderay/scanners/vhdl.rb | 5 ++--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/lib/coderay/scanners/systemverilog.rb b/lib/coderay/scanners/systemverilog.rb index f35ce3eb..e076656b 100644 --- a/lib/coderay/scanners/systemverilog.rb +++ b/lib/coderay/scanners/systemverilog.rb @@ -2,9 +2,9 @@ module CodeRay module Scanners # TODO: Review. Most of this code is copied from the C++ scanner and not - # for applicability for SystemVerilog. + # applicable for SystemVerilog. - # Scanner for SystemVerilog + # Scanner for IEEE Standard 1800-2009 SystemVerilog # # Aliases: +verilog+ class SystemVerilog < Scanner diff --git a/lib/coderay/scanners/vhdl.rb b/lib/coderay/scanners/vhdl.rb index 3848a42c..924b0236 100644 --- a/lib/coderay/scanners/vhdl.rb +++ b/lib/coderay/scanners/vhdl.rb @@ -2,10 +2,9 @@ module CodeRay module Scanners # TODO: Review. Most of this code is copied from the C++ scanner and not - # for applicability for VHDL. + # applicable for VHDL. - # Scanner for VHDL - # + # Scanner for IEEE Standard 1076-2002 VHDL class VHDL < Scanner register_for :vhdl From 41093673c64a599ad43c630a1b15ab3cb9744876 Mon Sep 17 00:00:00 2001 From: Scott Roland Date: Sat, 12 Jan 2013 16:32:02 +0100 Subject: [PATCH 7/9] Fix comment language. Better identify language. Add disclaimer about lazy parsing. --- lib/coderay/scanners/e.rb | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/lib/coderay/scanners/e.rb b/lib/coderay/scanners/e.rb index a56e3c73..cdfb57f0 100644 --- a/lib/coderay/scanners/e.rb +++ b/lib/coderay/scanners/e.rb @@ -2,9 +2,9 @@ module CodeRay module Scanners # TODO: Review. Most of this code is copied from the C++ scanner and not - # for applicability for Specman e. + # applicable for e. - # Scanner for Specman e + # Scanner for IEEE Std 1647 e # # Aliases: +specmen+ class E < Scanner @@ -15,6 +15,11 @@ class E < Scanner # TODO: DUT'unique_name + # NOTE: e is an odd language in that the essential vocabulary of the + # language (like 'extend') are not actually reserved words. The following + # parser isn't smart enough to handle that; it pretends that these are + # reserved words. + KEYWORDS = [ 'a', 'also', 'bits', 'cover', 'each', 'else', 'empty', 'extend', 'first', 'for', 'ignore', 'illegal', 'if', 'in', 'is', 'item', 'keep', 'like', 'list', 'not', 'only', 'package', 'per_instance', 'radix', 'result', 'return', 'rf_manager', 'soft', 'sys', 'type', 'unit', 'using', 'var', 'when' ] # :nodoc: From fb8c44574a30bb3a6699430667e573963cf1aaf5 Mon Sep 17 00:00:00 2001 From: Scott Roland Date: Sun, 13 Jan 2013 00:10:29 +0000 Subject: [PATCH 8/9] Fix comments to not be /*foo*/. Understand <'code'> and everything else is a comment. --- lib/coderay/scanners/e.rb | 37 ++++++++++++++++++++++++++----------- 1 file changed, 26 insertions(+), 11 deletions(-) diff --git a/lib/coderay/scanners/e.rb b/lib/coderay/scanners/e.rb index cdfb57f0..89cd39de 100644 --- a/lib/coderay/scanners/e.rb +++ b/lib/coderay/scanners/e.rb @@ -49,7 +49,7 @@ class E < Scanner def scan_tokens encoder, options - state = :initial + state = :comment_segment label_expected = true case_expected = false label_expected_before_preproc_line = nil @@ -59,16 +59,31 @@ def scan_tokens encoder, options case state - when :initial + when :comment_segment + if match = scan(/^<'$/) + encoder.text_token match, :operator + state = :code_segment + elsif match = scan(/^.+$/) + encoder.text_token match, :comment + elsif match = scan(/\\\n/) + encoder.text_token match, :space + else + encoder.text_token getch, :error + end + + when :code_segment + if match = scan(/^'>$/) + encoder.text_token match, :operator + state = :comment_segment - if match = scan(/ \s+ | \\\n /x) + elsif match = scan(/ \s+ | \\\n /x) if in_preproc_line && match != "\\\n" && match.index(?\n) in_preproc_line = false label_expected = label_expected_before_preproc_line end encoder.text_token match, :space - elsif match = scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx) + elsif match = scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* !mx) encoder.text_token match, :comment elsif match = scan(%r! -- [^\n\\]* (?: \\. [^\n\\]* )* !mx) encoder.text_token match, :comment @@ -156,14 +171,14 @@ def scan_tokens encoder, options elsif match = scan(/"/) encoder.text_token match, :delimiter encoder.end_group :string - state = :initial + state = :code_segment label_expected = false elsif match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox) encoder.text_token match, :char elsif match = scan(/ \\ | $ /x) encoder.end_group :string encoder.text_token match, :error - state = :initial + state = :code_segment label_expected = false else raise_inspect "else case \" reached; %p not handled." % peek(1), encoder @@ -172,28 +187,28 @@ def scan_tokens encoder, options when :include_expected if match = scan(/<[^>\n]+>?|"[^"\n\\]*(?:\\.[^"\n\\]*)*"?/) encoder.text_token match, :include - state = :initial + state = :code_segment elsif match = scan(/\s+/) encoder.text_token match, :space - state = :initial if match.index ?\n + state = :code_segment if match.index ?\n else - state = :initial + state = :code_segment end when :class_name_expected if match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x) encoder.text_token match, :class - state = :initial + state = :code_segment elsif match = scan(/\s+/) encoder.text_token match, :space else encoder.text_token getch, :error - state = :initial + state = :code_segment end From f2d6d7441f50a74b8abfabbba90fc53e5e83aad0 Mon Sep 17 00:00:00 2001 From: Scott Roland Date: Sun, 13 Jan 2013 20:49:57 +0000 Subject: [PATCH 9/9] Add e Sized and Unsized numbers --- lib/coderay/scanners/e.rb | 47 ++++++++++++++++++++++++++------------- 1 file changed, 31 insertions(+), 16 deletions(-) diff --git a/lib/coderay/scanners/e.rb b/lib/coderay/scanners/e.rb index 89cd39de..c6fac2bc 100644 --- a/lib/coderay/scanners/e.rb +++ b/lib/coderay/scanners/e.rb @@ -92,6 +92,37 @@ def scan_tokens encoder, options match << scan_until(/ ^\# (?:elif|else|endif) .*? $ | \z /xm) unless eos? encoder.text_token match, :comment + ######################### + # 4.1.4.2 Sized numbers + elsif match = scan(/[0-9]+'[bB][0-1_]+/) + label_expected = false + encoder.text_token match, :binary + elsif match = scan(/[0-9]+'[oO][0-7_]+/) + label_expected = false + encoder.text_token match, :octal + elsif match = scan(/[0-9]+'[dD][0-9_a-f]+/) + label_expected = false + encoder.text_token match, :integer + elsif match = scan(/[0-9]+'[hxHX][0-9_a-f]+/) + label_expected = false + encoder.text_token match, :hex + + ######################### + # 4.1.4.1 Unsized numbers + elsif match = scan(/0x[0-9_a-f]+/) + label_expected = false + encoder.text_token match, :hex + elsif match = scan(/0o([0-7_]+)/) + label_expected = false + encoder.text_token match, :octal + elsif match = scan(/0b[0-1_]+/) + label_expected = false + encoder.text_token match, :binary + elsif match = scan(/\-?[0-9_]+[kmKM]?/) + label_expected = false + encoder.text_token match, :integer + + elsif match = scan(/ [-+*=<>?:;,!&^|()\[\]{}~%]+ | \/=? | \.(?!\d) /x) label_expected = match =~ /[;\{\}]/ if case_expected @@ -140,22 +171,6 @@ def scan_tokens encoder, options label_expected = false encoder.text_token match, :char - elsif match = scan(/0[xX][0-9_A-Fa-f]+/) - label_expected = false - encoder.text_token match, :hex - - elsif match = scan(/(?:0[0-7]+)(?![89.eEfF])/) - label_expected = false - encoder.text_token match, :octal - - elsif match = scan(/0[b][0-9_]+/) - label_expected = false - encoder.text_token match, :binary - - elsif match = scan(/(?:\d+)(?![.eEfF])L?L?/) - label_expected = false - encoder.text_token match, :integer - elsif match = scan(/\d[fF]?|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/) label_expected = false encoder.text_token match, :float