Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a lexer for Ada 2012. #1255

Merged
merged 2 commits into from
Jul 11, 2019
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions lib/rouge/demos/ada
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
with Ada.Directories;
with Ada.Direct_IO;
with Ada.Text_IO;

procedure Extra_IO.Read_File (Name : String) is

package Dirs renames Ada.Directories;
package Text_IO renames Ada.Text_IO;

-- Get the size of the file for a new string.
Size : Natural := Natural (Dirs.Size (Name));
subtype File_String is String (1 .. Size);

-- Instantiate Direct_IO for our file type.
package FIO is new Ada.Direct_IO (File_String);

File : FIO.File_Type;
Contents : File_String;

begin
FIO.Open (File, FIO.In_File, Name);
FIO.Read (File, Contents);
FIO.Close (File);

Text_IO.Put (Contents);
end Extra_IO.Read_File;
146 changes: 146 additions & 0 deletions lib/rouge/lexers/ada.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
# -*- coding: utf-8 -*- #
# frozen_string_literal: true

module Rouge
module Lexers
class Ada < RegexLexer
tag 'ada'
filenames '*.ada', '*.ads', '*.adb', '*.gpr'
mimetypes 'text/x-ada'

title 'Ada'
desc 'The Ada 2012 programming language'

# Ada identifiers are Unicode with underscores only allowed as separators.
ID = /\b[[:alpha:]](?:\p{Pc}?[[:alnum:]])*\b/

# Numerals can also contain underscores.
NUM = /\d(_?\d)*/
XNUM = /\h(_?\h)*/
EXP = /(E[-+]?#{NUM})?/i

# Return a hash mapping lower-case identifiers to token classes.
def self.idents
@idents ||= Hash.new(Name).tap do |h|
%w(
abort abstract accept access aliased all array at begin body
case constant declare delay delta digits do else elsif end
exception exit for generic goto if in interface is limited
loop new null of others out overriding pragma private
protected raise range record renames requeue return reverse
select separate some synchronized tagged task terminate then
until use when while with
).each {|w| h[w] = Keyword}

%w(abs and mod not or rem xor).each {|w| h[w] = Operator::Word}

%w(
entry function package procedure subtype type
).each {|w| h[w] = Keyword::Declaration}

%w(
boolean character constraint_error duration float integer
natural positive long_float long_integer long_long_float
long_long_integer program_error short_float short_integer
short_short_integer storage_error string tasking_error
wide_character wide_string wide_wide_character
wide_wide_string
).each {|w| h[w] = Name::Builtin}
end
end

state :whitespace do
rule %r{\s+}m, Text
rule %r{--.*$}, Comment::Single
end

state :dquote_string do
rule %r{[^"\n]+}, Literal::String::Double
rule %r{""}, Literal::String::Escape
rule %r{"}, Literal::String::Double, :pop!
rule %r{\n}, Error, :pop!
pyrmont marked this conversation as resolved.
Show resolved Hide resolved
end

state :attr do
mixin :whitespace
rule ID, Name::Attribute, :pop!
rule %r{}, Text, :pop!
end

# Handle a dotted name immediately following a declaration keyword.
state :decl_name do
mixin :whitespace
rule %r{body\b}i, Keyword::Declaration # package body Foo.Bar is...
rule %r{(#{ID})(\.)} do
groups Name::Namespace, Punctuation
end
# function "<=" (Left, Right: Type) is ...
rule %r{#{ID}|"(and|or|xor|/?=|<=?|>=?|\+|–|&\|/|mod|rem|\*?\*|abs|not)"},
Name::Function, :pop!
rule %r{}, Text, :pop!
end

# Handle a sequence of library unit names: with Ada.Foo, Ada.Bar;
state :libunit_name do
mixin :whitespace
rule ID do
token Name::Namespace
goto :libunit_sep
end
rule %r{}, Text, :pop!
end

state :libunit_sep do
pyrmont marked this conversation as resolved.
Show resolved Hide resolved
mixin :whitespace
rule %r{[.,]} do
token Punctuation
goto :libunit_name
end
rule %r{}, Text, :pop!
end

state :root do
mixin :whitespace

# String literals.
rule %r{'.'}, Literal::String::Char
rule %r{"[^"\n]*}, Literal::String::Double, :dquote_string

# Real literals.
rule %r{#{NUM}\.#{NUM}#{EXP}}, Literal::Number::Float
rule %r{#{NUM}##{XNUM}\.#{XNUM}##{EXP}}, Literal::Number::Float

# Integer literals.
rule %r{2#[01](_?[01])*##{EXP}}, Literal::Number::Bin
rule %r{8#[0-7](_?[0-7])*##{EXP}}, Literal::Number::Oct
rule %r{16##{XNUM}*##{EXP}}, Literal::Number::Hex
rule %r{#{NUM}##{XNUM}##{EXP}}, Literal::Number::Integer
rule %r{#{NUM}#\w+#}, Error
rule %r{#{NUM}#{EXP}}, Literal::Number::Integer

# Special constructs.
rule %r{'}, Punctuation, :attr
rule %r{<<#{ID}>>}, Name::Label

# Context clauses are tricky because the 'with' keyword is used
# for many purposes. Detect at beginning of the line only.
rule %r{^(limited\s+)?(private\s+)?with\b}i, Keyword::Namespace, :libunit_name
pyrmont marked this conversation as resolved.
Show resolved Hide resolved

# Operators and punctuation characters.
rule %r{[+*/&<=>|]|-|=>|\.\.|\*\*|[:></]=|<<|>>|<>}, Operator
rule %r{[.,:;()]}, Punctuation

rule ID do |m|
t = self.class.idents[m[0].downcase]
token t
if t == Keyword::Declaration
push :decl_name
end
end

# Flag word-like things that don't match the ID pattern.
rule %r{\b(\p{Pc}|[[alpha]])\p{Word}*}, Error
end
end
end
end
107 changes: 107 additions & 0 deletions spec/lexers/ada_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
# -*- coding: utf-8 -*- #
# frozen_string_literal: true

describe Rouge::Lexers::Ada do
let(:subject) { Rouge::Lexers::Ada.new }

describe 'guessing' do
include Support::Guessing

it 'guesses by filename' do
assert_guess :filename => 'foo.ada'
assert_guess :filename => 'foo.adb'
assert_guess :filename => 'foo.ads'
end

it 'guesses by mimetype' do
assert_guess :mimetype => 'text/x-ada'
end
end

describe 'lexing' do
include Support::Lexing

it 'classifies identifiers' do
assert_tokens_equal 'constant Boolean := A and B',
['Keyword', 'constant'],
['Text', ' '],
['Name.Builtin', 'Boolean'],
['Text', ' '],
['Operator', ':='],
['Text', ' '],
['Name', 'A'],
['Text', ' '],
['Operator.Word', 'and'],
['Text', ' '],
['Name', 'B']
end

it 'accepts Unicode identifiers' do
assert_tokens_equal '東京', ['Name', '東京']
assert_tokens_equal '0東京', ['Error', '0'], ['Name', '東京']
assert_tokens_equal '東京0', ['Name', '東京0']
end

it 'rejects identifiers with double or trailing underscores' do
assert_tokens_equal '_ab', ['Error', '_ab']
assert_tokens_equal 'a__b', ['Error', 'a__b']
assert_tokens_equal 'a_b', ['Name', 'a_b']
assert_tokens_equal 'ab_', ['Error', 'ab_']
end

it 'understands other connecting punctuation' do
assert_tokens_equal 'a﹏b', ['Name', 'a﹏b']
assert_tokens_equal '﹏ab', ['Error', '﹏ab']
assert_tokens_equal 'a﹏﹏b', ['Error', 'a﹏﹏b']
assert_tokens_equal 'ab﹏', ['Error', 'ab﹏']
end

it 'classifies based number literals' do
assert_tokens_equal '2#0001_1110#', ['Literal.Number.Bin', '2#0001_1110#']
assert_tokens_equal '2#0001__1110#', ['Error', '2#0001__1110#']
assert_tokens_equal '8#1234_0000#', ['Literal.Number.Oct', '8#1234_0000#']
assert_tokens_equal '16#abc_BBB_12#', ['Literal.Number.Hex', '16#abc_BBB_12#']
assert_tokens_equal '4#1230000#e+5', ['Literal.Number.Integer', '4#1230000#e+5']
assert_tokens_equal '2#0001_1110#e3', ['Literal.Number.Bin', '2#0001_1110#e3']

assert_tokens_equal '16#abc_BBB.12#', ['Literal.Number.Float', '16#abc_BBB.12#']
end

it 'recognizes exponents in integers and reals' do
assert_tokens_equal '1e6', ['Literal.Number.Integer', '1e6']
assert_tokens_equal '123_456', ['Literal.Number.Integer', '123_456']
assert_tokens_equal '3.14159_26', ['Literal.Number.Float', '3.14159_26']
assert_tokens_equal '3.141_592e-20', ['Literal.Number.Float', '3.141_592e-20']
end

it 'highlights escape sequences inside doubly quoted strings' do
assert_tokens_equal '"Archimedes said ""Εύρηκα"""',
['Literal.String.Double', '"Archimedes said '],
['Literal.String.Escape', '""'],
['Literal.String.Double', 'Εύρηκα'],
['Literal.String.Escape', '""'],
['Literal.String.Double', '"']
end

it 'marks function names in declarations' do
assert_tokens_equal 'Entry Foo IS',
['Keyword.Declaration', 'Entry'],
['Text', ' '],
['Name.Function', 'Foo'],
['Text', ' '],
['Keyword', 'IS']

assert_tokens_equal 'package body Ada.Foo IS',
['Keyword.Declaration', 'package'],
['Text', ' '],
['Keyword.Declaration', 'body'],
['Text', ' '],
['Name.Namespace', 'Ada'],
['Punctuation', '.'],
['Name.Function', 'Foo'],
['Text', ' '],
['Keyword', 'IS']
end
end
end

Loading