From 9ea57bafbbcc75bdab0960efd5eaa292eb669ea1 Mon Sep 17 00:00:00 2001 From: Filipe Garcia Date: Wed, 21 Nov 2018 09:56:06 +0100 Subject: [PATCH 01/13] Added CSV Schema language --- lib/rouge/demos/csvs | 8 +++++ lib/rouge/lexers/csvs.rb | 69 ++++++++++++++++++++++++++++++++++++++++ spec/lexers/csvs_spec.rb | 22 +++++++++++++ spec/visual/samples/csvs | 22 +++++++++++++ 4 files changed, 121 insertions(+) create mode 100644 lib/rouge/demos/csvs create mode 100644 lib/rouge/lexers/csvs.rb create mode 100644 spec/lexers/csvs_spec.rb create mode 100644 spec/visual/samples/csvs diff --git a/lib/rouge/demos/csvs b/lib/rouge/demos/csvs new file mode 100644 index 0000000000..c329065508 --- /dev/null +++ b/lib/rouge/demos/csvs @@ -0,0 +1,8 @@ +version 1.1 +@totalColumns 5 +@separator ',' +Transaction_Date: xDate +Transaction_ID: notEmpty +Originator_Name: notEmpty +Originator_Address: any("yes","no") +Originator_Country: notEmpty \ No newline at end of file diff --git a/lib/rouge/lexers/csvs.rb b/lib/rouge/lexers/csvs.rb new file mode 100644 index 0000000000..09cb9221a7 --- /dev/null +++ b/lib/rouge/lexers/csvs.rb @@ -0,0 +1,69 @@ +# -*- coding: utf-8 -*- # +# frozen_string_literal: true + +module Rouge + module Lexers + class CSVS < RegexLexer + tag 'csvs' + title "csvs" + desc 'The CSV Schema Language (http://digital-preservation.github.io/csv-schema/)' + mimetypes 'text/x-csvssrc' + filenames '*.csvs' + + def self.detect?(text) + return true if text.shebang? 'csvs' + end + + def self.constants + @constants ||= Set.new %w(nil false true) + end + + def self.builtins + @builtins ||= Set.new %w( + args call clone do doFile doString else elseif for if list + method return super then + ) + end + + state :root do + rule /\s+/m, Text + rule %r(//.*?\n), Comment::Single + rule %r(#.*?\n), Comment::Single + rule %r(/(\\\n)?[*].*?[*](\\\n)?/)m, Comment::Multiline + rule %r(/[+]), Comment::Multiline, :nested_comment + + rule /"(\\\\|\\"|[^"])*"/, Str + + rule %r(:?:=), Keyword + rule /[()]/, Punctuation + + rule %r([-=;,*+> 'foo.csvs' + end + + it 'guesses by mimetype' do + assert_guess :mimetype => 'text/x-csvssrc' + end + + it 'guesses by source' do + assert_guess :source => '#!/usr/local/bin/csvs' + end + end +end diff --git a/spec/visual/samples/csvs b/spec/visual/samples/csvs new file mode 100644 index 0000000000..42bef2f78a --- /dev/null +++ b/spec/visual/samples/csvs @@ -0,0 +1,22 @@ +version 1.0 +@totalColumns 19 +@separator ',' +Customer_Name: notEmpty +Customer_Date_of_Birth: xDate +Customer_Place_of_Birth: notEmpty +Customer_Address: notEmpty +Customer_Account_number: notEmpty +Customer_Legal_Entity_Type: notEmpty +Customer_Industry: notEmpty +Adverse_Information_Search: xDate +Customer_Product_Usage: notEmpty +Customer_Net_Worth: notEmpty +Customer_Cash_Balance: notEmpty +Account_Open_Date: notEmpty +CDD_Completion_Date: notEmpty +System_Customer_Risk_Rating: notEmpty +PEP_Status: any("yes","no") +Associated_PEP: any("yes","no") +EDD_Triggered: any("yes","no") +Last_CDD_Renewal_Date: xDate +Relationship_Manager_Employee_ID: notEmpty \ No newline at end of file From 7fa40b94411ac2069993193a7af14d4ef8fd9fe7 Mon Sep 17 00:00:00 2001 From: Michael Camilleri Date: Tue, 3 Sep 2019 08:24:47 +0900 Subject: [PATCH 02/13] Simplify description URL --- lib/rouge/lexers/csvs.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/rouge/lexers/csvs.rb b/lib/rouge/lexers/csvs.rb index 09cb9221a7..eaec0cd855 100644 --- a/lib/rouge/lexers/csvs.rb +++ b/lib/rouge/lexers/csvs.rb @@ -6,7 +6,7 @@ module Lexers class CSVS < RegexLexer tag 'csvs' title "csvs" - desc 'The CSV Schema Language (http://digital-preservation.github.io/csv-schema/)' + desc 'The CSV Schema Language (digital-preservation.github.io)' mimetypes 'text/x-csvssrc' filenames '*.csvs' From 974c664f1baafb89d0caf950d9d06e4b66b2c381 Mon Sep 17 00:00:00 2001 From: Michael Camilleri Date: Tue, 3 Sep 2019 08:25:14 +0900 Subject: [PATCH 03/13] Remove MIME type --- lib/rouge/lexers/csvs.rb | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/rouge/lexers/csvs.rb b/lib/rouge/lexers/csvs.rb index eaec0cd855..f3f4f284bf 100644 --- a/lib/rouge/lexers/csvs.rb +++ b/lib/rouge/lexers/csvs.rb @@ -7,7 +7,6 @@ class CSVS < RegexLexer tag 'csvs' title "csvs" desc 'The CSV Schema Language (digital-preservation.github.io)' - mimetypes 'text/x-csvssrc' filenames '*.csvs' def self.detect?(text) From 986cccf2271a0f765eaae62ed6c33ea18fcb1df4 Mon Sep 17 00:00:00 2001 From: Michael Camilleri Date: Tue, 3 Sep 2019 08:26:07 +0900 Subject: [PATCH 04/13] Remove :detect? method --- lib/rouge/lexers/csvs.rb | 4 ---- 1 file changed, 4 deletions(-) diff --git a/lib/rouge/lexers/csvs.rb b/lib/rouge/lexers/csvs.rb index f3f4f284bf..9e5892fdf3 100644 --- a/lib/rouge/lexers/csvs.rb +++ b/lib/rouge/lexers/csvs.rb @@ -9,10 +9,6 @@ class CSVS < RegexLexer desc 'The CSV Schema Language (digital-preservation.github.io)' filenames '*.csvs' - def self.detect?(text) - return true if text.shebang? 'csvs' - end - def self.constants @constants ||= Set.new %w(nil false true) end From 6bfb48fce506f8ac4c6226707190dc80528fb03e Mon Sep 17 00:00:00 2001 From: Michael Camilleri Date: Tue, 3 Sep 2019 08:26:57 +0900 Subject: [PATCH 05/13] Reformat built-ins list --- lib/rouge/lexers/csvs.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/rouge/lexers/csvs.rb b/lib/rouge/lexers/csvs.rb index 9e5892fdf3..87863416ca 100644 --- a/lib/rouge/lexers/csvs.rb +++ b/lib/rouge/lexers/csvs.rb @@ -15,8 +15,8 @@ def self.constants def self.builtins @builtins ||= Set.new %w( - args call clone do doFile doString else elseif for if list - method return super then + args call clone do doFile doString else elseif for if list method + return super then ) end From d724011b28c78eafb50beffab6939f83df4843b7 Mon Sep 17 00:00:00 2001 From: Michael Camilleri Date: Tue, 3 Sep 2019 08:30:37 +0900 Subject: [PATCH 06/13] Remove ambiguous regular expression literals --- lib/rouge/lexers/csvs.rb | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/lib/rouge/lexers/csvs.rb b/lib/rouge/lexers/csvs.rb index 87863416ca..37b42de1e0 100644 --- a/lib/rouge/lexers/csvs.rb +++ b/lib/rouge/lexers/csvs.rb @@ -21,22 +21,22 @@ def self.builtins end state :root do - rule /\s+/m, Text + rule %r/\s+/m, Text rule %r(//.*?\n), Comment::Single rule %r(#.*?\n), Comment::Single rule %r(/(\\\n)?[*].*?[*](\\\n)?/)m, Comment::Multiline rule %r(/[+]), Comment::Multiline, :nested_comment - rule /"(\\\\|\\"|[^"])*"/, Str + rule %r/"(\\\\|\\"|[^"])*"/, Str rule %r(:?:=), Keyword - rule /[()]/, Punctuation + rule %r/[()]/, Punctuation rule %r([-=;,*+> Date: Tue, 3 Sep 2019 08:31:32 +0900 Subject: [PATCH 07/13] Remove unnecessary tests --- spec/lexers/csvs_spec.rb | 8 -------- 1 file changed, 8 deletions(-) diff --git a/spec/lexers/csvs_spec.rb b/spec/lexers/csvs_spec.rb index ca3d889234..eeb10ca337 100644 --- a/spec/lexers/csvs_spec.rb +++ b/spec/lexers/csvs_spec.rb @@ -10,13 +10,5 @@ it 'guesses by filename' do assert_guess :filename => 'foo.csvs' end - - it 'guesses by mimetype' do - assert_guess :mimetype => 'text/x-csvssrc' - end - - it 'guesses by source' do - assert_guess :source => '#!/usr/local/bin/csvs' - end end end From b7d2abfaf53fe5c43223e320f87ab7b80b465e68 Mon Sep 17 00:00:00 2001 From: Michael Camilleri Date: Tue, 3 Sep 2019 08:34:44 +0900 Subject: [PATCH 08/13] Remove unnecessary newlines from pattern --- lib/rouge/lexers/csvs.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/rouge/lexers/csvs.rb b/lib/rouge/lexers/csvs.rb index 37b42de1e0..c6460314e8 100644 --- a/lib/rouge/lexers/csvs.rb +++ b/lib/rouge/lexers/csvs.rb @@ -22,8 +22,8 @@ def self.builtins state :root do rule %r/\s+/m, Text - rule %r(//.*?\n), Comment::Single - rule %r(#.*?\n), Comment::Single + rule %r(//.*), Comment::Single + rule %r(#.*), Comment::Single rule %r(/(\\\n)?[*].*?[*](\\\n)?/)m, Comment::Multiline rule %r(/[+]), Comment::Multiline, :nested_comment From a2ba675d14ad28274e3f84c14aa3d3de7d79eafa Mon Sep 17 00:00:00 2001 From: Michael Camilleri Date: Tue, 3 Sep 2019 08:56:33 +0900 Subject: [PATCH 09/13] Conform string rule to spec --- lib/rouge/lexers/csvs.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/rouge/lexers/csvs.rb b/lib/rouge/lexers/csvs.rb index c6460314e8..e966ca437f 100644 --- a/lib/rouge/lexers/csvs.rb +++ b/lib/rouge/lexers/csvs.rb @@ -27,7 +27,7 @@ def self.builtins rule %r(/(\\\n)?[*].*?[*](\\\n)?/)m, Comment::Multiline rule %r(/[+]), Comment::Multiline, :nested_comment - rule %r/"(\\\\|\\"|[^"])*"/, Str + rule %r/"[^"]*"/, Str::Double rule %r(:?:=), Keyword rule %r/[()]/, Punctuation From d0d8d21d73d1c4334bdea2e04fd6f9415fde8edf Mon Sep 17 00:00:00 2001 From: Michael Camilleri Date: Tue, 3 Sep 2019 08:59:15 +0900 Subject: [PATCH 10/13] Add rule for character literals --- lib/rouge/lexers/csvs.rb | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/rouge/lexers/csvs.rb b/lib/rouge/lexers/csvs.rb index e966ca437f..8721e2b9fb 100644 --- a/lib/rouge/lexers/csvs.rb +++ b/lib/rouge/lexers/csvs.rb @@ -28,6 +28,7 @@ def self.builtins rule %r(/[+]), Comment::Multiline, :nested_comment rule %r/"[^"]*"/, Str::Double + rule %r/'[^\r\n\f']'/, Str::Char rule %r(:?:=), Keyword rule %r/[()]/, Punctuation From b7c2829e373a16c81a389eccf88b5edd163ef127 Mon Sep 17 00:00:00 2001 From: Michael Camilleri Date: Tue, 3 Sep 2019 09:56:54 +0900 Subject: [PATCH 11/13] Remove constants --- lib/rouge/lexers/csvs.rb | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/lib/rouge/lexers/csvs.rb b/lib/rouge/lexers/csvs.rb index 8721e2b9fb..20b721ad7c 100644 --- a/lib/rouge/lexers/csvs.rb +++ b/lib/rouge/lexers/csvs.rb @@ -9,10 +9,6 @@ class CSVS < RegexLexer desc 'The CSV Schema Language (digital-preservation.github.io)' filenames '*.csvs' - def self.constants - @constants ||= Set.new %w(nil false true) - end - def self.builtins @builtins ||= Set.new %w( args call clone do doFile doString else elseif for if list method @@ -40,9 +36,7 @@ def self.builtins rule %r/[a-z_]\w*/ do |m| name = m[0] - if self.class.constants.include? name - token Keyword::Constant - elsif self.class.builtins.include? name + if self.class.builtins.include? name token Name::Builtin else token Name From faacf42dd272ad590a4c7e204f571a1cd1cb0687 Mon Sep 17 00:00:00 2001 From: Michael Camilleri Date: Tue, 3 Sep 2019 18:42:23 +0900 Subject: [PATCH 12/13] Remove built-ins --- lib/rouge/lexers/csvs.rb | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) diff --git a/lib/rouge/lexers/csvs.rb b/lib/rouge/lexers/csvs.rb index 20b721ad7c..2c7af788f8 100644 --- a/lib/rouge/lexers/csvs.rb +++ b/lib/rouge/lexers/csvs.rb @@ -9,13 +9,6 @@ class CSVS < RegexLexer desc 'The CSV Schema Language (digital-preservation.github.io)' filenames '*.csvs' - def self.builtins - @builtins ||= Set.new %w( - args call clone do doFile doString else elseif for if list method - return super then - ) - end - state :root do rule %r/\s+/m, Text rule %r(//.*), Comment::Single @@ -33,15 +26,7 @@ def self.builtins rule %r/[A-Z]\w*/, Name::Class - rule %r/[a-z_]\w*/ do |m| - name = m[0] - - if self.class.builtins.include? name - token Name::Builtin - else - token Name - end - end + rule %r/[a-z_]\w*/, Name rule %r((\d+[.]?\d*|\d*[.]\d+)(e[+-]?[0-9]+)?)i, Num::Float rule %r/\d+/, Num::Integer From 690cad6cfb81900db57501666b99d16b6741190e Mon Sep 17 00:00:00 2001 From: Michael Camilleri Date: Wed, 4 Sep 2019 09:24:29 +0900 Subject: [PATCH 13/13] Conform lexer rules to specification --- lib/rouge/lexers/csvs.rb | 38 ++++++++++++++-------------- spec/visual/samples/csvs | 54 ++++++++++++++++++++++++---------------- 2 files changed, 51 insertions(+), 41 deletions(-) diff --git a/lib/rouge/lexers/csvs.rb b/lib/rouge/lexers/csvs.rb index 2c7af788f8..e4294453ea 100644 --- a/lib/rouge/lexers/csvs.rb +++ b/lib/rouge/lexers/csvs.rb @@ -11,33 +11,33 @@ class CSVS < RegexLexer state :root do rule %r/\s+/m, Text - rule %r(//.*), Comment::Single - rule %r(#.*), Comment::Single - rule %r(/(\\\n)?[*].*?[*](\\\n)?/)m, Comment::Multiline - rule %r(/[+]), Comment::Multiline, :nested_comment - rule %r/"[^"]*"/, Str::Double - rule %r/'[^\r\n\f']'/, Str::Char - - rule %r(:?:=), Keyword - rule %r/[()]/, Punctuation + rule %r(//[\S\t ]*), Comment::Single + rule %r(/\*[^*]*\*/)m, Comment::Multiline - rule %r([-=;,*+>