From 6e48606bf4bddade9766ca92c4c8f61a57fad634 Mon Sep 17 00:00:00 2001 From: Trond Aasan Date: Thu, 21 Feb 2019 19:00:37 +0100 Subject: [PATCH 1/7] Add PostScript Lexer --- lib/rouge/demos/postscript | 9 + lib/rouge/lexers/postscript.rb | 97 ++++++ spec/visual/samples/postscript | 537 +++++++++++++++++++++++++++++++++ 3 files changed, 643 insertions(+) create mode 100644 lib/rouge/demos/postscript create mode 100644 lib/rouge/lexers/postscript.rb create mode 100644 spec/visual/samples/postscript diff --git a/lib/rouge/demos/postscript b/lib/rouge/demos/postscript new file mode 100644 index 0000000000..9e18971897 --- /dev/null +++ b/lib/rouge/demos/postscript @@ -0,0 +1,9 @@ + %!PS + /Courier % name the desired font + 20 selectfont % choose the size in points and establish + % the font as the current one + 72 500 moveto % position the current point at + % coordinates 72, 500 (the origin is at the + % lower-left corner of the page) + (Hello world!) show % stroke the text in parentheses + showpage % print all on the page diff --git a/lib/rouge/lexers/postscript.rb b/lib/rouge/lexers/postscript.rb new file mode 100644 index 0000000000..f9e360632a --- /dev/null +++ b/lib/rouge/lexers/postscript.rb @@ -0,0 +1,97 @@ +# -*- coding: utf-8 -*- # +# frozen_string_literal: true + +# Adapted from pygments PostScriptLexer +module Rouge + module Lexers + class PostScript < RegexLexer + """ + Lexer for PostScript files. + + The PostScript Language Reference published by Adobe at + + is the authority for this. 
+ """ + + title "PostScript" + desc "PostScript" + tag "postscript" + aliases "postscr", "postscript" + filenames "*.ps", "*.eps" + mimetypes "application/postscript" + + delimiter = %s"()<>\[\]{}/%\s" + delimiter_end = Regexp.new("(?=[#{delimiter}])") + valid_name_chars = Regexp.new("[^#{delimiter}]") + valid_name = /#{valid_name_chars}+#{delimiter_end}/ + + # These keywords taken from + # + # Is there an authoritative list anywhere that doesn't involve + # trawling documentation? + keywords = %w/abs add aload arc arcn array atan begin + bind ceiling charpath clip closepath concat + concatmatrix copy cos currentlinewidth currentmatrix + currentpoint curveto cvi cvs def defaultmatrix + dict dictstackoverflow div dtransform dup end + exch exec exit exp fill findfont floor get + getinterval grestore gsave gt identmatrix idiv + idtransform index invertmatrix itransform length + lineto ln load log loop matrix mod moveto + mul neg newpath pathforall pathbbox pop print + pstack put quit rand rangecheck rcurveto repeat + restore rlineto rmoveto roll rotate round run + save scale scalefont setdash setfont setgray + setlinecap setlinejoin setlinewidth setmatrix + setrgbcolor shfill show showpage sin sqrt + stack stringwidth stroke strokepath sub syntaxerror + transform translate truncate typecheck undefined + undefinedfilename undefinedresult/ + + state :root do + # All comment types + rule %r'^%!.+?$', Comment::Preproc + rule %r'%%.*?$', Comment::Special + rule %r'(^%.*?$){2,}', Comment::Multiline + rule %r'%.*?$', Comment::Single + + # String literals are awkward; enter separate state. 
+ rule %r'\(', Str, :stringliteral + + # References + rule %r'/#{valid_name}', Name::Variable + + rule %r'[{}<>\[\]]', Punctuation + + rule %r'(?:#{keywords.join('|')})#{delimiter_end}', Name::Builtin + + # Conditionals / flow control + rule /(eq|ne|g[et]|l[et]|and|or|not|if(?:else)?|for(?:all)?)#{delimiter_end}/, Keyword::Reserved + rule /(false|true)#{delimiter_end}/, Keyword::Constant + + # Numbers + rule %r'<[0-9A-Fa-f]+>#{delimiter_end}', Num::Hex + # Slight abuse: use Oct to signify any explicit base system + rule %r'[0-9]+\#(\-|\+)?([0-9]+\.?|[0-9]*\.[0-9]+|[0-9]+\.[0-9]*)((e|E)[0-9]+)?#{delimiter_end}', Num::Oct + rule %r'(\-|\+)?([0-9]+\.?|[0-9]*\.[0-9]+|[0-9]+\.[0-9]*)((e|E)[0-9]+)?#{delimiter_end}', Num::Float + rule %r'(\-|\+)?[0-9]+#{delimiter_end}', Num::Integer + + # Names + rule valid_name, Name::Function # Anything else is executed + + rule /\s+/, Text + end + + state :stringliteral do + rule %r'[^()\\]+', Str + rule %r'\\', Str::Escape, :escape + rule %r'\(', Str, :stringliteral + rule %r'\)', Str, :pop! + end + + state :escape do + rule /[0-7]{1,3}|n|r|t|b|f|\\|\(|\)/, Str::Escape, :pop! + end + end + end +end diff --git a/spec/visual/samples/postscript b/spec/visual/samples/postscript new file mode 100644 index 0000000000..fa03c52cb1 --- /dev/null +++ b/spec/visual/samples/postscript @@ -0,0 +1,537 @@ +%!PS-Adobe-3.0 +%%BoundingBox: 38 24 574 768 +%%Title: Enscript Output +%%For: Trond Aasan +%%Creator: GNU Enscript 1.6.6 +%%CreationDate: Tue Feb 19 15:28:49 2019 +%%Orientation: Portrait +%%Pages: (atend) +%%DocumentMedia: Letter 612 792 0 () () +%%DocumentNeededResources: (atend) +%%EndComments +%%BeginProlog +%%BeginResource: procset Enscript-Prolog 1.6 6 +% +% Procedures. 
+% + +/_S { % save current state + /_s save def +} def +/_R { % restore from saved state + _s restore +} def + +/S { % showpage protecting gstate + gsave + showpage + grestore +} bind def + +/MF { % fontname newfontname -> - make a new encoded font + /newfontname exch def + /fontname exch def + + /fontdict fontname findfont def + /newfont fontdict maxlength dict def + + fontdict { + exch + dup /FID eq { + % skip FID pair + pop pop + } { + % copy to the new font dictionary + exch newfont 3 1 roll put + } ifelse + } forall + + newfont /FontName newfontname put + + % insert only valid encoding vectors + encoding_vector length 256 eq { + newfont /Encoding encoding_vector put + } if + + newfontname newfont definefont pop +} def + +/MF_PS { % fontname newfontname -> - make a new font preserving its enc + /newfontname exch def + /fontname exch def + + /fontdict fontname findfont def + /newfont fontdict maxlength dict def + + fontdict { + exch + dup /FID eq { + % skip FID pair + pop pop + } { + % copy to the new font dictionary + exch newfont 3 1 roll put + } ifelse + } forall + + newfont /FontName newfontname put + + newfontname newfont definefont pop +} def + +/SF { % fontname width height -> - set a new font + /height exch def + /width exch def + + findfont + [width 0 0 height 0 0] makefont setfont +} def + +/SUF { % fontname width height -> - set a new user font + /height exch def + /width exch def + + /F-gs-user-font MF + /F-gs-user-font width height SF +} def + +/SUF_PS { % fontname width height -> - set a new user font preserving its enc + /height exch def + /width exch def + + /F-gs-user-font MF_PS + /F-gs-user-font width height SF +} def + +/M {moveto} bind def +/s {show} bind def + +/Box { % x y w h -> - define box path + /d_h exch def /d_w exch def /d_y exch def /d_x exch def + d_x d_y moveto + d_w 0 rlineto + 0 d_h rlineto + d_w neg 0 rlineto + closepath +} def + +/bgs { % x y height blskip gray str -> - show string with bg color + /str exch def + /gray exch 
def + /blskip exch def + /height exch def + /y exch def + /x exch def + + gsave + x y blskip sub str stringwidth pop height Box + gray setgray + fill + grestore + x y M str s +} def + +/bgcs { % x y height blskip red green blue str -> - show string with bg color + /str exch def + /blue exch def + /green exch def + /red exch def + /blskip exch def + /height exch def + /y exch def + /x exch def + + gsave + x y blskip sub str stringwidth pop height Box + red green blue setrgbcolor + fill + grestore + x y M str s +} def + +% Highlight bars. +/highlight_bars { % nlines lineheight output_y_margin gray -> - + gsave + setgray + /ymarg exch def + /lineheight exch def + /nlines exch def + + % This 2 is just a magic number to sync highlight lines to text. + 0 d_header_y ymarg sub 2 sub translate + + /cw d_output_w cols div def + /nrows d_output_h ymarg 2 mul sub lineheight div cvi def + + % for each column + 0 1 cols 1 sub { + cw mul /xp exch def + + % for each rows + 0 1 nrows 1 sub { + /rn exch def + rn lineheight mul neg /yp exch def + rn nlines idiv 2 mod 0 eq { + % Draw highlight bar. 4 is just a magic indentation. + xp 4 add yp cw 8 sub lineheight neg Box fill + } if + } for + } for + + grestore +} def + +% Line highlight bar. +/line_highlight { % x y width height gray -> - + gsave + /gray exch def + Box gray setgray fill + grestore +} def + +% Column separator lines. +/column_lines { + gsave + .1 setlinewidth + 0 d_footer_h translate + /cw d_output_w cols div def + 1 1 cols 1 sub { + cw mul 0 moveto + 0 d_output_h rlineto stroke + } for + grestore +} def + +% Column borders. 
+/column_borders { + gsave + .1 setlinewidth + 0 d_footer_h moveto + 0 d_output_h rlineto + d_output_w 0 rlineto + 0 d_output_h neg rlineto + closepath stroke + grestore +} def + +% Do the actual underlay drawing +/draw_underlay { + ul_style 0 eq { + ul_str true charpath stroke + } { + ul_str show + } ifelse +} def + +% Underlay +/underlay { % - -> - + gsave + 0 d_page_h translate + d_page_h neg d_page_w atan rotate + + ul_gray setgray + ul_font setfont + /dw d_page_h dup mul d_page_w dup mul add sqrt def + ul_str stringwidth pop dw exch sub 2 div ul_h_ptsize -2 div moveto + draw_underlay + grestore +} def + +/user_underlay { % - -> - + gsave + ul_x ul_y translate + ul_angle rotate + ul_gray setgray + ul_font setfont + 0 0 ul_h_ptsize 2 div sub moveto + draw_underlay + grestore +} def + +% Page prefeed +/page_prefeed { % bool -> - + statusdict /prefeed known { + statusdict exch /prefeed exch put + } { + pop + } ifelse +} def + +% Wrapped line markers +/wrapped_line_mark { % x y charwith charheight type -> - + /type exch def + /h exch def + /w exch def + /y exch def + /x exch def + + type 2 eq { + % Black boxes (like TeX does) + gsave + 0 setlinewidth + x w 4 div add y M + 0 h rlineto w 2 div 0 rlineto 0 h neg rlineto + closepath fill + grestore + } { + type 3 eq { + % Small arrows + gsave + .2 setlinewidth + x w 2 div add y h 2 div add M + w 4 div 0 rlineto + x w 4 div add y lineto stroke + + x w 4 div add w 8 div add y h 4 div add M + x w 4 div add y lineto + w 4 div h 8 div rlineto stroke + grestore + } { + % do nothing + } ifelse + } ifelse +} def + +% EPSF import. 
+ +/BeginEPSF { + /b4_Inc_state save def % Save state for cleanup + /dict_count countdictstack def % Count objects on dict stack + /op_count count 1 sub def % Count objects on operand stack + userdict begin + /showpage { } def + 0 setgray 0 setlinecap + 1 setlinewidth 0 setlinejoin + 10 setmiterlimit [ ] 0 setdash newpath + /languagelevel where { + pop languagelevel + 1 ne { + false setstrokeadjust false setoverprint + } if + } if +} bind def + +/EndEPSF { + count op_count sub { pos } repeat % Clean up stacks + countdictstack dict_count sub { end } repeat + b4_Inc_state restore +} bind def + +% Check PostScript language level. +/languagelevel where { + pop /gs_languagelevel languagelevel def +} { + /gs_languagelevel 1 def +} ifelse +%%EndResource +%%BeginResource: procset Enscript-Encoding-88591 1.6 6 +/encoding_vector [ +/.notdef /.notdef /.notdef /.notdef +/.notdef /.notdef /.notdef /.notdef +/.notdef /.notdef /.notdef /.notdef +/.notdef /.notdef /.notdef /.notdef +/.notdef /.notdef /.notdef /.notdef +/.notdef /.notdef /.notdef /.notdef +/.notdef /.notdef /.notdef /.notdef +/.notdef /.notdef /.notdef /.notdef +/space /exclam /quotedbl /numbersign +/dollar /percent /ampersand /quoteright +/parenleft /parenright /asterisk /plus +/comma /hyphen /period /slash +/zero /one /two /three +/four /five /six /seven +/eight /nine /colon /semicolon +/less /equal /greater /question +/at /A /B /C +/D /E /F /G +/H /I /J /K +/L /M /N /O +/P /Q /R /S +/T /U /V /W +/X /Y /Z /bracketleft +/backslash /bracketright /asciicircum /underscore +/quoteleft /a /b /c +/d /e /f /g +/h /i /j /k +/l /m /n /o +/p /q /r /s +/t /u /v /w +/x /y /z /braceleft +/bar /braceright /tilde /.notdef +/.notdef /.notdef /.notdef /.notdef +/.notdef /.notdef /.notdef /.notdef +/.notdef /.notdef /.notdef /.notdef +/.notdef /.notdef /.notdef /.notdef +/.notdef /.notdef /.notdef /.notdef +/.notdef /.notdef /.notdef /.notdef +/.notdef /.notdef /.notdef /.notdef +/.notdef /.notdef /.notdef /.notdef +/space 
/exclamdown /cent /sterling +/currency /yen /brokenbar /section +/dieresis /copyright /ordfeminine /guillemotleft +/logicalnot /hyphen /registered /macron +/degree /plusminus /twosuperior /threesuperior +/acute /mu /paragraph /bullet +/cedilla /onesuperior /ordmasculine /guillemotright +/onequarter /onehalf /threequarters /questiondown +/Agrave /Aacute /Acircumflex /Atilde +/Adieresis /Aring /AE /Ccedilla +/Egrave /Eacute /Ecircumflex /Edieresis +/Igrave /Iacute /Icircumflex /Idieresis +/Eth /Ntilde /Ograve /Oacute +/Ocircumflex /Otilde /Odieresis /multiply +/Oslash /Ugrave /Uacute /Ucircumflex +/Udieresis /Yacute /Thorn /germandbls +/agrave /aacute /acircumflex /atilde +/adieresis /aring /ae /ccedilla +/egrave /eacute /ecircumflex /edieresis +/igrave /iacute /icircumflex /idieresis +/eth /ntilde /ograve /oacute +/ocircumflex /otilde /odieresis /divide +/oslash /ugrave /uacute /ucircumflex +/udieresis /yacute /thorn /ydieresis +] def +%%EndResource +%%EndProlog +%%BeginSetup +%%IncludeResource: font Courier-Bold +%%IncludeResource: font Courier +/HFpt_w 10 def +/HFpt_h 10 def +/Courier-Bold /HF-gs-font MF +/HF /HF-gs-font findfont [HFpt_w 0 0 HFpt_h 0 0] makefont def +/Courier /F-gs-font MF +/F-gs-font 10 10 SF +/#copies 1 def +% Pagedevice definitions: +gs_languagelevel 1 gt { + << + /PageSize [612 792] + >> setpagedevice +} if +%%BeginResource: procset Enscript-Header-simple 1.6 6 + +/do_header { % print default simple header + gsave + d_header_x d_header_y HFpt_h 3 div add translate + + HF setfont + user_header_p { + 5 0 moveto user_header_left_str show + + d_header_w user_header_center_str stringwidth pop sub 2 div + 0 moveto user_header_center_str show + + d_header_w user_header_right_str stringwidth pop sub 5 sub + 0 moveto user_header_right_str show + } { + fname length fmodstr length add pagenumstr length add 95 6 idiv add d_header_w 6 idiv le{ + 5 0 moveto fname show + 45 0 rmoveto fmodstr show + 45 0 rmoveto pagenumstr show + } { + 5 0 moveto fmodstr show 
+ 45 0 rmoveto pagenumstr show + fname length d_header_w 6 idiv idiv 1 add 10 mul 5 exch moveto + 1 1 fname length d_header_w 6 idiv idiv + { + dup fname exch 1 sub d_header_w 6 idiv mul d_header_w 6 idiv getinterval show + 5 exch 10 mul fname length d_header_w 6 idiv idiv 1 add 10 mul exch sub moveto + } for + 5 10 moveto + fname fname length d_header_w 6 idiv idiv d_header_w 6 idiv mul dup fname length exch sub getinterval show + }ifelse + }ifelse + + grestore +} def +%%EndResource +/d_page_w 536 def +/d_page_h 744 def +/d_header_x 0 def +/d_header_y 700 def +/d_header_w 536 def +/d_header_h 44 def +/d_footer_x 0 def +/d_footer_y 0 def +/d_footer_w 536 def +/d_footer_h 0 def +/d_output_w 536 def +/d_output_h 700 def +/cols 1 def +%%EndSetup +%%Page: (1) 1 +%%BeginPageSetup +_S +38 24 translate +/pagenum 1 def +/fname (t.txt) def +/fdir (.) def +/ftail (t.txt) def +% User defined strings: +/fmodstr (ti. feb. 19 15:28:20 2019) def +/pagenumstr (1) def +/user_header_p false def +/user_footer_p false def +%%EndPageSetup +do_header +5 687 M +(Contributor Code of Conduct) s +5 676 M +(===========================) s +5 654 M +(Version 1.0) s +5 643 M +(-----------) s +5 621 M +(\(from [Coraline's awesome) s +5 610 M +(covenant]\(https://github.com/Bantik/contributor_covenant\)\)) s +5 588 M +(As contributors and maintainers of this project, we pledge to respect) s +5 577 M +(all people who contribute through reporting issues, posting feature) s +5 566 M +(requests, updating documentation, submitting pull requests or patches,) s +5 555 M +(and other activities.) s +5 533 M +(We are committed to making participation in this project a) s +5 522 M +(harrasment-free experience for everyone, regardless of level of) s +5 511 M +(experience, gender, gender identity and expression, sexual orientation,) s +5 500 M +(disability, personal appearance, body size, race, nationality, age, or) s +5 489 M +(religion.) 
s +5 467 M +(Examples of unacceptable behavior by participants include the use of) s +5 456 M +(sexual langauge or imagery, derogatory comments or personal attacks,) s +5 445 M +(trolling, public or private harassment, insults, or other unprofessional) s +5 434 M +(conduct.) s +5 412 M +(Project maintainers have the right and responsibility to remove, edit,) s +5 401 M +(or reject comments, commits, code, wiki edits, issues, and other) s +5 390 M +(contributions that are not aligned to this Code of Conduct. Project) s +5 379 M +(maintainers who do not follow the Code of Conduct may be removed from) s +5 368 M +(the project team.) s +5 346 M +(Instances of abusive, harassing, or otherwise unacceptable behavior may) s +5 335 M +(be reported by opening an issue or contacting one or more of the project) s +5 324 M +(maintainers.) s +_R +S +%%Trailer +%%Pages: 1 +%%DocumentNeededResources: font Courier-Bold Courier +%%EOF From 8acc03635152e8c9264863928e082737b9ab2523 Mon Sep 17 00:00:00 2001 From: Trond Aasan Date: Thu, 21 Feb 2019 19:01:38 +0100 Subject: [PATCH 2/7] Add Abc lexer --- lib/rouge/demos/abc | 10 ++ lib/rouge/lexers/abc.rb | 291 ++++++++++++++++++++++++++++++++++++++++ spec/lexers/abc_spec.rb | 19 +++ 3 files changed, 320 insertions(+) create mode 100644 lib/rouge/demos/abc create mode 100644 lib/rouge/lexers/abc.rb create mode 100644 spec/lexers/abc_spec.rb diff --git a/lib/rouge/demos/abc b/lib/rouge/demos/abc new file mode 100644 index 0000000000..1a6456960c --- /dev/null +++ b/lib/rouge/demos/abc @@ -0,0 +1,10 @@ +X: 1 +T: Cooley's +R: reel +M: 4/4 +L: 1/8 +K: Edor +|:D2|EBBA B2 EB|B2 AB dBAG|FDAD BDAD|FDAD dAFD| +EBBA B2 EB|B2 AB defg|afec dBAF|DEFD E2:| +|:gf|eB B2 efge|eB B2 gedB|A2 FA DAFA|A2 FA defg| +eB B2 eBgB|eB B2 defg|afec dBAF|DEFD E2:| diff --git a/lib/rouge/lexers/abc.rb b/lib/rouge/lexers/abc.rb new file mode 100644 index 0000000000..33fc62a487 --- /dev/null +++ b/lib/rouge/lexers/abc.rb @@ -0,0 +1,291 @@ +# -*- coding: utf-8 -*- # +# 
frozen_string_literal: true + +# Need to match /[^\r\n]*/ instead of /.*/ +# Seems like . matches \n + +module Rouge + module Lexers + class Abc < RegexLexer + title "Abc" + desc "Abc music notation" + tag "abc" + aliases "abc" + filenames "*.abc" + mimetypes "text/vnd.abc", "text/x-abc" + + start do + # Nothing to do, yet + end + + decorations = %w!+ <( <) > >( >) ^ 0 1 2 3 4 5 accent + arpeggio breath coda courtesy crescendo( crescendo) dacapo + dacoda D.C. D.C.alcoda D.C.alfine diminuendo( diminuendo) + downbow D.S. D.S.alcoda D.S.alfine editorial emphasis f + fermata ff fff ffff fine invertedfermata invertedturn + invertedturnx longphrase lowermordent marcato mediumphrase mf + mordent mp open p plus pp ppp pppp pralltriller roll segno sfz + shortphrase slide snap tenuto thumb trill trill( trill) turn + turnx upbow uppermordent wedge! + + instructions = %w"abc-charset abc-version abc-include abc-creator linebreak decoration" + + @@UNKNOWN = Text + @@QUOTE = Str::Double + @@CHORD = Name::Attribute + @@CHORD_PITCH = Str::Backtick + @@KEY = Keyword::Reserved + @@LYRICS = Keyword::Reserved + @@CONTINUATION = Text::Whitespace + @@LYRICS_VALUE = Str::Regex + @@FIELD = Keyword::Declaration + @@FIELD_VALUE = Str + @@INLINE_FIELD = Str::Escape# @@FIELD + @@BAR_LINE = Punctuation + @@OCTAVE = Operator + @@ACCIDENTAL = Operator + @@NOTE = Name::Constant# Str::Symbol + @@REST = @@NOTE + @@DECORATION = Name::Decorator + @@BUILTIN_DECORATION = Name::Builtin + @@GRACE = Punctuation + @@VOICE = Name::Constant + @@DIRECTIVE = Comment::Preproc + @@DIRECTIVE_BUILTIN = Name::Builtin + @@DIRECTIVE_NAME = Name::Tag + @@DIRECTIVE_BEGIN_LABEL = Name::Variable + @@DIRECTIVE_ARGS = Name::Attribute + @@XREF = Keyword::Namespace + @@XREF_VALUE = Name::Namespace + @@TITLE = Generic::Heading + @@DURATION = Num::Integer + @@TUPLET = Name::Function# Keyword::Variable + @@SLUR = Operator + @@TIE = Operator + @@TEXT_MULTILINE = Str::Heredoc + + @@RE_EOL = /(?:\r\n?|\n)/ + @@RE_COMMENT = 
/%[^\r\n]*?/ + + state :root do + rule /^$/ do + @last_field = nil + end + rule @@RE_EOL, Text + # Directives must be at beginning of line + rule /[\t ]+#{@@RE_COMMENT}$/o, Comment + rule %r( + ^(%%|I:) + (begin) + (text) + ([^%\r\n]+?)? # Arguments + (%[^\r\n]+?)? # Comment + $ + )xm do |m| + type = m[3] + groups @@DIRECTIVE, @@DIRECTIVE_NAME, @@DIRECTIVE_BEGIN_LABEL, @@DIRECTIVE_ARGS, Comment + push :begin_text + end + rule %r/ + ^(%%|\\|I:) + (\S+?(?=\s)) # Name + ([^%\r\n]+?)? # Arguments + (%[^\r\n]+?)? # Comment + $ + /ox do |m| + if (%r|#{instructions.map{|s|Regexp.escape s}.join('|')}|o =~ m[2]) + type = @@DIRECTIVE_BUILTIN + else + type = @@DIRECTIVE_NAME + end + groups @@DIRECTIVE, type, @@DIRECTIVE_ARGS, Comment + end + + mixin :check_field + #mixin :check_comment + mixin :body + end + + state :check_field do + rule /^([a-z+])(:)([ \t]*)/i do |m| + field_type = @@FIELD + next_state = nil + # + means repeat last field + if m[1] == "+" + tmp_field = @last_field + else + tmp_field = m[1] unless m[1] == "+" + end + case tmp_field + when "X" + field_type = @@XREF + next_state = :xref + when "w" + next_state = :lyrics + when "K" + field_type = @@KEY + when "T" + field_type = @@TITLE + end + field_type = Error if m[1] == "+" && !@last_field + groups field_type, Punctuation, Text + if next_state + push next_state + else + push do + case tmp_field + when "T" + field_value_type = field_type + else + field_value_type = @@FIELD_VALUE + end + mixin :entity + mixin :line_continuation + mixin :escape + rule @@RE_COMMENT, Comment + rule @@RE_EOL, Text, :pop! + rule /[^\r\n]/, field_value_type + end + end + if /[XI]/ =~ tmp_field + @last_field = nil + else + @last_field = m[1] unless field_type == Error || m[1] == "+" + end + end + end + + state :xref do + rule /\d+/, @@XREF_VALUE + rule /\d+#{@@RE_EOL}/o, @@XREF_VALUE, :pop! + + rule /([ \t]*)(%[^\r\n]*)/ do + groups Text, Comment + end + rule @@RE_EOL, Text, :pop! 
+ mixin :check_comment + rule /[^\r\n]*/, Error + end + + state :decorations do + rule /(!)([^!\r\n]+?)(!)/ do |m| + if decorations.include?(m[2]) + tokenType = @@BUILTIN_DECORATION + else + tokenType = @@DECORATION + end + groups Punctuation, tokenType, Punctuation + end + end + + state :body do + mixin :decorations + rule /(\[)([a-z])(:)([^\r\n]*?)(\])/i do |w| + if w[2] == "K" + field_type = @@KEY + else + field_type = @@FIELD + end + groups @@INLINE_FIELD, field_type, Punctuation, @@FIELD_VALUE, @@INLINE_FIELD + end + + rule /\([2-9]((:[\(2-9]?)){0,2}/, @@TUPLET + + rule /([=_^]*)([a-g])([,\']*)([<>\d\/]*)/i do + groups @@ACCIDENTAL, @@NOTE, @@OCTAVE, @@DURATION + end + + rule /[_^]/, @@ACCIDENTAL + rule /[,\']/, @@ACCIDENTAL + rule %r/ + ([.:]?\|+)? + [ \t]*?\[[1-9]+ # Alternate ending no. 1 + ([-,][1-9]+)* # More alternate endings + /x, @@BAR_LINE + rule /[|:\[\]]/, @@BAR_LINE + rule /(")([?<>@^]?)/ do + groups Str::Double, Operator + push :string + end + + rule /[.~HJLMNOPRSTuv]/, @@BUILTIN_DECORATION + rule /[<>\d\/]/, @@DURATION + + rule /(\{\/?|})/i, @@GRACE + rule /[xz]/i, @@REST + + rule /[y&]/, Operator + + rule /\([,']?/, @@SLUR + rule /\)/, @@SLUR + rule /-/, @@TIE + rule /\s+/, Text + mixin :line_continuation + + # http://abcnotation.com/wiki/abc:standard:v2.2#tune_body + rule /[#*;?@]/, Error # Reserved for future use + # All printable ASCII characters may be used in tune body + mixin :check_comment + rule /[ -~]/, @@UNKNOWN + # Everything else is an error + end + + state :line_continuation do + rule /(\\[ \t]*)(%[^\r\n]*?)?(#{@@RE_EOL})/o do + groups Text::Whitespace, Comment, Text::Whitespace + end + end + + state :escape do + rule /(\\)(.)/ do + groups Str::Escape, @@FIELD_VALUE + end + end + + state :string do + mixin :line_continuation + mixin :entity + rule /\\"/, Str::Escape + rule /"/, Str::Double, :pop! + rule /#{@@RE_EOL}/o, Error, :pop! 
+ rule /./, Str::Double + end + + state :lyrics do + mixin :line_continuation + mixin :check_comment + mixin :entity + rule /(\\)(%)/ do + groups Str::Escape, @@LYRICS_VALUE + end + rule /[-~_]/, Operator + rule /\|/, @@BAR_LINE + rule /[*]/, @@REST + rule /%[^\r\n]*?#{@@RE_EOL}/o, Comment, :pop! + rule @@RE_EOL, Text, :pop! + rule /./, @@LYRICS_VALUE + end + + state :entity do + rule /&\S*?;/, Name::Entity + rule /\\u[0-9a-f]{4}/i, Str::Escape + end + + state :check_comment do + rule /(%[^\r\n]*)$/, Comment + end + + state :begin_text do + rule /^(%%|I:)(end)(text)([^\r\n]*)/m do |n| + groups @@DIRECTIVE, @@DIRECTIVE_NAME, @@DIRECTIVE_BEGIN_LABEL, Comment + pop! + end + rule /^%%/, Text::Whitespace + rule /^\s*$/, Text, :pop! + rule /.+$/ do + token @@TEXT_MULTILINE + end + end + end + end +end diff --git a/spec/lexers/abc_spec.rb b/spec/lexers/abc_spec.rb new file mode 100644 index 0000000000..c5f5316da4 --- /dev/null +++ b/spec/lexers/abc_spec.rb @@ -0,0 +1,19 @@ +# -*- coding: utf-8 -*- # +# frozen_string_literal: true + +describe Rouge::Lexers::Abc do + let(:subject) { Rouge::Lexers::Abc.new } + + describe 'guessing' do + include Support::Guessing + + it 'guesses by filename' do + assert_guess :filename => 'foo.abc' + end + + it 'guesses by mimetype' do + assert_guess :mimetype => 'text/vnd.abc' + assert_guess :mimetype => 'text/x-abc' + end + end + end From 51de594e65ce085c1a92613cfa8cfff9467a8e16 Mon Sep 17 00:00:00 2001 From: Trond Aasan Date: Thu, 21 Feb 2019 19:22:02 +0100 Subject: [PATCH 3/7] Fix formatting errors --- lib/rouge/lexers/postscript.rb | 5 ++--- spec/lexers/abc_spec.rb | 20 ++++++++++---------- 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/lib/rouge/lexers/postscript.rb b/lib/rouge/lexers/postscript.rb index f9e360632a..331178a0b6 100644 --- a/lib/rouge/lexers/postscript.rb +++ b/lib/rouge/lexers/postscript.rb @@ -5,14 +5,13 @@ module Rouge module Lexers class PostScript < RegexLexer - """ +=begin Lexer for PostScript 
files. The PostScript Language Reference published by Adobe at is the authority for this. - """ - +=end title "PostScript" desc "PostScript" tag "postscript" diff --git a/spec/lexers/abc_spec.rb b/spec/lexers/abc_spec.rb index c5f5316da4..aea56d6272 100644 --- a/spec/lexers/abc_spec.rb +++ b/spec/lexers/abc_spec.rb @@ -2,18 +2,18 @@ # frozen_string_literal: true describe Rouge::Lexers::Abc do - let(:subject) { Rouge::Lexers::Abc.new } + let(:subject) { Rouge::Lexers::Abc.new } - describe 'guessing' do - include Support::Guessing + describe 'guessing' do + include Support::Guessing - it 'guesses by filename' do - assert_guess :filename => 'foo.abc' - end + it 'guesses by filename' do + assert_guess :filename => 'foo.abc' + end - it 'guesses by mimetype' do - assert_guess :mimetype => 'text/vnd.abc' - assert_guess :mimetype => 'text/x-abc' - end + it 'guesses by mimetype' do + assert_guess :mimetype => 'text/vnd.abc' + assert_guess :mimetype => 'text/x-abc' end end +end From f13ca2e41a6a0154227a2bd396ffcf48917ce03c Mon Sep 17 00:00:00 2001 From: Trond Aasan Date: Thu, 21 Feb 2019 19:26:27 +0100 Subject: [PATCH 4/7] Add sample file --- spec/visual/samples/abc | 70 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100644 spec/visual/samples/abc diff --git a/spec/visual/samples/abc b/spec/visual/samples/abc new file mode 100644 index 0000000000..7dc4d60934 --- /dev/null +++ b/spec/visual/samples/abc @@ -0,0 +1,70 @@ +I:abc-charset UTF-8 % This is a builtin instruction field with a trailing comment ++: Error % I: fields can not be repeated +%%abc-version 2.2 % Instruction fields can also start with %% +\linebreak % or \. The \ option is not in the standard, but some software supports it + %% This is not an instruction, but a comment +X: 1 +T: The Three Blind Mice ++: This is a repeated field +O: Old Round. 
+%R: air, jig, march +B: "The Everyday Song Book", 1927 +F: http://www.library.pitt.edu/happybirthday/pdf/The_Everyday_Song_Book.pdf +Z: 2017 John Chambers +M: 6/8 +L: 1/8 +K: Eb +% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +[| "^1"G3 F3 | E3 z2z | G3 F3 | E3 z2z | "^2"B3 A2 A | G3 z2z | +w: Three blind mice, three blind mice; See how they run; +% +B3 A2 A | G3 z2B | "^3"e2 e d c d | e2 B B2 B | ++: see how they run; They all ran aft-er the farm-er's wife, She +% +e e e d c d | e2 B B2 B | "^4"e e e ++: cut off their tails with a carv-ing knife; Did ev-er you +% +d c d | e B B B2 A | G3 F3 | !fermata!E3 z2z |] ++: see such a sight in your life As three blind mice? +% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +%%begintext align % Runs until %%endtext. /^%%/ is whitespace +%% A version of this rhyme, together with music, was +%% published in Deuteromelia or The Seconde part of Musicks melodie +%% (1609). The editor of the book, and possible author of the rhyme, was +%% Thomas Ravenscroft. The original lyrics are: +%%endtext +%%begintext +%% Three Blinde Mice, +%% Three Blinde Mice, +%% Dame Iulian, +%% Dame Iulian, +%% the Miller and his merry olde Wife, +%% she scrapte her tripe licke thou the knife. +%% +%%endtext +%%begintext align +%% The rhyme only entered children's literature in 1842 when it was +%% published in a collection by James Orchard Halliwell. 
+%% +%% From https://en.wikipedia.org/wiki/Three_Blind_Mice +%%endtext + +X: 2 +T: Fantasi +K: Ador +% Tuplet +(3 BAB (3:: BAB (3:2 BAB (3:2:3 BAB (3::2 BAB (3:2:2 BAB +% Non-tuplet +(3:2:2:5 BAB +% Builtin decorations +a!trill!aTbHC +% Othoer decorations +A2!yeah!C4 +% Durations +A2 A/ A < B A > B +% Bar lines +A |: BC :| EF | [2 GB [|] =A2 :: B |[ C :|: D :||: E :|||: F +% More bar lines +faf gfe|[1 dfe dBA:|[2 d2e dcB|]\ +[1,3,5-7 dcA .|[1,3,5-7,9-13,22 dcA +w: &SGML; entities and \u0075nicode literals are properly highlighted From 46be13df9d6c89b5e08c8ffd7e3df339cc2d8255 Mon Sep 17 00:00:00 2001 From: Trond Aasan Date: Thu, 21 Feb 2019 20:34:03 +0100 Subject: [PATCH 5/7] Detect PostScript from source --- lib/rouge/lexers/postscript.rb | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lib/rouge/lexers/postscript.rb b/lib/rouge/lexers/postscript.rb index 331178a0b6..86a8495ebf 100644 --- a/lib/rouge/lexers/postscript.rb +++ b/lib/rouge/lexers/postscript.rb @@ -19,6 +19,10 @@ class PostScript < RegexLexer filenames "*.ps", "*.eps" mimetypes "application/postscript" + def self.detect?(text) + text.start_with?('%!') + end + delimiter = %s"()<>\[\]{}/%\s" delimiter_end = Regexp.new("(?=[#{delimiter}])") valid_name_chars = Regexp.new("[^#{delimiter}]") From 5b4d72f8c520b43e07949908df1f540638561fe9 Mon Sep 17 00:00:00 2001 From: Trond Aasan Date: Thu, 21 Feb 2019 20:34:50 +0100 Subject: [PATCH 6/7] Add PostScript lexer spec --- spec/lexers/postscript_spec.rb | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 spec/lexers/postscript_spec.rb diff --git a/spec/lexers/postscript_spec.rb b/spec/lexers/postscript_spec.rb new file mode 100644 index 0000000000..c725153800 --- /dev/null +++ b/spec/lexers/postscript_spec.rb @@ -0,0 +1,24 @@ +# -*- coding: utf-8 -*- # +# frozen_string_literal: true + +describe Rouge::Lexers::PostScript do + let(:subject) { Rouge::Lexers::PostScript.new } + + describe 'guessing' do + include Support::Guessing + + it 'guesses by 
filename' do + assert_guess :filename => 'foo.ps' + assert_guess :filename => 'foo.eps' + end + + it 'guesses by mimetype' do + assert_guess :mimetype => 'application/postscript' + end + + it 'guesses by source' do + assert_guess :source => '%!PS' + assert_guess :source => '%!PS-Adobe-3.0' + end + end +end From e99196ea8e8f8049fef5f2fa3c98471355cd2984 Mon Sep 17 00:00:00 2001 From: Trond Aasan Date: Thu, 21 Feb 2019 20:48:24 +0100 Subject: [PATCH 7/7] Fix typo --- spec/visual/samples/abc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spec/visual/samples/abc b/spec/visual/samples/abc index 7dc4d60934..f35f6ccdf5 100644 --- a/spec/visual/samples/abc +++ b/spec/visual/samples/abc @@ -58,7 +58,7 @@ K: Ador (3:2:2:5 BAB % Builtin decorations a!trill!aTbHC -% Othoer decorations +% Other decorations A2!yeah!C4 % Durations A2 A/ A < B A > B