From 3a0e6e378d535220ebc9924387a25b8303d55b7f Mon Sep 17 00:00:00 2001
From: Steve Mokris
Date: Mon, 12 Oct 2020 20:29:17 -0400
Subject: [PATCH] Add e-mail lexer (#1567)

This commit adds a lexer for e-mails. It follows RFC 2822 as well as
supporting quoted lines in the form described in RFC 3676.

Co-authored-by: Michael Camilleri
---
 lib/rouge/demos/email     | 11 +++++++++++
 lib/rouge/lexers/email.rb | 39 +++++++++++++++++++++++++++++++++++++++
 spec/lexers/email_spec.rb | 18 ++++++++++++++++++
 spec/visual/samples/email | 23 +++++++++++++++++++++++
 4 files changed, 91 insertions(+)
 create mode 100644 lib/rouge/demos/email
 create mode 100644 lib/rouge/lexers/email.rb
 create mode 100644 spec/lexers/email_spec.rb
 create mode 100644 spec/visual/samples/email

# Reviewer notes (commentary only; not part of the commit message or of any
# hunk).
#
# lib/rouge/lexers/email.rb
#   Defines Rouge::Lexers::Email (a RegexLexer) registered with tag 'email',
#   aliases 'eml' and 'e-mail', filename glob '*.eml' and MIME type
#   'message/rfc822'.
#
#   The `start` hook pushes :fields, so lexing begins in the header rules
#   rather than in :root.
#   NOTE(review): pop!(2) in :field_body only lands on the body rules if
#   :root sits underneath :fields on the stack; presumably that is
#   RegexLexer's default, confirm.
#
#   :fields (header field names)
#     ':'                          -> Operator, enters :field_body
#     run without CR, LF or ':'    -> Name::Tag
#     single CR or LF              -> Name::Tag
#
#   :field_body (header field values)
#     two line breaks in a row (each LF or CRLF)
#                                  -> Text, pop!(2): leaves both :field_body
#                                     and :fields, i.e. the blank line ends
#                                     the header section
#     one line break NOT followed by space, VT, tab or FF
#                                  -> Text, pops back to :fields for the
#                                     next field
#     run without CR or LF         -> Name::Attribute
#     any other CR or LF           -> Name::Attribute; this is what remains
#                                     for a line break followed by
#                                     whitespace, so a folded value (the
#                                     two-line Cc and Subject headers in the
#                                     visual sample) stays in :field_body
#     NOTE(review): the single-break pattern also matches the first break of
#     a blank line, so these rules look order-sensitive (presumably the first
#     matching rule wins); keep the blank-line rule first.
#
#   :root (message body)
#     "\n"                         -> Text
#     '>' at the start of a line   -> Comment, through the end of that line
#     anything else                -> Text
#     The visual sample expects " > like this." (leading space) not to be
#     treated as a quote.
#
# spec/lexers/email_spec.rb
#   Only asserts lexer guessing by filename ('foo.eml') and by MIME type
#   ('message/rfc822'); it contains no token-level assertions.
#
# NOTE(review): this copy of the patch shows no address after the names in
# the From:/To:/Cc:/Bcc:/Co-authored-by: lines (note the dangling comma after
# "Cc: Somebody"). Text inside angle brackets may have been lost before
# review; compare with the upstream commit before applying.

diff --git a/lib/rouge/demos/email b/lib/rouge/demos/email
new file mode 100644
index 0000000000..b355729347
--- /dev/null
+++ b/lib/rouge/demos/email
@@ -0,0 +1,11 @@
+From: Me
+To: You
+Date: Tue, 21 Jul 2020 15:14:03 +0000
+Subject: A very important message
+
+> Please investigate. Thank you.
+
+I have investigated.
+
+--
+This message is highly confidential and will self-destruct.
diff --git a/lib/rouge/lexers/email.rb b/lib/rouge/lexers/email.rb
new file mode 100644
index 0000000000..aef2f641f8
--- /dev/null
+++ b/lib/rouge/lexers/email.rb
@@ -0,0 +1,39 @@
+# -*- coding: utf-8 -*- #
+# frozen_string_literal: true
+
+module Rouge
+  module Lexers
+    class Email < RegexLexer
+      tag 'email'
+      aliases 'eml', 'e-mail'
+      filenames '*.eml'
+      mimetypes 'message/rfc822'
+
+      title "Email"
+      desc "An email message"
+
+      start do
+        push :fields
+      end
+
+      state :fields do
+        rule %r/[:]/, Operator, :field_body
+        rule %r/[^\n\r:]+/, Name::Tag
+        rule %r/[\n\r]/, Name::Tag
+      end
+
+      state :field_body do
+        rule(/(\r?\n){2}/) { token Text; pop!(2) }
+        rule %r/\r?\n(?![ \v\t\f])/, Text, :pop!
+        rule %r/[^\n\r]+/, Name::Attribute
+        rule %r/[\n\r]/, Name::Attribute
+      end
+
+      state :root do
+        rule %r/\n/, Text
+        rule %r/^>.*/, Comment
+        rule %r/.*/, Text
+      end
+    end
+  end
+end
diff --git a/spec/lexers/email_spec.rb b/spec/lexers/email_spec.rb
new file mode 100644
index 0000000000..b511498824
--- /dev/null
+++ b/spec/lexers/email_spec.rb
@@ -0,0 +1,18 @@
+# -*- coding: utf-8 -*- #
+# frozen_string_literal: true
+
+describe Rouge::Lexers::Email do
+  let(:subject) { Rouge::Lexers::Email.new }
+
+  describe 'guessing' do
+    include Support::Guessing
+
+    it 'guesses by filename' do
+      assert_guess :filename => 'foo.eml'
+    end
+
+    it 'guesses by mimetype' do
+      assert_guess :mimetype => 'message/rfc822'
+    end
+  end
+end
diff --git a/spec/visual/samples/email b/spec/visual/samples/email
new file mode 100644
index 0000000000..8d46f7e871
--- /dev/null
+++ b/spec/visual/samples/email
@@ -0,0 +1,23 @@
+From: Me
+To: You
+Cc: Somebody ,
+ And One More
+Bcc: Secret Person
+X-Spam-Status: Definitely not spam
+Date: Tue, 21 Jul 2020 15:14:03 +0000
+Subject: RE: A very important message
+ that continues onto the next line.
+
+Greetings and salutations.
+
+>> A second-level quotation.
+>
+> Please investigate. Thank you.
+
+I have investigated.
+
+Note: A space-stuffed line starting with > is not a quote.
+ > like this.
+
+--
+This message is highly confidential and will self-destruct.