Skip to content

Commit

Permalink
🔀 Merge pull request #225 from nevans/parser/better-faster-cleaner-st…
Browse files Browse the repository at this point in the history
…atus

âš¡ Better Faster Cleaner `STATUS` parsing
  • Loading branch information
nevans authored Nov 13, 2023
2 parents 8a60524 + 8070925 commit dcbdb21
Show file tree
Hide file tree
Showing 4 changed files with 314 additions and 26 deletions.
24 changes: 21 additions & 3 deletions lib/net/imap/response_data.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@

module Net
class IMAP < Protocol
autoload :FetchData, File.expand_path("fetch_data", __dir__)
autoload :FetchData, "#{__dir__}/fetch_data"
autoload :SequenceSet, "#{__dir__}/sequence_set"

# Net::IMAP::ContinuationRequest represents command continuation requests.
#
Expand Down Expand Up @@ -71,7 +72,7 @@ class IgnoredResponse < UntaggedResponse
# unknown extensions to response types without a well-defined extension
# grammar.
#
# See also: UnparsedNumericResponseData
# See also: UnparsedNumericResponseData, ExtensionData, IgnoredResponse
class UnparsedData < Struct.new(:unparsed_data)
##
# method: unparsed_data
Expand All @@ -87,7 +88,7 @@ class UnparsedData < Struct.new(:unparsed_data)
# Net::IMAP::UnparsedNumericResponseData represents data for unhandled
# response types with a numeric prefix. See the documentation for #number.
#
# See also: UnparsedData
# See also: UnparsedData, ExtensionData, IgnoredResponse
class UnparsedNumericResponseData < Struct.new(:number, :unparsed_data)
##
# method: number
Expand All @@ -106,6 +107,23 @@ class UnparsedNumericResponseData < Struct.new(:number, :unparsed_data)
# The unparsed data, not including #number or UntaggedResponse#name.
end

# **Note:** This represents an intentionally _unstable_ API. Where
# instances of this class are returned, future releases may return a
# different (incompatible) object <em>without deprecation or warning</em>.
#
# Net::IMAP::ExtensionData represents data that is parsable according to the
# forward-compatible extension syntax in RFC3501, RFC4466, or RFC9051, but
# isn't directly known or understood by Net::IMAP yet.
#
# See also: UnparsedData, UnparsedNumericResponseData, IgnoredResponse
class ExtensionData < Struct.new(:data)
##
# method: data
# :call-seq: data -> string
#
# The parsed extension data.
end

# Net::IMAP::TaggedResponse represents tagged responses.
#
# The server completion result response indicates the success or
Expand Down
216 changes: 193 additions & 23 deletions lib/net/imap/response_parser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,56 @@ module RFC3629
# ; Is a valid RFC 3501 "atom".
TAGGED_EXT_LABEL = /#{TAGGED_LABEL_FCHAR}#{TAGGED_LABEL_CHAR}*/n

# nz-number = digit-nz *DIGIT
# ; Non-zero unsigned 32-bit integer
# ; (0 < n < 4,294,967,296)
NZ_NUMBER = /[1-9]\d*/n

# seq-number = nz-number / "*"
# ; message sequence number (COPY, FETCH, STORE
# ; commands) or unique identifier (UID COPY,
# ; UID FETCH, UID STORE commands).
# ; * represents the largest number in use. In
# ; the case of message sequence numbers, it is
# ; the number of messages in a non-empty mailbox.
# ; In the case of unique identifiers, it is the
# ; unique identifier of the last message in the
# ; mailbox or, if the mailbox is empty, the
# ; mailbox's current UIDNEXT value.
# ; The server should respond with a tagged BAD
# ; response to a command that uses a message
# ; sequence number greater than the number of
# ; messages in the selected mailbox. This
# ; includes "*" if the selected mailbox is empty.
SEQ_NUMBER = /#{NZ_NUMBER}|\*/n

# seq-range = seq-number ":" seq-number
# ; two seq-number values and all values between
# ; these two regardless of order.
# ; Example: 2:4 and 4:2 are equivalent and
# ; indicate values 2, 3, and 4.
# ; Example: a unique identifier sequence range of
# ; 3291:* includes the UID of the last message in
# ; the mailbox, even if that value is less than
# ; 3291.
SEQ_RANGE = /#{SEQ_NUMBER}:#{SEQ_NUMBER}/n

# sequence-set = (seq-number / seq-range) ["," sequence-set]
# ; set of seq-number values, regardless of order.
# ; Servers MAY coalesce overlaps and/or execute
# ; the sequence in any order.
# ; Example: a message sequence number set of
# ; 2,4:7,9,12:* for a mailbox with 15 messages is
# ; equivalent to 2,4,5,6,7,9,12,13,14,15
# ; Example: a message sequence number set of
# ; *:4,5:7 for a mailbox with 10 messages is
# ; equivalent to 10,9,8,7,6,5,4,5,6,7 and MAY
# ; be reordered and overlap coalesced to be
# ; 4,5,6,7,8,9,10.
SEQUENCE_SET_ITEM = /#{SEQ_NUMBER}|#{SEQ_RANGE}/n
SEQUENCE_SET = /#{SEQUENCE_SET_ITEM}(?:,#{SEQUENCE_SET_ITEM})*/n
SEQUENCE_SET_STR = /\A#{SEQUENCE_SET}\z/n

# RFC3501:
# literal = "{" number "}" CRLF *CHAR8
# ; Number represents the number of CHAR8s
Expand Down Expand Up @@ -405,6 +455,24 @@ def unescape_quoted(quoted)
# ATOM-CHAR = <any CHAR except atom-specials>
ATOM_TOKENS = [T_ATOM, T_NUMBER, T_NIL, T_LBRA, T_PLUS]

SEQUENCE_SET_TOKENS = [T_ATOM, T_NUMBER, T_STAR]

# sequence-set = (seq-number / seq-range) ["," sequence-set]
# sequence-set =/ seq-last-command
# ; Allow for "result of the last command"
# ; indicator.
# seq-last-command = "$"
#
# *note*: doesn't match seq-last-command
def sequence_set
str = combine_adjacent(*SEQUENCE_SET_TOKENS)
if Patterns::SEQUENCE_SET_STR.match?(str)
SequenceSet.new(str)
else
parse_error("unexpected atom %p, expected sequence-set", str)
end
end

# ASTRING-CHAR = ATOM-CHAR / resp-specials
# resp-specials = "]"
ASTRING_CHARS_TOKENS = [*ATOM_TOKENS, T_RBRA].freeze
Expand Down Expand Up @@ -488,6 +556,60 @@ def case_insensitive__nstring
NIL? ? nil : case_insensitive__string
end

# tagged-ext-comp = astring /
# tagged-ext-comp *(SP tagged-ext-comp) /
# "(" tagged-ext-comp ")"
# ; Extensions that follow this general
# ; syntax should use nstring instead of
# ; astring when appropriate in the context
# ; of the extension.
# ; Note that a message set or a "number"
# ; can always be represented as an "atom".
# ; A URL should be represented as
# ; a "quoted" string.
def tagged_ext_comp
vals = []
while true
vals << case lookahead!(*ASTRING_TOKENS, T_LPAR).symbol
when T_LPAR then lpar; ary = tagged_ext_comp; rpar; ary
when T_NUMBER then number
else astring
end
SP? or break
end
vals
end

# tagged-ext-simple is a subset of atom
# TODO: recognize sequence-set in the lexer
#
# tagged-ext-simple = sequence-set / number / number64
def tagged_ext_simple
number? || sequence_set
end

# tagged-ext-val = tagged-ext-simple /
# "(" [tagged-ext-comp] ")"
def tagged_ext_val
if lpar?
_ = peek_rpar? ? [] : tagged_ext_comp
rpar
_
else
tagged_ext_simple
end
end

# mailbox = "INBOX" / astring
# ; INBOX is case-insensitive. All case variants of
# ; INBOX (e.g., "iNbOx") MUST be interpreted as INBOX
# ; not as an astring. An astring which consists of
# ; the case-insensitive sequence "I" "N" "B" "O" "X"
# ; is considered to be INBOX and not an astring.
# ; Refer to section 5.1 for further
# ; semantic details of mailbox names.
alias mailbox astring

# valid number ranges are not enforced by parser
# number64 = 1*DIGIT
# ; Unsigned 63-bit integer
Expand Down Expand Up @@ -1396,31 +1518,79 @@ def thread_branch(token)
return rootmember
end

# mailbox-data =/ "STATUS" SP mailbox SP "(" [status-att-list] ")"
def mailbox_data__status
token = match(T_ATOM)
name = token.value.upcase
match(T_SPACE)
mailbox = astring
match(T_SPACE)
match(T_LPAR)
attr = {}
while true
token = lookahead
case token.symbol
when T_RPAR
shift_token
break
when T_SPACE
shift_token
resp_name = label("STATUS"); SP!
mbox_name = mailbox; SP!
lpar; attr = status_att_list; rpar
UntaggedResponse.new(resp_name, StatusData.new(mbox_name, attr), @str)
end

# RFC3501
# status-att-list = status-att SP number *(SP status-att SP number)
# RFC4466, RFC9051, and RFC3501 Errata
# status-att-list = status-att-val *(SP status-att-val)
def status_att_list
attrs = [status_att_val]
while SP? do attrs << status_att_val end
attrs.to_h
end

# RFC3501 Errata:
# status-att-val = ("MESSAGES" SP number) / ("RECENT" SP number) /
# ("UIDNEXT" SP nz-number) / ("UIDVALIDITY" SP nz-number) /
# ("UNSEEN" SP number)
# RFC4466:
# status-att-val = ("MESSAGES" SP number) /
# ("RECENT" SP number) /
# ("UIDNEXT" SP nz-number) /
# ("UIDVALIDITY" SP nz-number) /
# ("UNSEEN" SP number)
# ;; Extensions to the STATUS responses
# ;; should extend this production.
# ;; Extensions should use the generic
# ;; syntax defined by tagged-ext.
# RFC9051:
# status-att-val = ("MESSAGES" SP number) /
# ("UIDNEXT" SP nz-number) /
# ("UIDVALIDITY" SP nz-number) /
# ("UNSEEN" SP number) /
# ("DELETED" SP number) /
# ("SIZE" SP number64)
# ; Extensions to the STATUS responses
# ; should extend this production.
# ; Extensions should use the generic
# ; syntax defined by tagged-ext.
# RFC7162:
# status-att-val =/ "HIGHESTMODSEQ" SP mod-sequence-valzer
# ;; Extends non-terminal defined in [RFC4466].
# ;; Value 0 denotes that the mailbox doesn't
# ;; support persistent mod-sequences
# ;; as described in Section 3.1.2.2.
# RFC7889:
# status-att-val =/ "APPENDLIMIT" SP (number / nil)
# ;; status-att-val is defined in RFC 4466
# RFC8438:
# status-att-val =/ "SIZE" SP number64
# RFC8474:
# status-att-val =/ "MAILBOXID" SP "(" objectid ")"
# ; follows tagged-ext production from [RFC4466]
def status_att_val
key = tagged_ext_label
SP!
val =
case key
when "MESSAGES" then number # RFC3501, RFC9051
when "UNSEEN" then number # RFC3501, RFC9051
when "DELETED" then number # RFC3501, RFC9051
when "UIDNEXT" then nz_number # RFC3501, RFC9051
when "UIDVALIDITY" then nz_number # RFC3501, RFC9051
when "RECENT" then number # RFC3501 (obsolete)
when "SIZE" then number64 # RFC8483, RFC9051
else
number? || ExtensionData.new(tagged_ext_val)
end
token = match(T_ATOM)
key = token.value.upcase
match(T_SPACE)
val = number
attr[key] = val
end
data = StatusData.new(mailbox, attr)
return UntaggedResponse.new(name, data, @str)
[key, val]
end

# The presence of "IMAP4rev1" or "IMAP4rev2" is unenforced here.
Expand Down
67 changes: 67 additions & 0 deletions lib/net/imap/sequence_set.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
# frozen_string_literal: true

module Net
class IMAP

##
# An IMAP {sequence
# set}[https://www.rfc-editor.org/rfc/rfc9051.html#section-4.1.1],
# is a set of message sequence numbers or unique identifier numbers
# ("UIDs"). It contains numbers and ranges of numbers. The numbers are all
# non-zero unsigned 32-bit integers and one special value, <tt>*</tt>, that
# represents the largest value in the mailbox.
#
# *NOTE:* This SequenceSet class is currently a placeholder for unhandled
# extension data. All it does now is validate. It will be expanded to a
# full API in a future release.
class SequenceSet

def self.[](str) new(str).freeze end

def initialize(input)
@atom = -String.try_convert(input)
validate
end

# Returns the IMAP string representation. In the IMAP grammar,
# +sequence-set+ is a subset of +atom+ which is a subset of +astring+.
attr_accessor :atom

# Returns #atom. In the IMAP grammar, +atom+ is a subset of +astring+.
alias astring atom

# Returns the value of #atom
alias to_s atom

# Hash equality requires the same encoded #atom representation.
#
# Net::IMAP::SequenceSet["1:3"] .eql? Net::IMAP::SequenceSet["1:3"] # => true
# Net::IMAP::SequenceSet["1,2,3"].eql? Net::IMAP::SequenceSet["1:3"] # => false
# Net::IMAP::SequenceSet["1,3"] .eql? Net::IMAP::SequenceSet["3,1"] # => false
# Net::IMAP::SequenceSet["9,1:*"].eql? Net::IMAP::SequenceSet["1:*"] # => false
#
def eql?(other) self.class == other.class && atom == other.atom end
alias == eql?

# See #eql?
def hash; [self.class. atom].hash end

def inspect
(frozen? ? "%s[%p]" : "#<%s %p>") % [self.class, to_s]
end

# Unstable API, for internal use only (Net::IMAP#validate_data)
def validate # :nodoc:
ResponseParser::Patterns::SEQUENCE_SET_STR.match?(@atom) or
raise ArgumentError, "invalid sequence-set: %p" % [input]
true
end

# Unstable API, for internal use only (Net::IMAP#send_data)
def send_data(imap, tag) # :nodoc:
imap.__send__(:put_string, atom)
end

end
end
end
Loading

0 comments on commit dcbdb21

Please sign in to comment.