Skip to content

Commit

Permalink
comments are now allowed inside of any whitespace (resent because i m…
Browse files Browse the repository at this point in the history
…essed up last time) (#159)

* comments are now allowed inside of any whitespace

* simplified the '*_complete' functions
  • Loading branch information
Carlyle-Foster authored Jan 12, 2025
1 parent 1e9eca4 commit ce60ac2
Show file tree
Hide file tree
Showing 4 changed files with 63 additions and 92 deletions.
19 changes: 10 additions & 9 deletions src/parsers/augmented.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,34 +2,35 @@ use crate::parsers::Format;
use crate::term::Term;

use nom::{
bytes::complete::{tag, take, take_till},
bytes::complete::{tag, take_till},
character::complete::{self, satisfy},
combinator::{complete, not},
error::VerboseError,
sequence::{preceded, terminated},
IResult,
};

use super::whitespace_plus_comments;

#[non_exhaustive]
pub struct ABNF;

impl Format for ABNF {
fn prod_lhs(input: &str) -> IResult<&str, Term, VerboseError<&str>> {
let (input, nt) = take_till(char::is_whitespace)(input)?;

let (input, _) = preceded(complete::multispace0, complete::char('='))(input)?;
let (input, _) = whitespace_plus_comments(input).unwrap();
let (input, _) = complete::char('=')(input)?;
let (input, _) = whitespace_plus_comments(input).unwrap();

Ok((input, Term::Nonterminal(nt.to_string())))
}

fn nonterminal(input: &str) -> IResult<&str, Term, VerboseError<&str>> {
satisfy(|c: char| c.is_alphanumeric() || c == '_')(input)?;
let (input, nt) = complete(terminated(
take_till(char::is_whitespace),
complete::multispace0,
))(input)?;
take(1_usize)(nt)?;
satisfy(|c: char| c.is_alphabetic() || c == '_')(input)?;
let (input, nt) = take_till(char::is_whitespace)(input)?;
let (input, _) = whitespace_plus_comments(input).unwrap();

//if this is the left-hand side of an expression then prod_lhs() should parse this
not(complete(tag("=")))(input)?;

Ok((input, Term::Nonterminal(nt.to_string())))
Expand Down
18 changes: 9 additions & 9 deletions src/parsers/bnf.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use super::Format;
use super::{whitespace_plus_comments, Format};

use crate::term::Term;

Expand All @@ -7,7 +7,7 @@ use nom::{
character::complete,
combinator::{complete, not},
error::VerboseError,
sequence::{delimited, preceded, terminated},
sequence::delimited,
IResult,
};

Expand All @@ -18,19 +18,19 @@ impl Format for BNF {
fn prod_lhs(input: &str) -> IResult<&str, Term, VerboseError<&str>> {
let (input, nt) =
delimited(complete::char('<'), take_until(">"), complete::char('>'))(input)?;

let (input, _) = preceded(complete::multispace0, tag("::="))(input)?;
let (input, _) = whitespace_plus_comments(input).unwrap();
let (input, _) = tag("::=")(input)?;
let (input, _) = whitespace_plus_comments(input).unwrap();

Ok((input, Term::Nonterminal(nt.to_string())))
}

fn nonterminal(input: &str) -> IResult<&str, Term, VerboseError<&str>> {
let (input, nt) = complete(delimited(
complete::char('<'),
take_until(">"),
terminated(complete::char('>'), complete::multispace0),
))(input)?;
let (input, nt) =
delimited(complete::char('<'), take_until(">"), complete::char('>'))(input)?;
let (input, _) = whitespace_plus_comments(input).unwrap();

//if this is the left-hand side of an expression then prod_lhs() should parse this
not(complete(tag("::=")))(input)?;

Ok((input, Term::Nonterminal(nt.to_string())))
Expand Down
109 changes: 44 additions & 65 deletions src/parsers/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,10 @@ use nom::{
branch::alt,
bytes::complete::{take_till, take_until},
character::complete::{self, multispace0},
combinator::{all_consuming, complete, eof, not, peek, recognize},
combinator::{all_consuming, complete, eof, opt, peek, recognize},
error::VerboseError,
multi::{many0, many1},
sequence::{delimited, preceded, terminated},
multi::many1,
sequence::{delimited, preceded},
IResult,
};

Expand All @@ -28,56 +28,49 @@ pub trait Format {

pub fn terminal(input: &str) -> IResult<&str, Term, VerboseError<&str>> {
let (input, t) = alt((
delimited(
complete::char('"'),
take_until("\""),
terminated(complete::char('"'), complete::multispace0),
),
delimited(
complete::char('\''),
take_until("'"),
terminated(complete::char('\''), complete::multispace0),
),
delimited(complete::char('"'), take_until("\""), complete::char('"')),
delimited(complete::char('\''), take_until("'"), complete::char('\'')),
))(input)?;

let (input, _) = whitespace_plus_comments(input).unwrap();

Ok((input, Term::Terminal(t.to_string())))
}

pub fn comment(input: &str) -> IResult<&str, &str, VerboseError<&str>> {
let (input, comment) = preceded(
complete::char(';'),
take_till(|c: char| c == '\r' || c == '\n' || c == ';'),
)(input)?;
not(complete::char(';'))(input)?;
Ok((input, comment))
/// Consumes any mix of whitespace and `;` comments at the start of `input`.
///
/// A comment starts at `;` and runs up to (but not including) the next `\r`
/// or `\n`; the line break itself is then eaten as ordinary whitespace on the
/// following pass. Passes repeat until one of them consumes nothing.
///
/// The `char` in the returned pair is a placeholder (`'\0'`) and carries no
/// information; only the remaining input is meaningful.
///
/// This should never fail (every parser used here accepts an empty match, and
/// `opt` turns a missing `;` into `None`), so unwrap it when calling directly
/// please!
pub fn whitespace_plus_comments(mut input: &str) -> IResult<&str, char, VerboseError<&str>> {
    loop {
        // The parsers below only ever strip a prefix, so the remainder is
        // always a suffix of what we started the pass with. Comparing lengths
        // is therefore enough to detect "no progress" and avoids re-reading
        // the whole remaining input on every call.
        let len_before = input.len();

        (input, _) = multispace0(input)?;
        (input, _) = opt(preceded(
            complete::char(';'),
            take_till(|c: char| c == '\r' || c == '\n'),
        ))(input)?;

        if input.len() == len_before {
            break;
        }
    }
    Ok((input, '\0'))
}

pub fn is_format_standard_bnf(input: &str) -> bool {
match terminated(many0(preceded(multispace0, comment)), multispace0)(input) {
Ok(tuple) => {
let (input, _) = tuple;
complete::char::<&str, VerboseError<&str>>('<')(input).is_ok()
}
Err(_) => unreachable!("this pattern should always match"),
}
let (input, _) = whitespace_plus_comments(input).unwrap();
complete::char::<&str, VerboseError<&str>>('<')(input).is_ok()
}

/// Parses one term of an expression in the grammar flavour `F`.
///
/// A quoted terminal is attempted first; if that does not match, the
/// format-specific `F::nonterminal` parser is tried instead.
pub fn term<F: Format>(input: &str) -> IResult<&str, Term, VerboseError<&str>> {
    let mut terminal_or_nonterminal = alt((terminal, F::nonterminal));
    terminal_or_nonterminal(input)
}

pub fn term_complete<F: Format>(input: &str) -> IResult<&str, Term, VerboseError<&str>> {
let (input, t) = all_consuming(term::<F>)(input)?;

Ok((input, t))
all_consuming(term::<F>)(input)
}

pub fn expression_next<F: Format>(input: &str) -> IResult<&str, &str, VerboseError<&str>> {
let (input, _) = delimited(
complete::multispace0,
complete::char('|'),
complete::multispace0,
)(input)?;
let (input, _) = complete::char('|')(input)?;
let (input, _) = whitespace_plus_comments(input).unwrap();

complete(expression::<F>)(input)?;

Expand All @@ -88,63 +81,49 @@ pub fn expression<F: Format>(input: &str) -> IResult<&str, Expression, VerboseEr
term::<F>(input)?;

let (input, terms) = many1(complete(term::<F>))(input)?;
let (input, _) = delimited(
complete::multispace0,
alt((
peek(complete(eof)),
recognize(peek(complete::char(';'))),
expression_next::<F>,
recognize(peek(complete(F::prod_lhs))),
)),
complete::multispace0,
)(input)?;
let (input, _) = alt((
peek(complete(eof)),
expression_next::<F>,
recognize(peek(complete(F::prod_lhs))),
))(input)?;

Ok((input, Expression::from_parts(terms)))
}

pub fn expression_complete<F: Format>(
input: &str,
) -> IResult<&str, Expression, VerboseError<&str>> {
let (input, e) = all_consuming(expression::<F>)(input)?;

Ok((input, e))
all_consuming(expression::<F>)(input)
}

pub fn production<F: Format>(input: &str) -> IResult<&str, Production, VerboseError<&str>> {
let (input, _) = many0(preceded(complete::multispace0, comment))(input)?;
let (input, lhs) = delimited(complete::multispace0, F::prod_lhs, complete::multispace0)(input)?;
let (input, lhs) = F::prod_lhs(input)?;
let (input, rhs) = many1(complete(expression::<F>))(input)?;
let (input, _) = preceded(
complete::multispace0,
alt((
recognize(peek(complete(eof))),
comment,
recognize(peek(complete(F::prod_lhs))),
)),
)(input)?;
let (input, _) = whitespace_plus_comments(input).unwrap();
let (input, _) = alt((
recognize(peek(complete(eof))),
recognize(peek(complete(F::prod_lhs))),
))(input)?;

Ok((input, Production::from_parts(lhs, rhs)))
}

pub fn production_complete<F: Format>(
input: &str,
) -> IResult<&str, Production, VerboseError<&str>> {
let (input, p) = all_consuming(production::<F>)(input)?;

Ok((input, p))
all_consuming(production::<F>)(input)
}

pub fn grammar<F: Format>(input: &str) -> IResult<&str, Grammar, VerboseError<&str>> {
let (input, _) = whitespace_plus_comments(input).unwrap();
production::<F>(input)?;
let (input, prods) = many1(complete(production::<F>))(input.trim_end())?;
let (input, prods) = many1(complete(production::<F>))(input)?;

Ok((input, Grammar::from_parts(prods)))
}

pub fn grammar_complete<F: Format>(input: &str) -> IResult<&str, Grammar, VerboseError<&str>> {
let (input, g) = all_consuming(grammar::<F>)(input)?;

Ok((input, g))
all_consuming(grammar::<F>)(input)
}

#[cfg(test)]
Expand Down
9 changes: 0 additions & 9 deletions src/production.rs
Original file line number Diff line number Diff line change
Expand Up @@ -311,15 +311,6 @@ mod tests {
);
}

#[test]
fn parse_semicolon_separated() {
let result = Production::from_str("<base> ::= 'A' ; 'C' ; 'G' ; 'T'");
assert!(
matches!(result, Err(Error::ParseError(_))),
"production result should be error {result:?}"
);
}

#[test]
fn default_production_empty() {
let production = Production::default();
Expand Down

0 comments on commit ce60ac2

Please sign in to comment.