Skip to content

Commit

Permalink
added Format trait to support different BNF syntaxes, and a basic imp…
Browse files Browse the repository at this point in the history
…lementation of ABNF to demonstrate (#154)

* added support for some ABNF features, with a good bit of code duplication

* refactored formats into traits that are passed around via type parameters ala parse_from::<ABNF>(input)

* added support for comments yay

* added autodetection of the format in Grammar's FromStr implementation, lightly tested in tests/grammar.rs

* added a default feature flag for ABNF, since the monomorphization could increase compile times more than you'd expect (maybe?)

* fixed a bug where comments at the start of a grammar would confuse the auto-detector

* made ABNF nonterminals less... all-consuming
  • Loading branch information
Carlyle-Foster authored Jan 5, 2025
1 parent c6fb531 commit 2479db7
Show file tree
Hide file tree
Showing 12 changed files with 322 additions and 95 deletions.
3 changes: 2 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,8 @@ version = "0.5.0"
default-features = false # to disable Rayon for wasm32

[features]
default = ["serde"]
default = ["ABNF", "serde"]
ABNF = []
serde = ["dep:serde", "dep:serde_json"]
unstable = []
tracing = ["dep:tracing", "dep:tracing-subscriber", "dep:tracing-flame"]
Expand Down
127 changes: 127 additions & 0 deletions src/augmented.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
use crate::parsers::Format;
use crate::term::Term;

use nom::{
bytes::complete::{tag, take, take_till},
character::complete::{self, satisfy},
combinator::{complete, not},
error::VerboseError,
sequence::{preceded, terminated},
IResult,
};

/// Marker type selecting the ABNF (augmented BNF) syntax.
///
/// It carries no data and is used purely as a type parameter for the
/// format-generic parsers through its `Format` implementation.
/// `#[non_exhaustive]` prevents code outside this crate from constructing a value.
#[non_exhaustive]
pub struct ABNF;

impl Format for ABNF {
    /// Parses the left-hand side of an ABNF production: a rule name followed by `=`.
    ///
    /// The rule name is every character up to the first whitespace character. Any
    /// whitespace after it is skipped and a single `=` must follow. Because the name
    /// only ends at whitespace, input such as `name=value` (no space before `=`) is
    /// not split at the `=` and is rejected.
    ///
    /// On success, returns the input remaining after `=` together with the name
    /// wrapped in `Term::Nonterminal`.
    fn prod_lhs(input: &str) -> IResult<&str, Term, VerboseError<&str>> {
        let (after_name, name) = take_till(char::is_whitespace)(input)?;

        let (remaining, _) =
            preceded(complete::multispace0, complete::char('='))(after_name)?;

        Ok((remaining, Term::Nonterminal(name.to_owned())))
    }

    /// Parses a nonterminal reference on the right-hand side of a production.
    ///
    /// Only the first character is validated (alphanumeric or `_`); the name then
    /// extends to the next whitespace character, so later characters such as `-`
    /// are accepted. Whitespace following the name is consumed.
    ///
    /// The parse fails if the remaining input starts with `=`: in that case the
    /// word just read is the left-hand side of the next production, not a term of
    /// the current one.
    fn nonterminal(input: &str) -> IResult<&str, Term, VerboseError<&str>> {
        // Check the leading character only; the result is discarded so the name
        // parser below still starts from the original input.
        let starts_name = |c: char| c == '_' || c.is_alphanumeric();
        satisfy(starts_name)(input)?;

        let (remaining, name) = complete(terminated(
            take_till(char::is_whitespace),
            complete::multispace0,
        ))(input)?;

        // Require at least one character in the name.
        take(1_usize)(name)?;

        // Negative lookahead: a following `=` marks the start of a new production.
        not(complete(tag("=")))(remaining)?;

        Ok((remaining, Term::Nonterminal(name.to_owned())))
    }
}

#[cfg(test)]
mod tests {
    use super::ABNF;
    use crate::parsers::*;

    use crate::expression::Expression;
    use crate::grammar::Grammar;
    use crate::production::Production;
    use crate::term::Term;

    /// Returns a nonterminal term and the ABNF text expected to parse into it.
    fn construct_nonterminal_tuple() -> (Term, String) {
        let name = "nonterminal-pattern";

        (Term::Nonterminal(name.to_owned()), name.to_owned())
    }

    #[test]
    fn nonterminal_match() {
        let (expected, pattern) = construct_nonterminal_tuple();
        let (_, parsed) = ABNF::nonterminal(pattern.as_str()).unwrap();
        assert_eq!(expected, parsed);
    }

    /// Returns an expression (a nonterminal followed by a terminal) and its ABNF text.
    fn construct_expression_tuple() -> (Expression, String) {
        let (nonterminal, nonterminal_pattern) = construct_nonterminal_tuple();
        let (terminal, terminal_pattern) = tests::construct_terminal_tuple();
        let pattern = format!("{nonterminal_pattern} {terminal_pattern}");

        (Expression::from_parts(vec![nonterminal, terminal]), pattern)
    }

    #[test]
    fn expression_match() {
        let (expected, pattern) = construct_expression_tuple();
        let (_, parsed) = expression::<ABNF>(pattern.as_str()).unwrap();
        assert_eq!(expected, parsed);
    }

    /// Returns a production with two alternatives and its ABNF text.
    ///
    /// The second alternative is a lone nonterminal, so the production text ends
    /// with a nonterminal name.
    fn construct_production_tuple() -> (Production, String) {
        let (first_alternative, first_pattern) = construct_expression_tuple();
        let (lhs, lhs_pattern) = construct_nonterminal_tuple();
        let (trailing_nonterminal, trailing_pattern) = construct_nonterminal_tuple();

        let pattern = format!("{lhs_pattern} = {first_pattern} | {trailing_pattern}");
        let second_alternative = Expression::from_parts(vec![trailing_nonterminal]);
        let production = Production::from_parts(lhs, vec![first_alternative, second_alternative]);

        (production, pattern)
    }

    #[test]
    fn production_match() {
        let (expected, pattern) = construct_production_tuple();
        let (_, parsed) = production::<ABNF>(pattern.as_str()).unwrap();
        assert_eq!(expected, parsed);
    }

    /// Returns a grammar made of the same production twice and its ABNF text.
    fn construct_grammar_tuple() -> (Grammar, String) {
        let (first_production, production_pattern) = construct_production_tuple();
        let pattern = format!("{production_pattern} {production_pattern}");
        let grammar = Grammar::from_parts(vec![first_production, construct_production_tuple().0]);

        (grammar, pattern)
    }

    #[test]
    fn grammar_match() {
        let (expected, pattern) = construct_grammar_tuple();
        let (_, parsed) = grammar::<ABNF>(pattern.as_str()).unwrap();
        assert_eq!(expected, parsed);
    }
}
4 changes: 2 additions & 2 deletions src/expression.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#![allow(clippy::vec_init_then_push)]

use crate::error::Error;
use crate::parsers;
use crate::parsers::{self, BNF};
use crate::term::Term;
use std::fmt;
use std::ops;
Expand Down Expand Up @@ -151,7 +151,7 @@ impl FromStr for Expression {
type Err = Error;

fn from_str(s: &str) -> Result<Self, Self::Err> {
match parsers::expression_complete(s) {
match parsers::expression_complete::<BNF>(s) {
Result::Ok((_, o)) => Ok(o),
Result::Err(e) => Err(Error::from(e)),
}
Expand Down
30 changes: 27 additions & 3 deletions src/grammar.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,11 @@

use crate::error::Error;
use crate::expression::Expression;
use crate::parsers;
use crate::parsers::{self, Format, BNF};
use crate::production::Production;
use crate::term::Term;
#[cfg(feature = "ABNF")]
use crate::ABNF;
use rand::{rngs::StdRng, seq::SliceRandom, thread_rng, Rng, SeedableRng};

#[cfg(feature = "serde")]
Expand Down Expand Up @@ -227,6 +229,14 @@ impl Grammar {
Grammar { productions: v }
}

/// parse a grammar given a format
pub fn parse_from<F: Format>(input: &str) -> Result<Self, self::Error> {
match parsers::grammar_complete::<F>(input) {
Result::Ok((_, o)) => Ok(o),
Result::Err(e) => Err(Error::from(e)),
}
}

/// Add `Production` to the `Grammar`
pub fn add_production(&mut self, prod: Production) {
self.productions.push(prod);
Expand Down Expand Up @@ -492,9 +502,23 @@ impl fmt::Display for Grammar {

impl str::FromStr for Grammar {
type Err = Error;

#[cfg(feature = "ABNF")]
fn from_str(s: &str) -> Result<Self, Self::Err> {
    // Try to auto-detect the format: anything not recognized as standard BNF is
    // parsed as ABNF. (In the future the detector is expected to return an enum
    // rather than a bool.)
    if parsers::is_format_standard_bnf(s) {
        Self::parse_from::<BNF>(s)
    } else {
        Self::parse_from::<ABNF>(s)
    }
}
#[cfg(not(feature = "ABNF"))]
fn from_str(s: &str) -> Result<Self, Self::Err> {
match parsers::grammar_complete(s) {
match parsers::grammar_complete::<BNF>(s) {
Result::Ok((_, o)) => Ok(o),
Result::Err(e) => Err(Error::from(e)),
}
Expand Down
6 changes: 6 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
#![doc = include_str!("../README.md")]

mod append_vec;
#[cfg(feature = "ABNF")]
mod augmented;
mod earley;
mod error;
mod expression;
Expand All @@ -15,4 +17,8 @@ pub use crate::grammar::{Grammar, ParseTree, ParseTreeNode};
pub use crate::production::Production;
pub use crate::term::Term;

#[cfg(feature = "ABNF")]
pub use augmented::ABNF;
pub use parsers::{Format, BNF};

pub(crate) use hashbrown::HashMap;
Loading

0 comments on commit 2479db7

Please sign in to comment.