Skip to content

Commit

Permalink
Reworking for basic validator output
Browse files Browse the repository at this point in the history
  • Loading branch information
ahankinson committed Feb 9, 2023
1 parent 491841c commit ad4c42b
Show file tree
Hide file tree
Showing 5 changed files with 298 additions and 232 deletions.
Empty file added libmei/tools/__init__.py
Empty file.
3 changes: 2 additions & 1 deletion libmei/tools/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,5 @@ output_dir: "../dist"
addons_dir: "../addons"
elements: false
namespace: "vrv"
datatypes: "./datatypes.yml"
datatypes: "./datatypes.yml"
basic_odd: "../mei/develop/mei-basic_compiled.odd"
61 changes: 60 additions & 1 deletion libmei/tools/cpp.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
# -- coding: utf-8 --
import sys
from re import Pattern
from typing import Optional

Expand All @@ -9,6 +8,8 @@
import textwrap
from pathlib import Path

from schema import MeiSchema

lg = logging.getLogger('schemaparser')

NS_PREFIX_MAP = {
Expand Down Expand Up @@ -1157,6 +1158,61 @@ def copy_addons(namespace: str, addons_dir: Path, outdir: Path):
lg.debug("Wrote addon %s", outfile)


# Template for the C++ map produced by create_basic_validator().
# It is rendered with str.format(): the doubled braces come out as literal
# "{" / "}" in the C++ source, and {nameAttributeMap} is replaced by the
# pre-formatted rows, one `{"element", {"attr", ...}},` entry per element.
BASIC_VALID_CPP: str = """
std::map<std::string, std::vector<std::string>> basic = {{
{nameAttributeMap}
}};
"""


def create_basic_validator(configure: dict, outdir: Path):
    """Build the C++ element -> allowed-attributes map for the MEI basic profile.

    The ODD file named by ``configure["basic_odd"]`` is parsed with element
    resolution enabled, every element's attribute-class memberships are
    expanded into a flat list of attribute names, and the result is rendered
    through ``BASIC_VALID_CPP``.

    :param configure: Parsed configuration; must contain the key ``basic_odd``.
    :param outdir: Output directory. Currently unused, because the rendered
        text is not written anywhere yet (see the TODO at the end).
    :raises KeyError: If a namespaced attribute uses a namespace that is not
        present in ``NS_PREFIX_MAP``.
    """
    odd_path = Path(configure["basic_odd"])
    with odd_path.open("r") as odd_file:
        basic = MeiSchema(odd_file, resolve_elements=True)

    # Drop the module level: attribute class name -> attribute names.
    group_attrs = {}
    for groups in basic.attribute_group_structure.values():
        group_attrs.update(groups)

    # Expand every element's entries into one flat attribute list. An entry is
    # either a list of attributes declared on the element itself, or the name
    # of an attribute class to look up (unknown classes contribute nothing).
    element_attrs: dict = {}
    for elements in basic.element_structure.values():
        for element_name, entries in elements.items():
            resolved = []
            for entry in entries:
                if not isinstance(entry, list):
                    entry = group_attrs.get(entry, [])
                resolved.extend(entry)
            element_attrs[element_name] = resolved

    rows: list = []
    for element_name, attrs in element_attrs.items():
        names = []
        for attr in attrs:
            if "|" not in attr:
                names.append(attr)
                continue
            # "<namespace uri>|<local name>" marks a namespaced attribute;
            # emit it as "<prefix>:<local name>".
            ns, local = attr.split("|")
            names.append(f"{NS_PREFIX_MAP[ns]}:{local}")

        if not names:
            initializer = '{}'
        else:
            joined = "\", \"".join(names)
            initializer = f'{{"{joined}"}}'

        rows.append(f' {{"{element_name}", {initializer}}},\n')

    name_attribute_map = "".join(rows).rstrip()
    formatted_output = BASIC_VALID_CPP.format(nameAttributeMap=name_attribute_map)

    # TODO: Actually write the output somewhere!


def create(schema, configure: dict) -> bool:
global DATATYPES
lg.debug("Begin Verovio C++ Output ...")
Expand Down Expand Up @@ -1184,5 +1240,8 @@ def create(schema, configure: dict) -> bool:
create_att_classes(ns, schema, outdir)
create_att_datatypes(ns, schema, outdir)

if configure["basic_odd"]:
create_basic_validator(configure, outdir)

lg.debug("Success!")
return True
231 changes: 1 addition & 230 deletions libmei/tools/parseschema2.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,249 +22,20 @@
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

import logging
import re
import shutil
import sys
from argparse import ArgumentParser
from pathlib import Path
from re import Pattern

import yaml
from lxml import etree

import cpp
from schema import MeiSchema

# Configure the root logging format once for the command-line tool; the
# 'schemaparser' logger name is the same one cpp.py asks for.
logging.basicConfig(format="[%(asctime)s] [%(levelname)8s] %(message)s (%(filename)s:%(lineno)s)")
log = logging.getLogger('schemaparser')


# globals
# Prefix -> URI map handed to XPath queries that only touch TEI elements.
TEI_NS = {"tei": "http://www.tei-c.org/ns/1.0"}
# Same as TEI_NS plus RELAX NG, for queries that also match rng:* elements.
TEI_RNG_NS = {"tei": "http://www.tei-c.org/ns/1.0",
"rng": "http://relaxng.org/ns/structure/1.0"}
# Prefixes that may appear inside an attribute ident (e.g. "xlink:href");
# used to turn "prefix:name" into "<namespace uri>|name".
NAMESPACES = {'xml': 'http://www.w3.org/XML/1998/namespace',
'xlink': 'http://www.w3.org/1999/xlink'}
# Matches runs of whitespace; used to collapse description text to single spaces.
WHITESPACE_REGEX: Pattern = re.compile(r"[\s\t]+")


class MeiSchema(object):
    """In-memory index of a compiled MEI ODD file.

    Parsing fills the following attributes:

    * ``element_structure`` -- module -> element name -> list of the attribute
      class names the element (transitively) belongs to, optionally followed
      by ONE nested list holding the attributes declared on the element
      itself. Only populated when ``resolve_elements`` is true.
    * ``attribute_group_structure`` -- module -> attribute class -> attribute
      names.
    * ``inverse_attribute_group_structure`` -- attribute class -> module.
    * ``data_types`` -- data type ident -> value idents.
    * ``data_lists`` -- ``"<classSpec ident>@<attribute ident>"`` -> value
      idents.
    * ``active_modules`` -- sorted module names taken from
      ``element_structure``.
    """

    def __init__(self, oddfile, resolve_elements=False):
        """Parse ``oddfile`` and build the lookup structures.

        :param oddfile: Anything ``lxml.etree.parse`` accepts (path or open
            file object).
        :param resolve_elements: Also resolve every element's attribute class
            memberships. This is slow, so it is opt-in.
        """
        parser = etree.XMLParser(resolve_entities=True)
        self.schema = etree.parse(oddfile, parser)

        self.active_modules = []  # the modules active in the resulting output
        self.element_structure = {}  # the element structure.

        self.attribute_group_structure = {}  # the attribute group structure
        # inverted, so we can map attgroups to modules
        self.inverse_attribute_group_structure = {}
        # holding data types and data lists
        self.data_types = {}
        self.data_lists = {}

        # processing the elements takes a long time, so only do it when needed
        if resolve_elements:
            self.get_elements()

        self.get_attribute_groups()
        self.get_data_types_and_lists()
        self.invert_attribute_group_structure()
        self.set_active_modules()

    @staticmethod
    def _collect_idents(target: dict, key: str, items) -> None:
        """Append the ``ident`` of every item to ``target[key]``.

        The list is created on first use, so ``key`` only appears in
        ``target`` when ``items`` yields at least one entry.
        """
        for item in items:
            target.setdefault(key, []).append(item.get("ident"))

    def get_elements(self):
        """Retrieves all defined elements from the schema."""
        elements = self.schema.xpath("//tei:elementSpec", namespaces=TEI_NS)

        for element in elements:
            # "MEI.shared" -> "shared"
            modname = element.get("module").split(".")[-1]
            module_elements = self.element_structure.setdefault(modname, {})

            memberships = []
            for member in element.xpath("./tei:classes/tei:memberOf", namespaces=TEI_NS):
                if member.get("key").split(".")[0] != "att":
                    # skip the models that this element might be a member of
                    continue
                self.__get_membership(member, memberships)

            module_elements[element.get("ident")] = memberships

            # need a way to keep self-defined attributes: they are stored as
            # one nested list at the end of the membership list.
            attdefs = element.xpath("./tei:attList/tei:attDef", namespaces=TEI_NS)
            if not attdefs:
                continue

            memberships.append(
                [self.__process_att(attdef) for attdef in attdefs if attdef.get("ident") != "id"]
            )

    def get_attribute_groups(self):
        """Retrieves all defined attribute classes from the schema."""
        attribute_groups = self.schema.xpath(".//tei:classSpec[@type='atts']",
                                             namespaces=TEI_NS)
        for group in attribute_groups:
            group_name: str = group.get("ident")
            if group_name == "att.id":
                continue

            group_module = group.get("module").split(".")[-1]
            attdefs = group.xpath("./tei:attList/tei:attDef", namespaces=TEI_NS)
            if not attdefs:
                # classes that only aggregate other classes carry no attributes
                continue

            module_groups = self.attribute_group_structure.setdefault(group_module, {})
            module_groups[group_name] = [
                self.__process_att(attdef) for attdef in attdefs if attdef.get("ident") != "id"
            ]

    def get_data_types_and_lists(self):
        """Collects the value lists of data types and of attribute definitions.

        Fills ``data_types`` from (a) data types composed of alternate macro
        references, (b) data types composed of RELAX NG choices, and (c) data
        types that carry their own closed or semi-closed value list. Fills
        ``data_lists`` from closed or semi-closed value lists found inside
        attribute classes.
        """
        compound_alternate = self.schema.xpath(".//tei:macroSpec[@type='dt' and .//tei:alternate[@minOccurs='1' and @maxOccurs='1']]",
                                               namespaces=TEI_RNG_NS)
        for ct in compound_alternate:
            data_type = ct.get("ident")
            for st in ct.findall(".//tei:alternate/tei:macroRef", namespaces=TEI_RNG_NS):
                subtype = self.schema.xpath(".//tei:macroSpec[@ident=$st_ident]//tei:valList/tei:valItem",
                                            st_ident=st.get("key"),
                                            namespaces=TEI_RNG_NS)
                self._collect_idents(self.data_types, data_type, subtype)

        compound_choice = self.schema.xpath(".//tei:macroSpec[@type='dt' and .//rng:choice]|//tei:dataSpec[.//rng:choice]",
                                            namespaces=TEI_RNG_NS)
        for ct in compound_choice:
            data_type = ct.get("ident")
            for st in ct.xpath(".//rng:choice/rng:ref", namespaces=TEI_RNG_NS):
                subtype = st.xpath("//tei:macroSpec[@ident=$st_ident]//tei:valList/tei:valItem|//tei:dataSpec[@ident=$st_ident]//tei:valList/tei:valItem",
                                   st_ident=st.get("name"),
                                   namespaces=TEI_RNG_NS)
                self._collect_idents(self.data_types, data_type, subtype)

        types = self.schema.xpath(".//tei:macroSpec[.//tei:valList[@type='closed' or @type='semi']]|//tei:dataSpec[.//tei:valList[@type='closed' or @type='semi']]", namespaces=TEI_RNG_NS)
        for t in types:
            values = t.findall(".//tei:valList/tei:valItem", namespaces=TEI_RNG_NS)
            self._collect_idents(self.data_types, t.get("ident"), values)

        vallists = self.schema.xpath("//tei:valList[@type='closed' or @type='semi']", namespaces=TEI_RNG_NS)
        for vl in vallists:
            element = vl.xpath("./ancestor::tei:classSpec", namespaces=TEI_RNG_NS)
            if not element:
                # value list is not part of an attribute class
                continue

            att_name = vl.xpath("./parent::tei:attDef/@ident", namespaces=TEI_RNG_NS)

            data_list = f"{element[0].get('ident')}@{att_name[0]}"
            values = vl.xpath(".//tei:valItem", namespaces=TEI_RNG_NS)
            self._collect_idents(self.data_lists, data_list, values)

    def invert_attribute_group_structure(self):
        """Builds the attribute class -> module reverse lookup."""
        for module, groups in self.attribute_group_structure.items():
            for attgroup in groups:
                self.inverse_attribute_group_structure[attgroup] = module

    def set_active_modules(self):
        """Stores the sorted names of all modules that define elements."""
        self.active_modules = sorted(self.element_structure)

    def __process_att(self, attdef: "etree._Element") -> str:
        """Process attribute definition.

        Hyphenated idents are camel-cased ("dur-ges" -> "durGes"; any number
        of hyphens is accepted). Namespaced attributes are returned as
        "<namespace uri>|<name>", where the namespace comes from the ``ns``
        attribute or, failing that, from a "prefix:" in the ident.

        :raises KeyError: If the ident uses a prefix missing from NAMESPACES.
        """
        attdef_ident = attdef.get("ident")
        if "-" in attdef_ident:
            first, *rest = attdef_ident.split("-")
            attdef_ident = first + "".join(part.title() for part in rest)

        ns = attdef.get("ns")
        if ns:
            return f"{ns}|{attdef_ident}"
        if ":" in attdef_ident:
            pfx, att = attdef_ident.split(":")
            return f"{NAMESPACES[pfx]}|{att}"
        return attdef_ident

    def __get_membership(self, member: "etree._Element", resarr: list[str]) -> None:
        """Get attribute groups.

        Appends to ``resarr`` the ident of the attribute class referenced by
        ``member`` (when that class defines attributes of its own) and then
        walks up through the classes it is itself a member of.
        """
        member_attgroup = self.schema.xpath(".//tei:classSpec[@type='atts'][@ident=$nm]", nm=member.get("key"), namespaces=TEI_NS)

        # xpath() yields a list, never None: an unknown key gives [] and must
        # be skipped here instead of failing on the [0] below.
        if not member_attgroup:
            return None

        member_groupel = member_attgroup[0]
        if member_groupel.get("ident") == "att.id":
            return None

        if member_groupel.xpath("./tei:attList/tei:attDef", namespaces=TEI_NS):
            resarr.append(member_groupel.get("ident"))

        for mship in member_groupel.xpath("./tei:classes/tei:memberOf", namespaces=TEI_NS):
            self.__get_membership(mship, resarr)

    def strpatt(self, string: str) -> str:
        """Returns a version of the string with every "att." removed."""
        return string.replace("att.", "")

    def strpdot(self, string: str) -> str:
        """Returns a version of the string without any dots."""
        return "".join(string.split("."))

    def cc(self, att_name: str) -> str:
        """Returns a CamelCasedName version of attribute.case.names."""
        # n[:1] instead of n[0] so that empty segments ("", "a..b") are tolerated
        return "".join(n[:1].upper() + n[1:] for n in att_name.split("."))

    def get_att_desc(self, att_name: str) -> str:
        """Returns the documentation string for an attribute by name.

        The first matching attDef in the document is used; an empty string is
        returned when the attribute has no description.
        """
        # Relative ".//" path, as in get_elem_desc: find() on a tree does not
        # support absolute "//" paths properly.
        desc = self.schema.find(f".//tei:attDef[@ident='{att_name}']/tei:desc", namespaces=TEI_NS)
        if desc is None:
            return ""

        return WHITESPACE_REGEX.sub(" ", desc.xpath("string()"))

    def get_elem_desc(self, elem_name: str) -> str:
        """Returns the documentation string for an element by name.

        An empty string is returned when the element has no description.
        """
        desc = self.schema.find(f".//tei:elementSpec[@ident='{elem_name}']/tei:desc", namespaces=TEI_NS)
        if desc is None:
            return ""

        return WHITESPACE_REGEX.sub(" ", desc.xpath("string()"))


def main(configure: dict) -> bool:
with open(configure["compiled"], "r") as mei_source:
resolve_elements = configure["elements"]
Expand Down
Loading

0 comments on commit ad4c42b

Please sign in to comment.