Skip to content

Commit

Permalink
Add Rust-backed methods to compact and decompact IDs in an OboDoc
Browse files Browse the repository at this point in the history
  • Loading branch information
althonos committed Jul 22, 2019
1 parent 3f51db5 commit a9fee11
Show file tree
Hide file tree
Showing 3 changed files with 83 additions and 9 deletions.
69 changes: 68 additions & 1 deletion src/py/doc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ use pyo3::PySequenceProtocol;
use pyo3::PyTypeInfo;

use fastobo::ast as obo;
use fastobo::visit::VisitMut;

use crate::error::Error;
use crate::pyfile::PyFile;
Expand Down Expand Up @@ -64,7 +65,7 @@ impl FromPy<fastobo::ast::EntityFrame> for EntityFrame {
Py::new(py, TypedefFrame::from_py(frame, py)).map(EntityFrame::Typedef)
}
fastobo::ast::EntityFrame::Instance(frame) => {
Py::new(py, InstanceFrame::from_py(frame, py)).map(EntityFrame::Instance)
Py::new(py, InstanceFrame::from_py(frame, py)).map(EntityFrame::Instance)
},
}
.expect("could not allocate on Python heap")
Expand Down Expand Up @@ -127,6 +128,7 @@ impl FromPy<OboDoc> for fastobo::ast::OboDoc {

#[pymethods]
impl OboDoc {
/// `~fastobo.header.HeaderFrame`: the header containing ontology metadata.
#[getter]
fn get_header<'py>(&self, py: Python<'py>) -> PyResult<Py<HeaderFrame>> {
Ok(self.header.clone_ref(py))
Expand All @@ -138,6 +140,71 @@ impl OboDoc {
self.header = Py::new(py, header.clone_py(py))?;
Ok(())
}

/// compact_ids(self, /)
/// --
///
/// Create a semantically equivalent OBO document with compact identifiers.
///
/// The OBO specification describes how to perform a URI decompaction
/// using either ID spaces declared in the document header, builtin ID
/// spaces, or a default rule using the `purl.obolibrary.org` domain.
/// By applying the reverse operation, a new ontology can be created with
/// compact identifiers. Some URLs may not have a compact representation
/// if they don't correspond to any decompaction rule.
///
/// Example:
/// >>> doc = fastobo.loads(textwrap.dedent(
/// ... """
/// ... idspace: MassBank http://www.massbank.jp/jsp/FwdRecord.jsp?id=
/// ...
/// ... [Term]
/// ... id: http://purl.obolibrary.org/obo/CHEBI_27958
/// ... xref: http://www.massbank.jp/jsp/FwdRecord.jsp?id=EA281701
/// ... """
/// ... ))
/// >>> compact_doc = doc.compact_ids()
/// >>> print(compact_doc[0])
/// [Term]
/// id: CHEBI:27958
/// xref: MassBank:EA281701
/// <BLANKLINE>
///
fn compact_ids(&self) -> PyResult<Self> {
    // Obtain the GIL token through the safe guard API instead of asserting
    // with `unsafe` that the GIL is already held. Acquiring is re-entrant,
    // so this is a cheap no-op-like call when Python invoked us, and it
    // stays sound if the method is ever reached without the GIL.
    let gil = Python::acquire_gil();
    let py = gil.python();
    // Convert a clone of this document into the native `fastobo` syntax
    // tree, so the visitor below mutates the copy and not `self`.
    let mut doc = obo::OboDoc::from_py(self.clone_py(py), py);
    // Run the compacting visitor over the whole native document, in place.
    fastobo::visit::IdCompactor::new().visit_doc(&mut doc);
    // Wrap the rewritten tree back into a Python-side `OboDoc`.
    Ok(doc.into_py(py))
}

/// decompact_ids(self, /)
/// --
///
/// Create a semantically equivalent OBO document with IRI identifiers.
///
/// Example:
/// >>> doc = fastobo.loads(textwrap.dedent(
/// ... """
/// ... idspace: MassBank http://www.massbank.jp/jsp/FwdRecord.jsp?id=
/// ...
/// ... [Term]
/// ... id: CHEBI:27958
/// ... xref: MassBank:EA281701
/// ... """
/// ... ))
/// >>> url_doc = doc.decompact_ids()
/// >>> print(url_doc[0])
/// [Term]
/// id: http://purl.obolibrary.org/obo/CHEBI_27958
/// xref: http://www.massbank.jp/jsp/FwdRecord.jsp?id=EA281701
/// <BLANKLINE>
///
fn decompact_ids(&self) -> PyResult<Self> {
    // Obtain the GIL token through the safe guard API instead of asserting
    // with `unsafe` that the GIL is already held. Acquiring is re-entrant,
    // so this is a cheap no-op-like call when Python invoked us, and it
    // stays sound if the method is ever reached without the GIL.
    let gil = Python::acquire_gil();
    let py = gil.python();
    // Convert a clone of this document into the native `fastobo` syntax
    // tree, so the visitor below mutates the copy and not `self`.
    let mut doc = obo::OboDoc::from_py(self.clone_py(py), py);
    // Run the decompacting visitor over the whole native document, in place.
    fastobo::visit::IdDecompactor::new().visit_doc(&mut doc);
    // Wrap the rewritten tree back into a Python-side `OboDoc`.
    Ok(doc.into_py(py))
}
}

#[pyproto]
Expand Down
21 changes: 13 additions & 8 deletions src/py/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ fn fastobo(py: Python, m: &PyModule) -> PyResult<()> {
/// >>> doc = fastobo.load(urlopen(url))
/// >>> doc.header[2]
/// OntologyClause('cmo.obo')
///
#[pyfn(m, "load")]
fn load(py: Python, fh: &PyAny) -> PyResult<OboDoc> {
if let Ok(s) = fh.downcast_ref::<PyString>() {
Expand Down Expand Up @@ -171,7 +172,6 @@ fn fastobo(py: Python, m: &PyModule) -> PyResult<()> {
/// Use ``fastobo.loads`` to deserialize a literal OBO frame into the
/// corresponding syntax tree:
///
/// >>> import textwrap
/// >>> doc = fastobo.loads(textwrap.dedent(
/// ... """
/// ... [Term]
Expand All @@ -195,9 +195,10 @@ fn fastobo(py: Python, m: &PyModule) -> PyResult<()> {
/// load_graph(fh)
/// --
///
/// Load an OBO graph from the given path or file handle. Both JSON and
/// YAML formats are supported. *Actually, since YAML is a superset of
/// JSON, all graphs are in YAML format...*
/// Load an OBO graph from the given path or file handle.
///
/// Both JSON and YAML formats are supported. *Actually, since YAML is a
/// superset of JSON, all graphs are in YAML format.*
///
/// Arguments:
/// fh (str or file-handle): the path to an OBO graph file, or a
Expand Down Expand Up @@ -225,6 +226,13 @@ fn fastobo(py: Python, m: &PyModule) -> PyResult<()> {
/// >>> doc = fastobo.load_graph(urlopen(url))
/// >>> doc[4]
/// TermFrame(Url('http://purl.obolibrary.org/obo/PATO_0000000'))
///
/// Note:
/// OBO graphs only contain URL identifiers, and deserializing one
/// will not compact them automatically. Consider using the
/// `~fastobo.doc.OboDoc.compact_ids` method if that is the expected
/// result.
///
#[pyfn(m, "load_graph")]
fn load_graph(py: Python, fh: &PyAny) -> PyResult<OboDoc> {
// Parse the source graph document.
Expand Down Expand Up @@ -259,12 +267,9 @@ fn fastobo(py: Python, m: &PyModule) -> PyResult<()> {

// Convert the graph to an OBO document
let graph = doc.graphs.into_iter().next().unwrap();
let mut doc = obo::OboDoc::from_graph(graph)
let doc = obo::OboDoc::from_graph(graph)
.map_err(|e| RuntimeError::py_err(e.to_string()))?;

// Shrink IDs in OBO document
// fastobo::visit::IdCompactor::new().visit_doc(&mut doc);

// Convert the OBO document to a Python handle
Ok(OboDoc::from_py(doc, py))
}
Expand Down
2 changes: 2 additions & 0 deletions tests/test_doctests.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import doctest
import warnings
import pprint
import textwrap
import types

import fastobo
Expand Down Expand Up @@ -40,6 +41,7 @@ def load_tests(loader, tests, ignore):
globs = {
"fastobo": fastobo,
"datetime": datetime,
"textwrap": textwrap,
"pprint": pprint.pprint,
"ms": fastobo.load(os.path.realpath(
os.path.join(__file__, "..", "data", "ms.obo")
Expand Down

0 comments on commit a9fee11

Please sign in to comment.