Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

v1.0.0 #18

Merged
merged 45 commits into from
Dec 20, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
45 commits
Select commit Hold shift + click to select a range
a684463
Rename tokenize module
vmenger Nov 30, 2023
4b02432
Add token index
vmenger Nov 30, 2023
6fb99e0
Add frozendict dependency
vmenger Dec 1, 2023
a178872
Speedup annotation sorting
vmenger Dec 1, 2023
2e01493
Speedup regexp annotator
vmenger Dec 1, 2023
903f540
Speedup single token annotator
vmenger Dec 1, 2023
949ec06
Fix test
vmenger Dec 1, 2023
1dae5f3
Speedup multi token lookup annotator
vmenger Dec 1, 2023
7e91a06
Add offset option
vmenger Dec 1, 2023
e66c038
Add repr for string processor
vmenger Dec 1, 2023
dde29cb
Move link token logic
vmenger Dec 1, 2023
ab5cc70
Add token lookup logic with matching pipeline
vmenger Dec 1, 2023
b0c493f
Update offset to start_i
vmenger Dec 1, 2023
608c3f5
Update previous and next tests
vmenger Dec 1, 2023
3ef03ee
Update get_words and token_lookup tests
vmenger Dec 1, 2023
79f6190
Formatting
vmenger Dec 1, 2023
eeffb20
Cleanup annotator code
vmenger Dec 1, 2023
280e7a8
Cleanup tokenizer code
vmenger Dec 1, 2023
f4d21ef
Formatting
vmenger Dec 1, 2023
fcc8873
Rename instance var
vmenger Dec 1, 2023
798ddfc
Add info when not presenting callbacks as frozendict
vmenger Dec 1, 2023
60f58b6
Update changelog
vmenger Dec 1, 2023
b43fc0f
Update dependencies
vmenger Dec 1, 2023
4079155
Optimize caching
vmenger Dec 4, 2023
bd65b0a
Optimize caching
vmenger Dec 4, 2023
b03108b
Add option to directly add trie to multi token lookup
vmenger Dec 5, 2023
61d6909
Update formatting and linting
vmenger Dec 7, 2023
51aea0f
Update formatting and linting
vmenger Dec 7, 2023
0a0624b
Linting
vmenger Dec 7, 2023
e4998ad
Formatting
vmenger Dec 8, 2023
a72ba4b
Improve pattern
vmenger Dec 8, 2023
fbdbd55
Improve processor and processor group abstraction
vmenger Dec 8, 2023
936a990
Rename files
vmenger Dec 8, 2023
8b486a7
Formatting
vmenger Dec 8, 2023
7945cb0
Order test classes
vmenger Dec 8, 2023
9587342
Move dev dependency
vmenger Dec 8, 2023
0a2b8f2
Update changelog
vmenger Dec 8, 2023
43c8241
Remove lcov file
vmenger Dec 8, 2023
46de845
Update docs
vmenger Dec 8, 2023
a9b9bd7
Update changelog
vmenger Dec 8, 2023
f3c15d2
Update token serializing
vmenger Dec 8, 2023
528a5b9
Rename pre_match_tokens internally
vmenger Dec 11, 2023
1263c37
Fix typo
vmenger Dec 12, 2023
dcb36bc
Use casefold instead of lower
vmenger Dec 13, 2023
a12775c
Prepare release
vmenger Dec 19, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Rename files
  • Loading branch information
vmenger committed Dec 8, 2023
commit 936a990cf6f961ada8189cf5affd15ead449a056
2 changes: 1 addition & 1 deletion docdeid/deidentifier.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from typing import Optional

from docdeid.document import Document
from docdeid.process.doc import DocProcessorGroup
from docdeid.process.doc_processor import DocProcessorGroup
from docdeid.tokenizer import Tokenizer


Expand Down
4 changes: 2 additions & 2 deletions docdeid/process/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from .annotation_set import (
from .annotation_processor import (
AnnotationProcessor,
MergeAdjacentAnnotations,
OverlapResolver,
Expand All @@ -10,5 +10,5 @@
SingleTokenLookupAnnotator,
TokenPatternAnnotator,
)
from .doc import DocProcessor, DocProcessorGroup
from .doc_processor import DocProcessor, DocProcessorGroup
from .redactor import RedactAllText, Redactor, SimpleRedactor
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

from docdeid.annotation import Annotation, AnnotationSet
from docdeid.document import Document
from docdeid.process.doc import DocProcessor
from docdeid.process.doc_processor import DocProcessor


class AnnotationProcessor(DocProcessor, ABC):
Expand Down
2 changes: 1 addition & 1 deletion docdeid/process/annotator.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from docdeid.document import Document
from docdeid.ds.lookup import LookupSet, LookupTrie
from docdeid.pattern import TokenPattern
from docdeid.process.doc import DocProcessor
from docdeid.process.doc_processor import DocProcessor
from docdeid.str.processor import StringModifier
from docdeid.tokenizer import Token, Tokenizer

Expand Down
File renamed without changes.
2 changes: 1 addition & 1 deletion docdeid/process/redactor.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

from docdeid.annotation import Annotation, AnnotationSet
from docdeid.document import Document
from docdeid.process.doc import DocProcessor
from docdeid.process.doc_processor import DocProcessor


class Redactor(DocProcessor, ABC):
Expand Down
File renamed without changes.
2 changes: 1 addition & 1 deletion tests/unit/ds/test_lookup.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def test_add_items_from_self(self, lowercase_proc):

def test_add_items_from_file(self):
lookup_set = LookupSet()
lookup_set.add_items_from_file(file_path="tests/unit/test_data/name_list.txt")
lookup_set.add_items_from_file(file_path="tests/data/name_list.txt")

assert "John" in lookup_set
assert "Mary" in lookup_set
Expand Down
2 changes: 1 addition & 1 deletion tests/unit/process/test_annotation_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from frozendict import frozendict

from docdeid.annotation import Annotation, AnnotationSet
from docdeid.process.annotation_set import MergeAdjacentAnnotations, OverlapResolver
from docdeid.process.annotation_processor import MergeAdjacentAnnotations, OverlapResolver


class TestOverlapResolver:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
from unittest.mock import patch

from docdeid import Document
from docdeid.process.doc import DocProcessor, DocProcessorGroup
from docdeid.process.doc_processor import DocProcessor, DocProcessorGroup


class TestDocProcessorGroup:
@patch("docdeid.process.doc.DocProcessor.__abstractmethods__", set())
@patch("docdeid.process.doc_processor.DocProcessor.__abstractmethods__", set())
def test_create_doc_processor_group(self):
proc_1 = DocProcessor()
proc_2 = DocProcessor()
Expand All @@ -23,7 +23,7 @@ def test_create_doc_processor_group(self):
proc_1_process.assert_called_once()
proc_2_process.assert_called_once()

@patch("docdeid.process.doc.DocProcessor.__abstractmethods__", set())
@patch("docdeid.process.doc_processor.DocProcessor.__abstractmethods__", set())
def test_doc_processor_add_at_position(self):
dpg = DocProcessorGroup()
proc = DocProcessor()
Expand All @@ -33,7 +33,7 @@ def test_doc_processor_add_at_position(self):

assert dpg.get_names() == ["proc_1", "proc_3", "proc_2"]

@patch("docdeid.process.doc.DocProcessor.__abstractmethods__", set())
@patch("docdeid.process.doc_processor.DocProcessor.__abstractmethods__", set())
def test_create_doc_processor_group_enabled(self):
proc_1 = DocProcessor()
proc_2 = DocProcessor()
Expand All @@ -51,7 +51,7 @@ def test_create_doc_processor_group_enabled(self):
proc_1_process.assert_not_called()
proc_2_process.assert_called_once()

@patch("docdeid.process.doc.DocProcessor.__abstractmethods__", set())
@patch("docdeid.process.doc_processor.DocProcessor.__abstractmethods__", set())
def test_create_doc_processor_group_disabled(self):
proc_1 = DocProcessor()
proc_2 = DocProcessor()
Expand All @@ -69,7 +69,7 @@ def test_create_doc_processor_group_disabled(self):
proc_1_process.assert_not_called()
proc_2_process.assert_called_once()

@patch("docdeid.process.doc.DocProcessor.__abstractmethods__", set())
@patch("docdeid.process.doc_processor.DocProcessor.__abstractmethods__", set())
def test_doc_processor_group_names(self):
dpg = DocProcessorGroup()
dpg.add_processor("proc_1", DocProcessor())
Expand All @@ -88,7 +88,7 @@ def test_doc_processor_group_names(self):
"nested_proc_1",
]

@patch("docdeid.process.doc.DocProcessor.__abstractmethods__", set())
@patch("docdeid.process.doc_processor.DocProcessor.__abstractmethods__", set())
def test_remove_doc_processor(self):
dpg = DocProcessorGroup()
proc_1 = DocProcessor()
Expand All @@ -99,7 +99,7 @@ def test_remove_doc_processor(self):
dpg.process(doc=Document(text="test"))
proc_1_process.assert_not_called()

@patch("docdeid.process.doc.DocProcessor.__abstractmethods__", set())
@patch("docdeid.process.doc_processor.DocProcessor.__abstractmethods__", set())
def test_get_doc_processor(self):
dpg = DocProcessorGroup()
proc_1 = DocProcessor()
Expand Down
4 changes: 2 additions & 2 deletions tests/unit/test_deidentifier.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from unittest.mock import patch

from docdeid.deidentifier import DocDeid
from docdeid.process.doc import DocProcessor
from docdeid.process.doc_processor import DocProcessor
from docdeid.tokenizer import Tokenizer


Expand All @@ -11,7 +11,7 @@ def test_create_docdeid(self):
doc = dd.deidentify(text="test")
assert doc.text == "test"

@patch("docdeid.process.doc.DocProcessor.__abstractmethods__", set())
@patch("docdeid.process.doc_processor.DocProcessor.__abstractmethods__", set())
def test_add_processors(self):
proc_1 = DocProcessor()
proc_2 = DocProcessor()
Expand Down
File renamed without changes.