-
Notifications
You must be signed in to change notification settings - Fork 1
/
Tagger.pm6
189 lines (129 loc) · 4.8 KB
/
Tagger.pm6
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
use v6;
unit class MeCab::Tagger:auth<zef:titsuki>:ver<0.0.19> is repr('CPointer');
use NativeCall;
use MeCab;
use MeCab::Lattice;
use MeCab::DictionaryInfo;
# Path of the libmecab shared library shipped in this distribution's resources.
my constant $library = %?RESOURCES<libraries/mecab>.Str;

# NativeCall bindings to the libmecab C API. The tagger handle is passed as the
# first argument of every function that operates on an existing tagger.
my sub mecab_destroy(MeCab::Tagger) is native($library) { * }
my sub mecab_new2(Str) returns MeCab::Tagger is native($library) { * }
my sub mecab_version() returns Str is native($library) { * }
# The C function returns a float; without a declared return type NativeCall
# would treat it as void and the theta value would be lost.
my sub mecab_get_theta(MeCab::Tagger) returns num32 is native($library) { * }
my sub mecab_set_theta(MeCab::Tagger, num32) is native($library) { * }
my sub mecab_get_lattice_level(MeCab::Tagger) returns int32 is native($library) { * }
# NOTE(review): mecab.h appears to declare this setter as returning void -
# confirm before relying on the int32 result.
my sub mecab_set_lattice_level(MeCab::Tagger, int32) returns int32 is native($library) { * }
my sub mecab_parse_lattice(MeCab::Tagger, MeCab::Lattice) returns int32 is native($library) { * }
my sub mecab_sparse_tonode(MeCab::Tagger, Str) returns MeCab::Node is native($library) { * }
my sub mecab_sparse_tostr(MeCab::Tagger, Str) returns Str is native($library) { * }
# NOTE(review): the parameter lists of the next two bindings look like those of
# the mecab_nbest_sparse_tostr2/3 C functions (leading size_t N) rather than
# mecab_sparse_tostr2/3 - confirm against mecab.h before using them.
my sub mecab_sparse_tostr2(MeCab::Tagger, size_t, Str, size_t) returns CArray[int8] is native($library) { * }
my sub mecab_sparse_tostr3(MeCab::Tagger, size_t, Str, size_t, CArray[int8], size_t) returns CArray[int8] is native($library) { * }
my sub mecab_dictionary_info(MeCab::Tagger) returns MeCab::DictionaryInfo is native($library) { * }
my sub mecab_strerror(MeCab::Tagger) returns Str is native($library) { * }
# Creates a tagger with the default option string "-C".
# Returns whatever mecab_new2 returns for that option string.
multi submethod new {
    my $tagger = mecab_new2("-C");
    return $tagger;
}
# Creates a tagger from a caller-supplied MeCab option string.
#   $argv - the option string, handed to mecab_new2 unchanged
# Returns whatever mecab_new2 returns for that option string.
multi submethod new(Str $argv) {
    my $tagger = mecab_new2($argv);
    return $tagger;
}
# Creates a tagger from named arguments by assembling a MeCab option string.
#   :$rcfile  - passed as `-r`; must be an existing file and may only be
#               given together with :$dicdir
#   :$dicdir  - passed as `-d`; must be an existing directory
#   :$userdic - passed as `-u`; must be an existing file
# Dies with a descriptive message when one of these checks fails.
# Returns whatever mecab_new2 returns for the assembled option string.
#
# NOTE(review): the options are joined with single spaces into one string, so
# a path containing whitespace would presumably be split into several options
# by the native side - confirm.
multi submethod new(
    Str :$rcfile,
    Str :$dicdir,
    Str :$userdic,
) {
    # '-C' (allocate-sentence) is always the first option.
    my @options = '-C';

    with $rcfile {
        die "$rcfile doesn't exist." unless $rcfile.IO.f;
        die ":rcfile requires :dicdir." unless $dicdir.defined;
        die "$dicdir doesn't exist." unless $dicdir.IO.d;
        @options.push("-r $rcfile");
    }

    with $dicdir {
        die "$dicdir doesn't exist." unless $dicdir.IO.d;
        @options.push("-d $dicdir");
    }

    with $userdic {
        die "$userdic doesn't exist." unless $userdic.IO.f;
        @options.push("-u $userdic");
    }

    return mecab_new2(@options.join(' '));
}
# Returns the string reported by the native mecab_version function.
method version {
    return mecab_version();
}
# Parses $text with this tagger.
# Returns the Str produced by the native mecab_sparse_tostr call.
multi method parse(Str $text) {
    my $rendered = mecab_sparse_tostr(self, $text);
    return $rendered;
}
# Parses the given lattice with this tagger.
# Returns True when the native mecab_parse_lattice call yields a non-zero
# value and False otherwise.
multi method parse(MeCab::Lattice $lattice) returns Bool {
    mecab_parse_lattice(self, $lattice).so
}
# Parses $text with this tagger.
# Returns the MeCab::Node produced by the native mecab_sparse_tonode call.
method parse-tonode(Str $text) {
    my $head = mecab_sparse_tonode(self, $text);
    return $head;
}
# Returns the MeCab::DictionaryInfo obtained from the native
# mecab_dictionary_info call for this tagger.
method dictionary-info {
    return mecab_dictionary_info(self);
}
# Returns the Str obtained from the native mecab_strerror call for this tagger.
method strerror {
    return mecab_strerror(self);
}
# Hands this tagger's native handle to mecab_destroy when the object is
# destroyed.
submethod DESTROY {
    mecab_destroy(self);
}
=begin pod
=head1 NAME
MeCab::Tagger - A Raku binding for the MeCab tagger
=head1 SYNOPSIS
use MeCab;
use MeCab::Tagger;
my Str $text = "すもももももももものうち。";
my $mecab-tagger = MeCab::Tagger.new('-C');
loop ( my MeCab::Node $node = $mecab-tagger.parse-tonode($text); $node; $node = $node.next ) {
say ($node.surface, $node.feature).join("\t");
}
# OUTPUT«
# BOS/EOS,*,*,*,*,*,*,*,*
# すもも 名詞,一般,*,*,*,*,すもも,スモモ,スモモ
# も 助詞,係助詞,*,*,*,*,も,モ,モ
# もも 名詞,一般,*,*,*,*,もも,モモ,モモ
# も 助詞,係助詞,*,*,*,*,も,モ,モ
# もも 名詞,一般,*,*,*,*,もも,モモ,モモ
# の 助詞,連体化,*,*,*,*,の,ノ,ノ
# うち 名詞,非自立,副詞可能,*,*,*,うち,ウチ,ウチ
# 。 記号,句点,*,*,*,*,。,。,。
# BOS/EOS,*,*,*,*,*,*,*,*
# »
=head1 DESCRIPTION
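MeCab::Tagger is a Raku binding for the tagger of libmecab. It wraps the native tagger handle and parses either plain text or a C<MeCab::Lattice>.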
=head2 METHODS
=head3 new
Defined as:
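multi submethod new() returns MeCab::Tagger
multi submethod new(Str $argv) returns MeCab::Tagger
multi submethod new(Str :$rcfile, Str :$dicdir, Str :$userdic) returns MeCab::Tagger
Creates a new MeCab::Tagger object. Without arguments the option string C<-C> is used. A single C<Str> is passed to MeCab as the option string unchanged. The named form always starts with C<-C> and adds C<-r>, C<-d> and C<-u> for C<:rcfile>, C<:dicdir> and C<:userdic> respectively; it dies if a given path does not exist or if C<:rcfile> is given without C<:dicdir>.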
=head3 version
Defined as:
method version() returns Str
Returns the version.
=head3 parse
Defined as:
multi method parse(Str $text) returns Str
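multi method parse(MeCab::Lattice $lattice) returns Bool
Parses the given C<$text> and returns the result as a string, or parses the given C<$lattice> and returns C<True> when the underlying C<mecab_parse_lattice> call returns a non-zero value.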
=head3 parse-tonode
Defined as:
method parse-tonode(Str $text) returns MeCab::Node
Parses the given C<$text> and returns a resulting C<MeCab::Node> object.
=head3 dictionary-info
Defined as:
method dictionary-info() returns MeCab::DictionaryInfo
Returns the MeCab::DictionaryInfo object.
=head3 strerror
Defined as:
method strerror() returns Str
Returns a stored error message if it has one.
=head1 AUTHOR
titsuki <titsuki@cpan.org>
=head1 COPYRIGHT AND LICENSE
Copyright 2016 titsuki
libmecab ( http://taku910.github.io/mecab/ ) by Taku Kudo is licensed under the GPL, LGPL or BSD Licenses.
This library is free software; you can redistribute it and/or modify it under the Artistic License 2.0.
=end pod