Skip to content

Commit

Permalink
Merge pull request #19 from titsuki/neologd
Browse files Browse the repository at this point in the history
Introduce mecab-ipadic-neologd
  • Loading branch information
titsuki authored Feb 12, 2018
2 parents 6d71eb1 + 3952783 commit 9b0edde
Show file tree
Hide file tree
Showing 4 changed files with 148 additions and 10 deletions.
41 changes: 41 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,46 @@ MeCab depends on the following:

Once the build starts, it automatically downloads `mecab-0.996` and `mecab-ipadic-2.7.0-20070801` with `wget` and installs them under the `$HOME/.p6mecab` directory, where `$HOME` is your home directory.

Use 3rd-party dictionary
========================

mecab-ipadic-neologd
--------------------

* Step1: download and install neologd

Example:

$ git clone --depth 1 https://github.com/neologd/mecab-ipadic-neologd.git
$ cd mecab-ipadic-neologd
$ export PATH=$HOME/.p6mecab/bin:$PATH
$ ./bin/install-mecab-ipadic-neologd -p $HOME/.p6mecab/lib/mecab/dic/ipadic-neologd

* Step2: Use .new(:dicdir(PATH_TO_THE_DIR))

Example:

use MeCab;
use MeCab::Tagger;

my Str $text = "トランプ大統領 ワシントンで大規模軍事パレードを指示";
my $mecab-tagger = MeCab::Tagger.new(:dicdir("$*HOME/.p6mecab/lib/mecab/dic/ipadic-neologd"));
loop ( my MeCab::Node $node = $mecab-tagger.parse-tonode($text); $node; $node = $node.next ) {
say ($node.surface, $node.feature).join("\t");
}

# OUTPUT«
# BOS/EOS,*,*,*,*,*,*,*,*
# トランプ大統領 名詞,固有名詞,人名,一般,*,*,ドナルド・トランプ,トランプダイトウリョウ,トランプダイトウリョー
# ワシントン 名詞,固有名詞,地域,一般,*,*,ワシントン,ワシントン,ワシントン
# で 助詞,格助詞,一般,*,*,*,で,デ,デ
# 大規模 名詞,一般,*,*,*,*,大規模,ダイキボ,ダイキボ
# 軍事パレード 名詞,固有名詞,一般,*,*,*,軍事パレード,グンジパレード,グンジパレード
# を 助詞,格助詞,一般,*,*,*,を,ヲ,ヲ
# 指示 名詞,サ変接続,*,*,*,*,指示,シジ,シジ
# BOS/EOS,*,*,*,*,*,*,*,*
# »

AUTHOR
======

Expand All @@ -99,3 +139,4 @@ Copyright 2016 titsuki
libmecab ( http://taku910.github.io/mecab/ ) by Taku Kudo is licensed under the GPL, LGPL or BSD Licenses.

This library is free software; you can redistribute it and/or modify it under the Artistic License 2.0.

39 changes: 39 additions & 0 deletions lib/MeCab.pm6
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,45 @@ MeCab depends on the following:
Once the build starts, it automatically downloads C<mecab-0.996> and C<mecab-ipadic-2.7.0-20070801> with C<wget> and installs them under the C<$HOME/.p6mecab> directory, where C<$HOME> is your home directory.
=head1 Use 3rd-party dictionary
=head2 mecab-ipadic-neologd
=item1 Step1: download and install neologd
Example:
$ git clone --depth 1 https://github.com/neologd/mecab-ipadic-neologd.git
$ cd mecab-ipadic-neologd
$ export PATH=$HOME/.p6mecab/bin:$PATH
$ ./bin/install-mecab-ipadic-neologd -p $HOME/.p6mecab/lib/mecab/dic/ipadic-neologd
=item1 Step2: Use .new(:dicdir(PATH_TO_THE_DIR))
Example:
use MeCab;
use MeCab::Tagger;
my Str $text = "トランプ大統領 ワシントンで大規模軍事パレードを指示";
my $mecab-tagger = MeCab::Tagger.new(:dicdir("$*HOME/.p6mecab/lib/mecab/dic/ipadic-neologd"));
loop ( my MeCab::Node $node = $mecab-tagger.parse-tonode($text); $node; $node = $node.next ) {
say ($node.surface, $node.feature).join("\t");
}
# OUTPUT«
# BOS/EOS,*,*,*,*,*,*,*,*
# トランプ大統領 名詞,固有名詞,人名,一般,*,*,ドナルド・トランプ,トランプダイトウリョウ,トランプダイトウリョー
# ワシントン 名詞,固有名詞,地域,一般,*,*,ワシントン,ワシントン,ワシントン
# で 助詞,格助詞,一般,*,*,*,で,デ,デ
# 大規模 名詞,一般,*,*,*,*,大規模,ダイキボ,ダイキボ
# 軍事パレード 名詞,固有名詞,一般,*,*,*,軍事パレード,グンジパレード,グンジパレード
# を 助詞,格助詞,一般,*,*,*,を,ヲ,ヲ
# 指示 名詞,サ変接続,*,*,*,*,指示,シジ,シジ
# BOS/EOS,*,*,*,*,*,*,*,*
# »
=head1 AUTHOR
titsuki <titsuki@cpan.org>
Expand Down
40 changes: 34 additions & 6 deletions lib/MeCab/Model.pm6
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,42 @@ my sub mecab_model_new2(Str) returns MeCab::Model is native($library) { * }
my sub mecab_model_new_tagger(MeCab::Model) returns MeCab::Tagger is native($library) { * }
my sub mecab_model_new_lattice(MeCab::Model) returns MeCab::Lattice is native($library) { * }

multi method new {
my Str $argv = "-C";
mecab_model_new2($argv)
multi submethod new {
mecab_model_new2("-C")
}

multi method new(Str $extra-argv) {
my Str $argv = "-C " ~ $extra-argv;
mecab_model_new2($argv)
multi submethod new(Str $argv) {
mecab_model_new2($argv);
}

# Builds a model from named options instead of a hand-written argument string.
#
#   :rcfile  - resource file, passed as `-r`; must be an existing file and
#              may only be given together with :dicdir.
#   :dicdir  - dictionary directory, passed as `-d`; must be an existing
#              directory.
#   :userdic - user dictionary, passed as `-u`; must be an existing file.
#
# Returns whatever mecab_model_new2 returns for the assembled string.
# Dies when a given path does not exist, when :rcfile is given without
# :dicdir, or when a given path contains whitespace.
multi submethod new(
    Str :$rcfile,
    Str :$dicdir,
    Str :$userdic,
) {
    my @args = '-C'; # allocate-sentence

    # Validate in the same order as before so callers see the same message
    # for the same mistake; :dicdir is now checked only once.
    if $rcfile.defined {
        $rcfile.IO.f or die "$rcfile doesn't exist.";
        $dicdir.defined or die ":rcfile requires :dicdir.";
    }
    if $dicdir.defined {
        $dicdir.IO.d or die "$dicdir doesn't exist.";
    }
    if $userdic.defined {
        $userdic.IO.f or die "$userdic doesn't exist.";
    }

    my @options = '-r' => $rcfile, '-d' => $dicdir, '-u' => $userdic;
    for @options.grep(*.value.defined) -> $option {
        # Everything is joined into ONE space-separated string below, so a
        # path with whitespace would be split into several arguments.
        # NOTE(review): assumes libmecab offers no quoting for this string
        # -- confirm against the MeCab sources.
        $option.value ~~ /\s/
            and die "{$option.value} contains whitespace, which cannot be passed to MeCab.";

        @args.push("{$option.key} {$option.value}");
    }

    mecab_model_new2(@args.join(' '));
}

method create-tagger {
Expand Down
38 changes: 34 additions & 4 deletions lib/MeCab/Tagger.pm6
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,42 @@ my sub mecab_sparse_tostr3(MeCab::Tagger, size_t, Str, size_t, CArray[int8], siz
my sub mecab_dictionary_info(MeCab::Tagger) returns MeCab::DictionaryInfo is native($library) { * }
my sub mecab_strerror(MeCab::Tagger) returns Str is native($library) { * }

multi method new(Str $arg) {
mecab_new2($arg);
multi submethod new {
mecab_new2("-C");
}

multi method new {
mecab_new2("-C");
multi submethod new(Str $argv) {
mecab_new2($argv);
}

# Builds a tagger from named options instead of a hand-written argument string.
#
#   :rcfile  - resource file, passed as `-r`; must be an existing file and
#              may only be given together with :dicdir.
#   :dicdir  - dictionary directory, passed as `-d`; must be an existing
#              directory.
#   :userdic - user dictionary, passed as `-u`; must be an existing file.
#
# Returns whatever mecab_new2 returns for the assembled string.
# Dies when a given path does not exist, when :rcfile is given without
# :dicdir, or when a given path contains whitespace.
multi submethod new(
    Str :$rcfile,
    Str :$dicdir,
    Str :$userdic,
) {
    my @args = '-C'; # allocate-sentence

    # Validate in the same order as before so callers see the same message
    # for the same mistake; :dicdir is now checked only once.
    if $rcfile.defined {
        $rcfile.IO.f or die "$rcfile doesn't exist.";
        $dicdir.defined or die ":rcfile requires :dicdir.";
    }
    if $dicdir.defined {
        $dicdir.IO.d or die "$dicdir doesn't exist.";
    }
    if $userdic.defined {
        $userdic.IO.f or die "$userdic doesn't exist.";
    }

    my @options = '-r' => $rcfile, '-d' => $dicdir, '-u' => $userdic;
    for @options.grep(*.value.defined) -> $option {
        # Everything is joined into ONE space-separated string below, so a
        # path with whitespace would be split into several arguments.
        # NOTE(review): assumes libmecab offers no quoting for this string
        # -- confirm against the MeCab sources.
        $option.value ~~ /\s/
            and die "{$option.value} contains whitespace, which cannot be passed to MeCab.";

        @args.push("{$option.key} {$option.value}");
    }

    mecab_new2(@args.join(' '));
}

method version {
Expand Down

0 comments on commit 9b0edde

Please sign in to comment.