PlaggerでMeCabを使う

Plaggerをインストール。
Plagger::Plugin::Publish::CSVを参考に、MeCabの解析結果をCSV保存するプラグインを作ってみた。


MeCab.pm

package Plagger::Plugin::Publish::MeCab;
use strict;
use warnings;
use base qw ( Plagger::Plugin );

our $VERSION = 0.01;

use Encode;
use MeCab;
use File::Spec;
use Text::CSV_PP;
use IO::File;

# Plugin entry point: installs feed() as the handler for the
# 'publish.feed' hook on the given context.
sub register {
    my ($self, $context) = @_;

    # Hook name => handler; passed through to register_hook as a flat list.
    my @hooks = ('publish.feed' => \&feed);

    $context->register_hook($self, @hooks);
}

# Handler for the 'publish.feed' hook: counts nouns in a feed and writes
# the counts to a file.
#
# The title and body of every entry are split into lines and each line is
# analysed with MeCab.  A surface form is counted when the first field of
# the node's feature string is 名詞 (noun) and the second field is 固有名詞
# (proper noun) or 一般 (general).  Counts are accumulated over the whole
# feed and written as tab-separated "word<TAB>count" rows, most frequent
# word first (ties in string order), to <dir>/<filename>.
#
# Arguments:
#   $self    - plugin instance.  Reads the conf keys dir, filename
#              (default "%u.csv"), mode ('append' appends to the file,
#              anything else overwrites it) and, via convert(), encoding.
#   $context - object whose error() method is used to report failures.
#   $args    - hash ref; $args->{feed} must provide entries().
#
# Returns nothing useful.
#
# NOTE(review): the part-of-speech literals are compared byte-for-byte with
# MeCab's output, so this file and the MeCab dictionary presumably have to
# use the same encoding (UTF-8, judging by convert()) -- confirm.
sub feed {
    my ($self, $context, $args) = @_;

    my $csv    = Text::CSV_PP->new({ binary => 1, sep_char => "\t" });
    my $append = ($self->conf->{mode} && $self->conf->{mode} eq 'append');
    my $dir    = $self->conf->{dir};
    unless (-e $dir && -d _) {
        mkdir $dir, 0755 or $context->error("mkdir $dir: $!");
    }

    my $file = Plagger::Util::filename_for($args->{feed}, $self->conf->{filename} || "%u.csv");
    my $path = File::Spec->catfile($dir, $file);

    # Pass the mode separately so characters in $path are never interpreted
    # as part of the open mode, and report a failed open instead of crashing
    # later on an undefined handle.
    my $io = IO::File->new($path, $append ? '>>' : '>')
        or $context->error("open $path: $!");
    return unless $io;    # in case error() is not fatal -- TODO confirm

    my $mecab = MeCab::Tagger->new();

    # Term frequency: surface form => number of occurrences in this feed.
    my %tf;
    for my $entry ($args->{feed}->entries) {
        my $title = $entry->{title};
        my $body  = $entry->{body};

        for my $text ($title, $body) {
            next unless defined $text;    # e.g. entries without a body

            # One MeCab run per line.  (The original pattern was /\n"/,
            # which only split on a newline followed by a double quote.)
            for my $sentence (split /\n/, $text) {
                next unless length $sentence;

                # parseToNode returns the head of a linked list of nodes.
                for (my $node = $mecab->parseToNode($sentence); $node; $node = $node->{next}) {
                    my ($hinsi, $hinsi2) = (split /,/, $node->{feature})[0, 1];
                    next unless defined $hinsi && defined $hinsi2;
                    next unless $hinsi eq '名詞';
                    next unless $hinsi2 eq '固有名詞' || $hinsi2 eq '一般';
                    $tf{ $node->{surface} }++;
                }
            }
        }
    }

    # Sorted so that repeated runs over the same feed give identical files.
    for my $word (sort { $tf{$b} <=> $tf{$a} || $a cmp $b } keys %tf) {
        my $combined = $csv->combine($word, $tf{$word});
        $io->printf("%s\n", $self->convert($csv->string)) if $combined;
    }

    # Buffered write errors only surface at close time.
    $io->close or $context->error("close $path: $!");
    return;
}

# Re-encodes a string for output.
#
# If $str is not already flagged as a character string it is decoded from
# UTF-8 first; the result is then encoded with the 'encoding' conf value,
# or 'utf8' when none is configured.  Returns the encoded byte string.
sub convert {
    my ($self, $str) = @_;

    if (!utf8::is_utf8($str)) {
        utf8::decode($str);
    }

    my $target_encoding = $self->conf->{encoding} || 'utf8';
    return encode($target_encoding, $str);
}

1;

__END__

rss2mecab.yaml

global:
  plugin_path:
    - /Library/Perl/5.8.6/Plagger/Plugin/
    - /Users/tolerance/
  assets_path: /Library/Perl/5.8.6/Plagger/assets/

plugins:
  - module: Subscription::Config
    config:
      feed:
        - http://wiredvision.jp/news/atom.xml
        - http://japan.zdnet.com/rss/news/index.rdf
  - module: Publish::MeCab
    config:
      dir: /Users/tolerance/mecabout
      filename: "%u.csv"