Add p5-Lingua-ZH-WordSegmenter 0.01, simplified Chinese Word Segmentation.

PR: ports/113476
Submitted by: Gea-Suan Lin <gslin at gslin.org>
svn path=/head/; revision=194590
2007-07-02 02:08:49 +00:00 · 2007-07-02 02:08:49 +00:00 · 14e6325838 · 2021-03-31 03:12:20 +00:00
commit 14e6325838
parent b399e0595a
5 changed files with 47 additions and 0 deletions
--- a/chinese/Makefile
+++ b/chinese/Makefile
@ -98,6 +98,7 @@
    SUBDIR += p5-Lingua-ZH-Summarize
    SUBDIR += p5-Lingua-ZH-TaBE
    SUBDIR += p5-Lingua-ZH-Toke
+    SUBDIR += p5-Lingua-ZH-WordSegmenter
    SUBDIR += p5-Lingua-ZH-Wrap
    SUBDIR += pcmanx
    SUBDIR += php_doc-hk
--- a/chinese/p5-Lingua-ZH-WordSegmenter/Makefile
+++ b/chinese/p5-Lingua-ZH-WordSegmenter/Makefile
@ -0,0 +1,22 @@
+# New ports collection makefile for:	p5-Lingua-ZH-WordSegmenter
+# Date created:		2007-06-08
+# Whom:			Gea-Suan Lin <gslin@gslin.org>
+#
+# $FreeBSD$
+#
+
+PORTNAME=	Lingua-ZH-WordSegmenter
+PORTVERSION=	0.01
+CATEGORIES=	chinese perl5
+MASTER_SITES=	CPAN
+MASTER_SITE_SUBDIR=	Lingua
+PKGNAMEPREFIX=	p5-
+
+MAINTAINER=	gslin@gslin.org
+COMMENT=	Simplified Chinese Word Segmentation
+
+PERL_CONFIGURE=	yes
+
+MAN3=		Lingua::ZH::WordSegmenter.3
+
+.include <bsd.port.mk>
--- a/chinese/p5-Lingua-ZH-WordSegmenter/distinfo
+++ b/chinese/p5-Lingua-ZH-WordSegmenter/distinfo
@ -0,0 +1,3 @@
+MD5 (Lingua-ZH-WordSegmenter-0.01.tar.gz) = 033dca8be176cd507c0b7f193ad372f1
+SHA256 (Lingua-ZH-WordSegmenter-0.01.tar.gz) = 8be1f370f3c65b933e0e0b8ca1d2d6267a5fd121d25903bdd388ed8be9d9a932
+SIZE (Lingua-ZH-WordSegmenter-0.01.tar.gz) = 1227001
--- a/chinese/p5-Lingua-ZH-WordSegmenter/pkg-descr
+++ b/chinese/p5-Lingua-ZH-WordSegmenter/pkg-descr
@ -0,0 +1,13 @@
+This is a Perl implementation of simplified Chinese word segmentation.
+
+The algorithm for this segmenter is to search for the longest word at
+each point from both the left and right directions, and choose the one
+with the higher frequency product.
+
+The original program is from the CPAN module Lingua::ZH::WordSegment
+(http://search.cpan.org/~chenyr/). I made the following changes: 1) make
+the interface object oriented; 2) make the internal string into utf8;
+3) use sogou's dictionary (http://www.sogou.com/labs/dl/w.html) as
+the default dictionary.
+
+WWW:	http://search.cpan.org/dist/Lingua-ZH-WordSegmenter/
--- a/chinese/p5-Lingua-ZH-WordSegmenter/pkg-plist
+++ b/chinese/p5-Lingua-ZH-WordSegmenter/pkg-plist
@ -0,0 +1,8 @@
+@comment $FreeBSD$
+%%SITE_PERL%%/%%PERL_ARCH%%/auto/Lingua/ZH/WordSegmenter/.packlist
+%%SITE_PERL%%/Lingua/ZH/WordSegmenter.pm
+@dirrmtry %%SITE_PERL%%/Lingua/ZH
+@dirrmtry %%SITE_PERL%%/Lingua
+@dirrmtry %%SITE_PERL%%/%%PERL_ARCH%%/auto/Lingua/ZH/WordSegmenter
+@dirrmtry %%SITE_PERL%%/%%PERL_ARCH%%/auto/Lingua/ZH
+@dirrmtry %%SITE_PERL%%/%%PERL_ARCH%%/auto/Lingua