Import libtextcat-2.2

Libtextcat is a library with functions that implement the classification
technique described in Cavnar & Trenkle, "N-Gram-Based Text
Categorization". It was primarily developed for language guessing, a
task on which it is known to perform with near-perfect accuracy.

Based on the FreeBSD port.
This is a prerequisite for pinot.
This commit is contained in:
ajacoutot 2009-07-31 15:42:58 +00:00
parent cb6b12d0a2
commit b74f50b2b8
5 changed files with 140 additions and 0 deletions

42
textproc/libtextcat/Makefile Executable file
View File

@ -0,0 +1,42 @@
# $OpenBSD: Makefile,v 1.1.1.1 2009/07/31 15:42:58 ajacoutot Exp $

# Port description: one-line summary and the upstream distribution name
# (libtextcat, version 2.2).
COMMENT= language guessing library
DISTNAME= libtextcat-2.2

# Version of the libtextcat shared library built by this port; the packing
# list fragment pkg/PFRAG.shared refers to it as LIBtextcat_VERSION.
SHARED_LIBS += textcat 0.0 # .0.0

CATEGORIES= textproc
HOMEPAGE= http://software.wise-guys.nl/libtextcat/

# License: GPLv2 -- both the package and the distfiles may be redistributed
# on CD-ROM and via FTP.
# GPLv2
PERMIT_PACKAGE_CDROM= Yes
PERMIT_PACKAGE_FTP= Yes
PERMIT_DISTFILES_CDROM= Yes
PERMIT_DISTFILES_FTP= Yes

# Only the C library is listed as a library dependency.
WANTLIB += c

# Location the distribution tarball is fetched from.
MASTER_SITES= http://software.wise-guys.nl/download/

# Build setup: GNU configure script plus libtool. CONFIGURE_SHARED is not
# defined in this file; it is expected to come from the ports infrastructure.
USE_LIBTOOL= Yes
CONFIGURE_STYLE= gnu
CONFIGURE_ARGS= ${CONFIGURE_SHARED}
# Extra install step: copy the files this port installs by hand -- the public
# header, the language models (*.lm) with their conf.txt, and the README.
# Every recipe line starts with a hard tab, as make requires.
# All directory destinations end in a slash, so a missing directory is
# reported as an error instead of creating a regular file with that name.
post-install:
	${INSTALL_DATA} ${WRKSRC}/src/textcat.h ${PREFIX}/include/
	${INSTALL_DATA_DIR} ${PREFIX}/share/libtextcat/LM
	${INSTALL_DATA} ${WRKSRC}/langclass/LM/*.lm ${PREFIX}/share/libtextcat/LM/
	${INSTALL_DATA} ${WRKSRC}/langclass/conf.txt ${PREFIX}/share/libtextcat/
	${INSTALL_DATA_DIR} ${PREFIX}/share/doc/libtextcat
	${INSTALL_DATA} ${WRKSRC}/README ${PREFIX}/share/doc/libtextcat/
# Regression run: from the langclass directory of the build tree, feed each
# sample text in ShortTexts/ to the testtextcat binary in ../src, using the
# bundled conf.txt.
# The shell glob is iterated directly rather than through `ls`, and the file
# name is quoted, so names are never re-split by the shell and no extra
# process is spawned. $$t is the shell loop variable ($$ yields a literal $).
do-regress:
	cd ${WRKSRC}/langclass/ && \
	for t in ShortTexts/*.txt; do \
		../src/testtextcat conf.txt < "$$t"; \
	done
# Hand over to the ports infrastructure makefile.
.include <bsd.port.mk>

View File

@ -0,0 +1,5 @@
MD5 (libtextcat-2.2.tar.gz) = Eoz8hu1ZU+V/4PWumLYsLg==
RMD160 (libtextcat-2.2.tar.gz) = IFi6tmQkYP2u4TmT3QXaKapRbvo=
SHA1 (libtextcat-2.2.tar.gz) = 6Y1xSdaiD9u1jMC3nLXj+VrjBOQ=
SHA256 (libtextcat-2.2.tar.gz) = Vne63/xIqNMy40XqT+Il41d/U/yV3u7IMGAAslaCllU=
SIZE (libtextcat-2.2.tar.gz) = 540999

4
textproc/libtextcat/pkg/DESCR Executable file
View File

@ -0,0 +1,4 @@
Libtextcat is a library with functions that implement the classification
technique described in Cavnar & Trenkle, "N-Gram-Based Text
Categorization". It was primarily developed for language guessing, a
task on which it is known to perform with near-perfect accuracy.

View File

@ -0,0 +1,2 @@
@comment $OpenBSD: PFRAG.shared,v 1.1.1.1 2009/07/31 15:42:58 ajacoutot Exp $
@lib lib/libtextcat.so.${LIBtextcat_VERSION}

View File

@ -0,0 +1,87 @@
@comment $OpenBSD: PLIST,v 1.1.1.1 2009/07/31 15:42:58 ajacoutot Exp $
%%SHARED%%
@bin bin/createfp
include/textcat.h
lib/libtextcat.a
lib/libtextcat.la
share/doc/libtextcat/
share/doc/libtextcat/README
share/libtextcat/
share/libtextcat/LM/
share/libtextcat/LM/afrikaans.lm
share/libtextcat/LM/albanian.lm
share/libtextcat/LM/amharic-utf.lm
share/libtextcat/LM/arabic-iso8859_6.lm
share/libtextcat/LM/arabic-windows1256.lm
share/libtextcat/LM/armenian.lm
share/libtextcat/LM/basque.lm
share/libtextcat/LM/belarus-windows1251.lm
share/libtextcat/LM/bosnian.lm
share/libtextcat/LM/breton.lm
share/libtextcat/LM/bulgarian-iso8859_5.lm
share/libtextcat/LM/catalan.lm
share/libtextcat/LM/chinese-big5.lm
share/libtextcat/LM/chinese-gb2312.lm
share/libtextcat/LM/croatian-ascii.lm
share/libtextcat/LM/czech-iso8859_2.lm
share/libtextcat/LM/danish.lm
share/libtextcat/LM/drents.lm
share/libtextcat/LM/dutch.lm
share/libtextcat/LM/english.lm
share/libtextcat/LM/esperanto.lm
share/libtextcat/LM/estonian.lm
share/libtextcat/LM/finnish.lm
share/libtextcat/LM/french.lm
share/libtextcat/LM/frisian.lm
share/libtextcat/LM/georgian.lm
share/libtextcat/LM/german.lm
share/libtextcat/LM/greek-iso8859-7.lm
share/libtextcat/LM/hebrew-iso8859_8.lm
share/libtextcat/LM/hindi.lm
share/libtextcat/LM/hungarian.lm
share/libtextcat/LM/icelandic.lm
share/libtextcat/LM/indonesian.lm
share/libtextcat/LM/irish.lm
share/libtextcat/LM/italian.lm
share/libtextcat/LM/japanese-euc_jp.lm
share/libtextcat/LM/japanese-shift_jis.lm
share/libtextcat/LM/korean.lm
share/libtextcat/LM/latin.lm
share/libtextcat/LM/latvian.lm
share/libtextcat/LM/lithuanian.lm
share/libtextcat/LM/malay.lm
share/libtextcat/LM/manx.lm
share/libtextcat/LM/marathi.lm
share/libtextcat/LM/middle_frisian.lm
share/libtextcat/LM/mingo.lm
share/libtextcat/LM/nepali.lm
share/libtextcat/LM/norwegian.lm
share/libtextcat/LM/persian.lm
share/libtextcat/LM/polish.lm
share/libtextcat/LM/portuguese.lm
share/libtextcat/LM/quechua.lm
share/libtextcat/LM/romanian.lm
share/libtextcat/LM/rumantsch.lm
share/libtextcat/LM/russian-iso8859_5.lm
share/libtextcat/LM/russian-koi8_r.lm
share/libtextcat/LM/russian-windows1251.lm
share/libtextcat/LM/sanskrit.lm
share/libtextcat/LM/scots.lm
share/libtextcat/LM/scots_gaelic.lm
share/libtextcat/LM/serbian-ascii.lm
share/libtextcat/LM/slovak-ascii.lm
share/libtextcat/LM/slovak-windows1250.lm
share/libtextcat/LM/slovenian-ascii.lm
share/libtextcat/LM/slovenian-iso8859_2.lm
share/libtextcat/LM/spanish.lm
share/libtextcat/LM/swahili.lm
share/libtextcat/LM/swedish.lm
share/libtextcat/LM/tagalog.lm
share/libtextcat/LM/tamil.lm
share/libtextcat/LM/thai.lm
share/libtextcat/LM/turkish.lm
share/libtextcat/LM/ukrainian-koi8_r.lm
share/libtextcat/LM/vietnamese.lm
share/libtextcat/LM/welsh.lm
share/libtextcat/LM/yiddish-utf.lm
share/libtextcat/conf.txt