Import libtextcat-2.2
Libtextcat is a library with functions that implement the classification technique described in Cavnar & Trenkle, "N-Gram-Based Text Categorization". It was primarily developed for language guessing, a task on which it is known to perform with near-perfect accuracy. Based on the FreeBSD port. This is a prerequisite for pinot.
This commit is contained in:
parent
cb6b12d0a2
commit
b74f50b2b8
42
textproc/libtextcat/Makefile
Executable file
42
textproc/libtextcat/Makefile
Executable file
@ -0,0 +1,42 @@
|
||||
# $OpenBSD: Makefile,v 1.1.1.1 2009/07/31 15:42:58 ajacoutot Exp $
|
||||
|
||||
# One-line summary of the package.
COMMENT =		language guessing library

# Upstream release built by this port.
DISTNAME =		libtextcat-2.2

# Shared library name and version; presumably the source of the
# ${LIBtextcat_VERSION} value referenced from pkg/PFRAG.shared.
SHARED_LIBS +=		textcat 0.0 # .0.0

CATEGORIES =		textproc

HOMEPAGE =		http://software.wise-guys.nl/libtextcat/

# GPLv2
PERMIT_PACKAGE_CDROM =	Yes
PERMIT_PACKAGE_FTP =	Yes
PERMIT_DISTFILES_CDROM =	Yes
PERMIT_DISTFILES_FTP =	Yes

WANTLIB +=		c

# Download location of the distfile.
MASTER_SITES =		http://software.wise-guys.nl/download/

# Build configuration: libtool plus a GNU-style configure script.
USE_LIBTOOL =		Yes

CONFIGURE_STYLE =	gnu
CONFIGURE_ARGS =	${CONFIGURE_SHARED}
|
||||
|
||||
# Extra files installed by the port: the public header, the language models
# (*.lm) together with their conf.txt, and the README.
# Both target directories are created first, then the files are copied in.
post-install:
	${INSTALL_DATA_DIR} ${PREFIX}/share/libtextcat/LM
	${INSTALL_DATA_DIR} ${PREFIX}/share/doc/libtextcat
	${INSTALL_DATA} ${WRKSRC}/src/textcat.h ${PREFIX}/include/
	${INSTALL_DATA} ${WRKSRC}/langclass/LM/*.lm ${PREFIX}/share/libtextcat/LM/
	${INSTALL_DATA} ${WRKSRC}/langclass/conf.txt ${PREFIX}/share/libtextcat
	${INSTALL_DATA} ${WRKSRC}/README ${PREFIX}/share/doc/libtextcat
|
||||
|
||||
# Regression test: from ${WRKSRC}/langclass, feed every ShortTexts/*.txt
# sample to ../src/testtextcat on stdin, using conf.txt as its configuration.
# The shell expands the glob itself, so no `ls` subprocess is spawned and
# file names are not re-split on whitespace; "$$t" is quoted for the same
# reason ($$ passes a literal $ through make to the shell).
do-regress:
	cd ${WRKSRC}/langclass/ && \
	for t in ShortTexts/*.txt; do \
		../src/testtextcat conf.txt < "$$t"; \
	done
|
||||
|
||||
# Pull in bsd.port.mk after all port-specific settings and targets above.
.include <bsd.port.mk>
|
5
textproc/libtextcat/distinfo
Normal file
5
textproc/libtextcat/distinfo
Normal file
@ -0,0 +1,5 @@
|
||||
MD5 (libtextcat-2.2.tar.gz) = Eoz8hu1ZU+V/4PWumLYsLg==
|
||||
RMD160 (libtextcat-2.2.tar.gz) = IFi6tmQkYP2u4TmT3QXaKapRbvo=
|
||||
SHA1 (libtextcat-2.2.tar.gz) = 6Y1xSdaiD9u1jMC3nLXj+VrjBOQ=
|
||||
SHA256 (libtextcat-2.2.tar.gz) = Vne63/xIqNMy40XqT+Il41d/U/yV3u7IMGAAslaCllU=
|
||||
SIZE (libtextcat-2.2.tar.gz) = 540999
|
4
textproc/libtextcat/pkg/DESCR
Executable file
4
textproc/libtextcat/pkg/DESCR
Executable file
@ -0,0 +1,4 @@
|
||||
Libtextcat is a library with functions that implement the classification
|
||||
technique described in Cavnar & Trenkle, "N-Gram-Based Text
|
||||
Categorization". It was primarily developed for language guessing, a
|
||||
task on which it is known to perform with near-perfect accuracy.
|
2
textproc/libtextcat/pkg/PFRAG.shared
Normal file
2
textproc/libtextcat/pkg/PFRAG.shared
Normal file
@ -0,0 +1,2 @@
|
||||
@comment $OpenBSD: PFRAG.shared,v 1.1.1.1 2009/07/31 15:42:58 ajacoutot Exp $
|
||||
@lib lib/libtextcat.so.${LIBtextcat_VERSION}
|
87
textproc/libtextcat/pkg/PLIST
Normal file
87
textproc/libtextcat/pkg/PLIST
Normal file
@ -0,0 +1,87 @@
|
||||
@comment $OpenBSD: PLIST,v 1.1.1.1 2009/07/31 15:42:58 ajacoutot Exp $
|
||||
%%SHARED%%
|
||||
@bin bin/createfp
|
||||
include/textcat.h
|
||||
lib/libtextcat.a
|
||||
lib/libtextcat.la
|
||||
share/doc/libtextcat/
|
||||
share/doc/libtextcat/README
|
||||
share/libtextcat/
|
||||
share/libtextcat/LM/
|
||||
share/libtextcat/LM/afrikaans.lm
|
||||
share/libtextcat/LM/albanian.lm
|
||||
share/libtextcat/LM/amharic-utf.lm
|
||||
share/libtextcat/LM/arabic-iso8859_6.lm
|
||||
share/libtextcat/LM/arabic-windows1256.lm
|
||||
share/libtextcat/LM/armenian.lm
|
||||
share/libtextcat/LM/basque.lm
|
||||
share/libtextcat/LM/belarus-windows1251.lm
|
||||
share/libtextcat/LM/bosnian.lm
|
||||
share/libtextcat/LM/breton.lm
|
||||
share/libtextcat/LM/bulgarian-iso8859_5.lm
|
||||
share/libtextcat/LM/catalan.lm
|
||||
share/libtextcat/LM/chinese-big5.lm
|
||||
share/libtextcat/LM/chinese-gb2312.lm
|
||||
share/libtextcat/LM/croatian-ascii.lm
|
||||
share/libtextcat/LM/czech-iso8859_2.lm
|
||||
share/libtextcat/LM/danish.lm
|
||||
share/libtextcat/LM/drents.lm
|
||||
share/libtextcat/LM/dutch.lm
|
||||
share/libtextcat/LM/english.lm
|
||||
share/libtextcat/LM/esperanto.lm
|
||||
share/libtextcat/LM/estonian.lm
|
||||
share/libtextcat/LM/finnish.lm
|
||||
share/libtextcat/LM/french.lm
|
||||
share/libtextcat/LM/frisian.lm
|
||||
share/libtextcat/LM/georgian.lm
|
||||
share/libtextcat/LM/german.lm
|
||||
share/libtextcat/LM/greek-iso8859-7.lm
|
||||
share/libtextcat/LM/hebrew-iso8859_8.lm
|
||||
share/libtextcat/LM/hindi.lm
|
||||
share/libtextcat/LM/hungarian.lm
|
||||
share/libtextcat/LM/icelandic.lm
|
||||
share/libtextcat/LM/indonesian.lm
|
||||
share/libtextcat/LM/irish.lm
|
||||
share/libtextcat/LM/italian.lm
|
||||
share/libtextcat/LM/japanese-euc_jp.lm
|
||||
share/libtextcat/LM/japanese-shift_jis.lm
|
||||
share/libtextcat/LM/korean.lm
|
||||
share/libtextcat/LM/latin.lm
|
||||
share/libtextcat/LM/latvian.lm
|
||||
share/libtextcat/LM/lithuanian.lm
|
||||
share/libtextcat/LM/malay.lm
|
||||
share/libtextcat/LM/manx.lm
|
||||
share/libtextcat/LM/marathi.lm
|
||||
share/libtextcat/LM/middle_frisian.lm
|
||||
share/libtextcat/LM/mingo.lm
|
||||
share/libtextcat/LM/nepali.lm
|
||||
share/libtextcat/LM/norwegian.lm
|
||||
share/libtextcat/LM/persian.lm
|
||||
share/libtextcat/LM/polish.lm
|
||||
share/libtextcat/LM/portuguese.lm
|
||||
share/libtextcat/LM/quechua.lm
|
||||
share/libtextcat/LM/romanian.lm
|
||||
share/libtextcat/LM/rumantsch.lm
|
||||
share/libtextcat/LM/russian-iso8859_5.lm
|
||||
share/libtextcat/LM/russian-koi8_r.lm
|
||||
share/libtextcat/LM/russian-windows1251.lm
|
||||
share/libtextcat/LM/sanskrit.lm
|
||||
share/libtextcat/LM/scots.lm
|
||||
share/libtextcat/LM/scots_gaelic.lm
|
||||
share/libtextcat/LM/serbian-ascii.lm
|
||||
share/libtextcat/LM/slovak-ascii.lm
|
||||
share/libtextcat/LM/slovak-windows1250.lm
|
||||
share/libtextcat/LM/slovenian-ascii.lm
|
||||
share/libtextcat/LM/slovenian-iso8859_2.lm
|
||||
share/libtextcat/LM/spanish.lm
|
||||
share/libtextcat/LM/swahili.lm
|
||||
share/libtextcat/LM/swedish.lm
|
||||
share/libtextcat/LM/tagalog.lm
|
||||
share/libtextcat/LM/tamil.lm
|
||||
share/libtextcat/LM/thai.lm
|
||||
share/libtextcat/LM/turkish.lm
|
||||
share/libtextcat/LM/ukrainian-koi8_r.lm
|
||||
share/libtextcat/LM/vietnamese.lm
|
||||
share/libtextcat/LM/welsh.lm
|
||||
share/libtextcat/LM/yiddish-utf.lm
|
||||
share/libtextcat/conf.txt
|
Loading…
Reference in New Issue
Block a user