diff --git a/textproc/libtextcat/Makefile b/textproc/libtextcat/Makefile
new file mode 100755
index 00000000000..cc8b944546e
--- /dev/null
+++ b/textproc/libtextcat/Makefile
@@ -0,0 +1,46 @@
+# $OpenBSD: Makefile,v 1.1.1.1 2009/07/31 15:42:58 ajacoutot Exp $
+
+COMMENT=	language guessing library
+
+DISTNAME=	libtextcat-2.2
+
+SHARED_LIBS +=	textcat 0.0 # .0.0
+
+CATEGORIES=	textproc
+
+HOMEPAGE=	http://software.wise-guys.nl/libtextcat/
+
+# GPLv2
+PERMIT_PACKAGE_CDROM=	Yes
+PERMIT_PACKAGE_FTP=	Yes
+PERMIT_DISTFILES_CDROM=	Yes
+PERMIT_DISTFILES_FTP=	Yes
+
+WANTLIB +=	c
+
+MASTER_SITES=	http://software.wise-guys.nl/download/
+
+USE_LIBTOOL=	Yes
+
+CONFIGURE_STYLE=	gnu
+CONFIGURE_ARGS=	${CONFIGURE_SHARED}
+
+# Install by hand the public header, the language models together with
+# their conf.txt, and the README.
+post-install:
+	${INSTALL_DATA} ${WRKSRC}/src/textcat.h ${PREFIX}/include/
+	${INSTALL_DATA_DIR} ${PREFIX}/share/libtextcat/LM
+	${INSTALL_DATA} ${WRKSRC}/langclass/LM/*.lm ${PREFIX}/share/libtextcat/LM/
+	${INSTALL_DATA} ${WRKSRC}/langclass/conf.txt ${PREFIX}/share/libtextcat
+	${INSTALL_DATA_DIR} ${PREFIX}/share/doc/libtextcat
+	${INSTALL_DATA} ${WRKSRC}/README ${PREFIX}/share/doc/libtextcat
+
+# Feed every sample under langclass/ShortTexts to testtextcat.  The glob
+# is expanded by the shell itself instead of going through `ls`.
+do-regress:
+	cd ${WRKSRC}/langclass/ && \
+		for t in ShortTexts/*.txt; do \
+			../src/testtextcat conf.txt < "$$t"; \
+		done
+
+.include <bsd.port.mk>
diff --git a/textproc/libtextcat/distinfo b/textproc/libtextcat/distinfo
new file mode 100644
index 00000000000..72f4dc13fcc
--- /dev/null
+++ b/textproc/libtextcat/distinfo
@@ -0,0 +1,5 @@
+MD5 (libtextcat-2.2.tar.gz) = Eoz8hu1ZU+V/4PWumLYsLg==
+RMD160 (libtextcat-2.2.tar.gz) = IFi6tmQkYP2u4TmT3QXaKapRbvo=
+SHA1 (libtextcat-2.2.tar.gz) = 6Y1xSdaiD9u1jMC3nLXj+VrjBOQ=
+SHA256 (libtextcat-2.2.tar.gz) = Vne63/xIqNMy40XqT+Il41d/U/yV3u7IMGAAslaCllU=
+SIZE (libtextcat-2.2.tar.gz) = 540999
diff --git a/textproc/libtextcat/pkg/DESCR b/textproc/libtextcat/pkg/DESCR
new file mode 100755
index 00000000000..39d892c2caf
--- /dev/null
+++ b/textproc/libtextcat/pkg/DESCR
@@ -0,0 +1,4 @@
+Libtextcat is a library with functions that implement the classification
+technique described in Cavnar & Trenkle, "N-Gram-Based Text
+Categorization". It was primarily developed for language guessing, a
+task on which it is known to perform with near-perfect accuracy.
diff --git a/textproc/libtextcat/pkg/PFRAG.shared b/textproc/libtextcat/pkg/PFRAG.shared
new file mode 100644
index 00000000000..9e2101a3b86
--- /dev/null
+++ b/textproc/libtextcat/pkg/PFRAG.shared
@@ -0,0 +1,2 @@
+@comment $OpenBSD: PFRAG.shared,v 1.1.1.1 2009/07/31 15:42:58 ajacoutot Exp $
+@lib lib/libtextcat.so.${LIBtextcat_VERSION}
diff --git a/textproc/libtextcat/pkg/PLIST b/textproc/libtextcat/pkg/PLIST
new file mode 100644
index 00000000000..4036ae2ac11
--- /dev/null
+++ b/textproc/libtextcat/pkg/PLIST
@@ -0,0 +1,87 @@
+@comment $OpenBSD: PLIST,v 1.1.1.1 2009/07/31 15:42:58 ajacoutot Exp $
+%%SHARED%%
+@bin bin/createfp
+include/textcat.h
+lib/libtextcat.a
+lib/libtextcat.la
+share/doc/libtextcat/
+share/doc/libtextcat/README
+share/libtextcat/
+share/libtextcat/LM/
+share/libtextcat/LM/afrikaans.lm
+share/libtextcat/LM/albanian.lm
+share/libtextcat/LM/amharic-utf.lm
+share/libtextcat/LM/arabic-iso8859_6.lm
+share/libtextcat/LM/arabic-windows1256.lm
+share/libtextcat/LM/armenian.lm
+share/libtextcat/LM/basque.lm
+share/libtextcat/LM/belarus-windows1251.lm
+share/libtextcat/LM/bosnian.lm
+share/libtextcat/LM/breton.lm
+share/libtextcat/LM/bulgarian-iso8859_5.lm
+share/libtextcat/LM/catalan.lm
+share/libtextcat/LM/chinese-big5.lm
+share/libtextcat/LM/chinese-gb2312.lm
+share/libtextcat/LM/croatian-ascii.lm
+share/libtextcat/LM/czech-iso8859_2.lm
+share/libtextcat/LM/danish.lm
+share/libtextcat/LM/drents.lm
+share/libtextcat/LM/dutch.lm
+share/libtextcat/LM/english.lm
+share/libtextcat/LM/esperanto.lm
+share/libtextcat/LM/estonian.lm
+share/libtextcat/LM/finnish.lm
+share/libtextcat/LM/french.lm
+share/libtextcat/LM/frisian.lm
+share/libtextcat/LM/georgian.lm
+share/libtextcat/LM/german.lm
+share/libtextcat/LM/greek-iso8859-7.lm
+share/libtextcat/LM/hebrew-iso8859_8.lm
+share/libtextcat/LM/hindi.lm
+share/libtextcat/LM/hungarian.lm
+share/libtextcat/LM/icelandic.lm
+share/libtextcat/LM/indonesian.lm
+share/libtextcat/LM/irish.lm
+share/libtextcat/LM/italian.lm
+share/libtextcat/LM/japanese-euc_jp.lm
+share/libtextcat/LM/japanese-shift_jis.lm
+share/libtextcat/LM/korean.lm
+share/libtextcat/LM/latin.lm
+share/libtextcat/LM/latvian.lm
+share/libtextcat/LM/lithuanian.lm
+share/libtextcat/LM/malay.lm
+share/libtextcat/LM/manx.lm
+share/libtextcat/LM/marathi.lm
+share/libtextcat/LM/middle_frisian.lm
+share/libtextcat/LM/mingo.lm
+share/libtextcat/LM/nepali.lm
+share/libtextcat/LM/norwegian.lm
+share/libtextcat/LM/persian.lm
+share/libtextcat/LM/polish.lm
+share/libtextcat/LM/portuguese.lm
+share/libtextcat/LM/quechua.lm
+share/libtextcat/LM/romanian.lm
+share/libtextcat/LM/rumantsch.lm
+share/libtextcat/LM/russian-iso8859_5.lm
+share/libtextcat/LM/russian-koi8_r.lm
+share/libtextcat/LM/russian-windows1251.lm
+share/libtextcat/LM/sanskrit.lm
+share/libtextcat/LM/scots.lm
+share/libtextcat/LM/scots_gaelic.lm
+share/libtextcat/LM/serbian-ascii.lm
+share/libtextcat/LM/slovak-ascii.lm
+share/libtextcat/LM/slovak-windows1250.lm
+share/libtextcat/LM/slovenian-ascii.lm
+share/libtextcat/LM/slovenian-iso8859_2.lm
+share/libtextcat/LM/spanish.lm
+share/libtextcat/LM/swahili.lm
+share/libtextcat/LM/swedish.lm
+share/libtextcat/LM/tagalog.lm
+share/libtextcat/LM/tamil.lm
+share/libtextcat/LM/thai.lm
+share/libtextcat/LM/turkish.lm
+share/libtextcat/LM/ukrainian-koi8_r.lm
+share/libtextcat/LM/vietnamese.lm
+share/libtextcat/LM/welsh.lm
+share/libtextcat/LM/yiddish-utf.lm
+share/libtextcat/conf.txt