subtitle extraction, part of ogmrip. Contains the two commands that
bridge the gap between mplayer/gocr and full text subtitle extraction, some manpages, and a sample shell script that can perform subtitle extraction.
This commit is contained in:
parent
a514d2921a
commit
0f4805b64e
39
multimedia/subrip/Makefile
Normal file
39
multimedia/subrip/Makefile
Normal file
@ -0,0 +1,39 @@
|
||||
# $OpenBSD: Makefile,v 1.1.1.1 2007/01/22 13:55:42 espie Exp $

# Port of the subtitle extraction tools (srttool, sub2pgm) shipped inside
# the ogmrip distribution, plus a front-end script and manpages kept in
# ${FILESDIR}.

COMMENT=	"extract subtitles from dvds"
DISTNAME=	ogmrip-0.10.0
PKGNAME=	subrip-0.10.0
CATEGORIES=	multimedia converters
MASTER_SITES=	${MASTER_SITE_SOURCEFORGE:=ogmrip/}

HOMEPAGE=	http://ogmrip.sourceforge.net/

MAINTAINER=	Marc Espie <espie@openbsd.org>

# GPL
PERMIT_DISTFILES_CDROM=	Yes
PERMIT_DISTFILES_FTP=	Yes
PERMIT_PACKAGE_CDROM=	Yes
PERMIT_PACKAGE_FTP=	Yes

# Only the subrip/ subdirectory of the distribution is built.
WRKDIST=	${WRKDIR}/${DISTNAME}/subrip

# Build with the port's own Makefile (files/Makefile), linked into the
# source directory.
do-configure:
	ln -sf ${FILESDIR}/Makefile ${WRKDIST}

# The subrip script calls mencoder and gocr at run time.
RUN_DEPENDS=	::graphics/gocr \
		::x11/mplayer

WANTLIB=	c m

# Variables handed to files/Makefile's install target during the fake stage.
FAKE_FLAGS=	BINDIR=${TRUEPREFIX}/bin \
		INSTALL_PROGRAM="${INSTALL_PROGRAM}" \
		DESTDIR=${WRKINST}

# subrip is a shell script: install it with INSTALL_SCRIPT, not
# INSTALL_PROGRAM, so that install(1) does not try to strip it.
post-install:
	${INSTALL_SCRIPT} ${FILESDIR}/subrip ${PREFIX}/bin
	${INSTALL_MAN} ${FILESDIR}/subrip.1 ${PREFIX}/man/man1
	${INSTALL_MAN} ${FILESDIR}/srttool.1 ${PREFIX}/man/man1
	${INSTALL_MAN} ${FILESDIR}/sub2pgm.1 ${PREFIX}/man/man1

.include <bsd.port.mk>
|
4
multimedia/subrip/distinfo
Normal file
4
multimedia/subrip/distinfo
Normal file
@ -0,0 +1,4 @@
|
||||
MD5 (ogmrip-0.10.0.tar.gz) = cef80b77b551a6e7accc1441b62c1b4a
|
||||
RMD160 (ogmrip-0.10.0.tar.gz) = d639a2adb7a3008db1f72f9d82c0c9591393bf80
|
||||
SHA1 (ogmrip-0.10.0.tar.gz) = 7244d56d9c216ef0e6ab91e685d8e65ecc2774e3
|
||||
SIZE (ogmrip-0.10.0.tar.gz) = 623001
|
31
multimedia/subrip/files/Makefile
Normal file
31
multimedia/subrip/files/Makefile
Normal file
@ -0,0 +1,31 @@
|
||||
# Stand-alone Makefile for the two subtitle tools.
# BINDIR, INSTALL_PROGRAM and DESTDIR may be overridden on the make
# command line.

PROGS=	srttool sub2pgm
OBJ_SRT=srttool.o
LIB_SRT=-lm
LIB_SUB=-lm
OBJ_SUB=mp_msg.o spudec.o subrip.o vobsub.o
BINDIR=/usr/local/bin
INSTALL_PROGRAM=cp

.PHONY: all install clean

all: ${PROGS}

# Copy every program into ${DESTDIR}${BINDIR}; stop at the first failure
# instead of reporting only the status of the last copy.
install:
	for i in ${PROGS}; do \
		${INSTALL_PROGRAM} $$i ${DESTDIR}${BINDIR} || exit 1; \
	done

# rm -f: stay quiet when the tree is already clean.
clean:
	rm -f ${OBJ_SRT} ${OBJ_SUB} ${PROGS}

srttool: ${OBJ_SRT}
	${CC} ${CFLAGS} ${LDFLAGS} -o $@ ${OBJ_SRT} ${LIB_SRT}

sub2pgm: ${OBJ_SUB}
	${CC} ${CFLAGS} ${LDFLAGS} -o $@ ${OBJ_SUB} ${LIB_SUB}


.SUFFIXES: .c .o

# -I. so the sources find their headers in the current directory.
.c.o:
	${CC} -c ${CFLAGS} -I. $<
|
81
multimedia/subrip/files/srttool.1
Normal file
81
multimedia/subrip/files/srttool.1
Normal file
@ -0,0 +1,81 @@
|
||||
.\" $OpenBSD: srttool.1,v 1.1.1.1 2007/01/22 13:55:42 espie Exp $
|
||||
.\" Public domain
|
||||
.Dd January 20, 2007
|
||||
.Dt SRTTOOL 1
|
||||
.Os
|
||||
.Sh NAME
|
||||
.Nm srttool
|
||||
.Nd manipulates .srt subtitle files
|
||||
.Sh SYNOPSIS
|
||||
.Nm
|
||||
.Op Fl hrsvw
|
||||
.Op Fl a Ar hh:mm:ss,ms
|
||||
.Op Fl c Ar first[,last]
|
||||
.Op Fl d Ar seconds
|
||||
.Op Fl e Ar seconds
|
||||
.Op Fl i Ar file
|
||||
.Op Fl o Ar file
|
||||
.Op Fl x Ar basename
|
||||
.Sh DESCRIPTION
|
||||
The
|
||||
.Nm
|
||||
command performs various manipulations on srt files, acting as a filter
|
||||
from standard input to standard output unless
|
||||
.Fl i
|
||||
or
|
||||
.Fl o
|
||||
options are used.
|
||||
.Pp
|
||||
.Nm
|
||||
can be used to handle so-called template .srt files,
|
||||
where each subtitle entry is actually a filename to be substituted
|
||||
for the entry, or to adjust timestamps on each subtitle entry.
|
||||
.Pp
|
||||
The options are as follows:
|
||||
.Bl -tag -width Flooutput
|
||||
.It Fl a Ar hh:mm:ss,ms
|
||||
Adjust all time stamps so that the first tag begins at
|
||||
.Ar hh:mm:ss,ms .
|
||||
.It Fl c Ar first[,last]
|
||||
Write only entries numbered from
|
||||
.Ar first
|
||||
to
|
||||
.Ar last ,
|
||||
where
|
||||
.Ar last
|
||||
defaults to the last entry of the file.
|
||||
.It Fl d Ar seconds
|
||||
Shift all time stamps by
|
||||
.Ar seconds
|
||||
seconds.
|
||||
.It Fl e Ar seconds
|
||||
Expand the subtitle hour by
|
||||
.Ar seconds
|
||||
seconds.
|
||||
.It Fl h
|
||||
Display usage and quit.
|
||||
.It Fl i Ar filename
|
||||
Use file
|
||||
.Ar filename
|
||||
for input, defaults to stdin.
|
||||
.It Fl o Ar filename
|
||||
Use file
|
||||
.Ar filename
|
||||
for output, defaults to stdout.
|
||||
.It Fl r
|
||||
Renumber all entries.
|
||||
.It Fl s
|
||||
Convert a template srt file into a real srt file: substitute the filename in each
|
||||
subtitle entry with the file contents.
|
||||
.It Fl v
|
||||
Verbose mode.
|
||||
.It Fl w
|
||||
Remove leading white space.
|
||||
.It Fl x Ar basename
|
||||
Converse of
|
||||
.Fl s
|
||||
operation, create <basename>.srtx template and <basename>XXX.txt individual
|
||||
files.
|
||||
.El
|
||||
.Sh SEE ALSO
|
||||
.Xr subrip 1 .
|
51
multimedia/subrip/files/sub2pgm.1
Normal file
51
multimedia/subrip/files/sub2pgm.1
Normal file
@ -0,0 +1,51 @@
|
||||
.\" $OpenBSD: sub2pgm.1,v 1.1.1.1 2007/01/22 13:55:42 espie Exp $
|
||||
.\" Public domain
|
||||
.Dd January 20, 2007
|
||||
.Dt SUB2PGM 1
|
||||
.Os
|
||||
.Sh NAME
|
||||
.Nm sub2pgm
|
||||
.Nd "convert vobsub dvd subtitles into pgm files and srt template"
|
||||
.Sh SYNOPSIS
|
||||
.Nm
|
||||
.Op Fl hd
|
||||
.Op Fl o Ar srtbase
|
||||
.Op Fl t Ar sid
|
||||
.Ar vobsub
|
||||
.Sh DESCRIPTION
|
||||
The
|
||||
.Nm
|
||||
command converts dvd subtitles from the vobsub format into an intermediate form
|
||||
suitable for OCR extraction.
|
||||
.Pp
|
||||
.Nm
|
||||
takes two files as input, which should be named <vobsub>.idx and <vobsub>.sub,
|
||||
where
|
||||
.Ar vobsub
|
||||
is the basename of the files.
|
||||
.Pp
|
||||
.Nm
|
||||
outputs one .pgm graphics file for each subtitle image,
|
||||
and a .srt template file with correct timing annotation, where
|
||||
each subtitle entry is replaced with the corresponding graphics file entry
|
||||
with .txt appended.
|
||||
.Pp
|
||||
After OCR conversion, such a file can be transformed into a proper srt file
|
||||
by
|
||||
.Xr srttool 1 .
|
||||
.Pp
The options are as follows:
|
||||
.Bl -tag -width Flooutput
|
||||
.It Fl d
|
||||
Displays debugging information as subtitle info is extracted.
|
||||
.It Fl h
|
||||
Displays usage and quits.
|
||||
.It Fl o Ar srtbase
|
||||
Basename for all .pgm output files and the .srt template.
|
||||
.It Fl t Ar sid
|
||||
Selects subtitle id from a vobsub file with several subtitle streams.
|
||||
Defaults to 0.
|
||||
.El
|
||||
.Sh SEE ALSO
|
||||
.Xr gocr 1 ,
|
||||
.Xr mencoder 1 ,
|
||||
.Xr srttool 1 .
|
122
multimedia/subrip/files/subrip
Normal file
122
multimedia/subrip/files/subrip
Normal file
@ -0,0 +1,122 @@
|
||||
#! /bin/sh
# $OpenBSD: subrip,v 1.1.1.1 2007/01/22 13:55:42 espie Exp $
#
# Copyright (c) 2007 Marc Espie <espie@openbsd.org>
#
# Permission to use, copy, modify, and distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

# subrip: front-end gluing mencoder, sub2pgm, gocr and srttool together.
# For each requested subtitle id, dump the vobsub stream of a dvd title,
# turn it into pgm images plus an srt template, run gocr on every image,
# and finally produce <output>.<sid> with srttool.

: ${TMPDIR:=/tmp}

device=/dev/rcd0c
sidlist=
# Initialised explicitly so a `title' inherited from the environment
# cannot satisfy the mandatory -t check below.
title=
verbose=true
keep=false
mextra=
gextra=

# Print the synopsis on stderr and exit with the usage-error status.
usage()
{
	echo 'Usage: subrip [-qk] [-d dev] [-m mopt] [-g gopt] -s sid... -t title output' >&2
	exit 2
}

# Traditional getopt(1) idiom: arguments containing white space are not
# supported, which is why -m and -g must be repeated (see subrip(1)).
args=`getopt qkd:g:m:s:t: $*`
if [ $? -ne 0 ]
then
	usage
fi

# exit on error
set -e

set -- $args
for i
do
	case "$i" in
	-s)
		sidlist="$sidlist $2"; shift; shift;;
	-d)
		device=$2; shift; shift;;
	-t)
		title=$2; shift; shift;;
	-q)
		verbose=false; shift;;
	-k)
		keep=true; shift;;
	-m)
		mextra="$mextra $2"; shift; shift;;
	-g)
		gextra="$gextra $2"; shift; shift;;
	--)
		shift; break;;
	esac
done

# First remaining argument is the output basename.
if [ $# -gt 0 ]; then
	output=$1
else
	output=subtitle
fi

if $verbose; then
	mextra="-v $mextra"
fi

if [ -z "$sidlist" ]; then
	echo "Error: must specify some subtitles to copy" >&2
	exit 2
fi

if [ -z "$title" ]; then
	echo "Error: must specify some title to copy" >&2
	exit 2
fi

mytmp=`mktemp -d "${TMPDIR}/subrip.XXXXXXX"`
if $verbose; then
	echo "Putting files into $mytmp"
fi

# $sidlist, $mextra and $gextra are deliberately left unquoted below:
# they are white-space separated lists that must be split into words.
for sid in $sidlist
do
	# grab the raw subtitles
	set -x
	mencoder \
	    -dvd-device "${device}" \
	    -nocache -nosound \
	    -of rawaudio -ovc copy -o /dev/null \
	    -vobsubout "$mytmp/vobsub-$sid" \
	    -quiet \
	    $mextra \
	    -sid $sid \
	    "dvd://$title"
	set +x
	# convert to pgm images
	sub2pgm -o "$mytmp/z$sid" "$mytmp/vobsub-$sid"
	$keep || rm -f "$mytmp/vobsub-$sid"*
done

# NOTE(review): per subrip(1), gocr stores user-supplied recognition
# hints in ./db; -m 130 presumably enables that database -- confirm
# against gocr(1).
mkdir -p db
# ask gocr what it can do with them
for i in "$mytmp"/*.pgm
do
	# Unmatched glob: the pattern itself comes back, skip it.
	[ -f "$i" ] || continue
	if $verbose; then
		echo "Converting $i into text"
	fi
	gocr $gextra -m 130 "$i" >"$i.txt"
done
$keep || rm -f "$mytmp"/*.pgm

# Substitute the recognized text into each srt template.
for sid in $sidlist
do
	srttool -s <"$mytmp/z$sid.srtx" >"$output.$sid"
done
$keep || rm -rf "$mytmp"
|
90
multimedia/subrip/files/subrip.1
Normal file
90
multimedia/subrip/files/subrip.1
Normal file
@ -0,0 +1,90 @@
|
||||
.\" $OpenBSD: subrip.1,v 1.1.1.1 2007/01/22 13:55:42 espie Exp $
|
||||
.\" Public domain
|
||||
.Dd January 20, 2007
|
||||
.Dt SUBRIP 1
|
||||
.Os
|
||||
.Sh NAME
|
||||
.Nm subrip
|
||||
.Nd extract subtitles from dvd
|
||||
.Sh SYNOPSIS
|
||||
.Nm
|
||||
.Op Fl qk
|
||||
.Op Fl d Ar device
|
||||
.Op Fl m Ar mencoder-cmd
|
||||
.Op Fl g Ar gocr-cmd
|
||||
.Fl s Ar sid
|
||||
.Op Fl s Ar sid
|
||||
.Fl t Ar title
|
||||
.Ar output
|
||||
.Sh DESCRIPTION
|
||||
The
|
||||
.Nm
|
||||
command
|
||||
extracts subtitles from a dvd using
|
||||
.Xr mencoder 1
|
||||
and passes them to
|
||||
.Xr gocr 1
|
||||
for conversion into srt files.
|
||||
The resulting files are named
|
||||
.Pa output.<sid> .
|
||||
.Pp
|
||||
.Nm
|
||||
is a simple shell-script that acts as a front-end and can be
|
||||
customized as needed.
|
||||
.Pp
|
||||
The extraction is interactive, as
|
||||
.Xr gocr 1
|
||||
often will need some help recognizing some character shapes.
|
||||
.Pp
|
||||
The options are as follows:
|
||||
.Bl -tag -width Flooutput
|
||||
.It Fl d Ar device
|
||||
Select dvd device.
|
||||
Defaults to
|
||||
.Pa /dev/rcd0c .
|
||||
.It Fl g Ar gocr-cmd
|
||||
Add extra arguments to the invocation of
|
||||
.Xr gocr 1 .
|
||||
Thanks to bugs in
|
||||
.Xr getopt 1 ,
|
||||
one should use several
|
||||
.Fl g
|
||||
options to specify an option with arguments.
|
||||
.It Fl k
|
||||
Keep temporary files.
|
||||
.It Fl m Ar mencoder-cmd
|
||||
Add extra arguments to the invocation of
|
||||
.Xr mencoder 1 .
|
||||
Thanks to bugs in
|
||||
.Xr getopt 1 ,
|
||||
one should use several
|
||||
.Fl m
|
||||
options to specify an option with arguments.
|
||||
.It Fl q
|
||||
Quiet mode, do not display
|
||||
.Xr mencoder 1
|
||||
progress.
|
||||
.It Fl s Ar sid
|
||||
Select subtitle id to copy.
|
||||
At least one subtitle stream should be selected.
|
||||
.It Fl t Ar title
|
||||
Select title to copy.
|
||||
Mandatory.
.El
|
||||
.Sh FILES
|
||||
The directory
|
||||
.Pa db
|
||||
is used by
|
||||
.Xr gocr 1
|
||||
to store user-supplied information to help in character recognition.
|
||||
.Pp
|
||||
.Nm
|
||||
stores temporary files into a temporary subdirectory of
|
||||
.Va TMPDIR ,
|
||||
or
|
||||
.Pa /tmp
|
||||
if not set.
|
||||
.Sh SEE ALSO
|
||||
.Xr gocr 1 ,
|
||||
.Xr mencoder 1 ,
|
||||
.Xr srttool 1 ,
|
||||
.Xr sub2pgm 1
|
19
multimedia/subrip/patches/patch-mp_msg_c
Normal file
19
multimedia/subrip/patches/patch-mp_msg_c
Normal file
@ -0,0 +1,19 @@
|
||||
$OpenBSD: patch-mp_msg_c,v 1.1.1.1 2007/01/22 13:55:42 espie Exp $
|
||||
--- mp_msg.c.orig Thu Jan 18 12:31:44 2007
|
||||
+++ mp_msg.c Thu Jan 18 12:32:51 2007
|
||||
@@ -7,10 +7,15 @@
|
||||
|
||||
#include "mp_msg.h"
|
||||
|
||||
+extern int debug_messages;
|
||||
+
|
||||
void
|
||||
mp_msg (int module, int level, const char *format, ...)
|
||||
{
|
||||
va_list va;
|
||||
+
|
||||
+ if (level == MSGL_DBG2 && !debug_messages)
|
||||
+ return;
|
||||
|
||||
va_start (va, format);
|
||||
vfprintf (stderr, format, va);
|
26
multimedia/subrip/patches/patch-srttool_c
Normal file
26
multimedia/subrip/patches/patch-srttool_c
Normal file
@ -0,0 +1,26 @@
|
||||
$OpenBSD: patch-srttool_c,v 1.1.1.1 2007/01/22 13:55:42 espie Exp $
|
||||
--- srttool.c.orig Mon Jan 22 14:47:45 2007
|
||||
+++ srttool.c Mon Jan 22 14:48:32 2007
|
||||
@@ -70,8 +70,7 @@ usage (void)
|
||||
fprintf (stderr,
|
||||
"\t -w Remove leading white space in text lines\n");
|
||||
fprintf (stderr,
|
||||
- "\t -e <seconds> 'Expand' the subtitle hour by <seconds>\n"
|
||||
- "\t (valid values are -60.0<=x<=+60.0 seconds)\n");
|
||||
+ "\t -e <seconds> 'Expand' the subtitle hour by <seconds>\n");
|
||||
fprintf (stderr, "\t Example: \n");
|
||||
fprintf (stderr, "\t Adjust the subtitle timing by -2.3 seconds. \n");
|
||||
fprintf (stderr,
|
||||
@@ -525,12 +524,6 @@ main (int argc, char **argv)
|
||||
if (n != 1)
|
||||
{
|
||||
fprintf (stderr, "no time specified with option -e\n");
|
||||
- exit (EXIT_FAILURE);
|
||||
- }
|
||||
- // complain about an adjustment of more than 60 seconds
|
||||
- if (fabs (hour_expansion > 60))
|
||||
- {
|
||||
- fprintf (stderr, "Parameter to option -e to large.\n");
|
||||
exit (EXIT_FAILURE);
|
||||
}
|
||||
// calculate the resulting scaling factor
|
34
multimedia/subrip/patches/patch-subrip_c
Normal file
34
multimedia/subrip/patches/patch-subrip_c
Normal file
@ -0,0 +1,34 @@
|
||||
$OpenBSD: patch-subrip_c,v 1.1.1.1 2007/01/22 13:55:42 espie Exp $
|
||||
--- subrip.c.orig Thu Jan 18 12:30:28 2007
|
||||
+++ subrip.c Thu Jan 18 12:31:41 2007
|
||||
@@ -16,6 +16,7 @@ static spudec_handle_t *spudec;
|
||||
static unsigned int sub_idx;
|
||||
static FILE *fsrtx;
|
||||
|
||||
+int debug_messages=0;
|
||||
int sub_pos;
|
||||
int vobsub_id;
|
||||
|
||||
@@ -23,6 +24,7 @@ static void
|
||||
usage (void)
|
||||
{
|
||||
fprintf (stderr, "subp2pgm [options] <vobsub basename>\n");
|
||||
+ fprintf (stderr, " -d - debug messages\n");
|
||||
fprintf (stderr, " -o <filename> - the output base filename\n");
|
||||
fprintf (stderr, " -t <sid> - the subtitle id, default 0\n");
|
||||
}
|
||||
@@ -115,10 +117,13 @@ main (int argc, char **argv)
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
- while ((ch = getopt (argc, argv, "t:o:h")) != -1)
|
||||
+ while ((ch = getopt (argc, argv, "dt:o:h")) != -1)
|
||||
{
|
||||
switch (ch)
|
||||
{
|
||||
+ case 'd':
|
||||
+ debug_messages = 1;
|
||||
+ break;
|
||||
case 't':
|
||||
if (sscanf (optarg, "%d", &vobsub_id) != 1)
|
||||
{
|
11
multimedia/subrip/pkg/DESCR
Normal file
11
multimedia/subrip/pkg/DESCR
Normal file
@ -0,0 +1,11 @@
|
||||
ogmrip is originally a graphics package to convert dvd into ogm files.
|
||||
This package only takes the subtitle extraction part.
|
||||
Namely:
|
||||
- sub2pgm: converts vobsub files into an srt template and a set of pgm images.
|
||||
- srttool: manipulates srt templates and srt files.
|
||||
|
||||
Together with mplayer, which can extract vobsub, and gocr, to handle character
|
||||
recognition, this allows for simple subtitle extraction.
|
||||
|
||||
The package contains a shell-script, subrip, that puts all the pieces
|
||||
together and can be tailored for further use.
|
7
multimedia/subrip/pkg/PLIST
Normal file
7
multimedia/subrip/pkg/PLIST
Normal file
@ -0,0 +1,7 @@
|
||||
@comment $OpenBSD: PLIST,v 1.1.1.1 2007/01/22 13:55:42 espie Exp $
|
||||
bin/srttool
|
||||
bin/sub2pgm
|
||||
bin/subrip
|
||||
@man man/man1/srttool.1
|
||||
@man man/man1/sub2pgm.1
|
||||
@man man/man1/subrip.1
|
Loading…
Reference in New Issue
Block a user