openbsd-ports/misc/wordnet/patches/patch-lib_search_c
sthen f6c9102d1a updated patch from Rob Holland, his commentary:
"Andreas Tille, the Debian WordNet maintainer, noticed a bug in my
patch. The bug is not security related, but causes incorrect behaviour
in WordNet.

I replaced a strncpy(s1, s2, strlen(s2)) with a strcpy forgetting that
strncpy invoked that way would always omit the trailing \0 (as the \0
would always be at strlen(s2) + 1). This resulted in a truncation of
output from WordNet which relied on the previous behavior which it
used to 'patch' s1. I've now adjusted the strncpy to be a memcpy and
added a comment, to make the intent of the code clear. (Using a str*
function when you don't wish any handling of \0 is unintuitive to me,
hence my mistake). [..] Apologies for the error."

thanks Rob for the exemplary handling of this advisory. Notifications
to package maintainers and follow-ups are almost unheard-of and very
welcome.
2008-09-06 21:49:15 +00:00

375 lines
12 KiB
Plaintext

$OpenBSD: patch-lib_search_c,v 1.2 2008/09/06 21:49:15 sthen Exp $
--- lib/search.c.orig Wed Nov 29 21:02:21 2006
+++ lib/search.c Sat Sep 6 22:44:37 2008
@@ -13,6 +13,7 @@
#include <stdlib.h>
#include <string.h>
#include <assert.h>
+#include <limits.h>
#include "wn.h"
@@ -119,33 +120,22 @@ IndexPtr parse_index(long offset, int dbase, char *lin
if ( !line )
line = read_index( offset, indexfps[dbase] );
- idx = (IndexPtr)malloc(sizeof(Index));
+ idx = (IndexPtr)calloc(1, sizeof(Index));
assert(idx);
/* set offset of entry in index file */
idx->idxoffset = offset;
- idx->wd='\0';
- idx->pos='\0';
- idx->off_cnt=0;
- idx->tagged_cnt = 0;
- idx->sense_cnt=0;
- idx->offset='\0';
- idx->ptruse_cnt=0;
- idx->ptruse='\0';
-
/* get the word */
ptrtok=strtok(line," \n");
- idx->wd = malloc(strlen(ptrtok) + 1);
+ idx->wd = strdup(ptrtok);
assert(idx->wd);
- strcpy(idx->wd, ptrtok);
/* get the part of speech */
ptrtok=strtok(NULL," \n");
- idx->pos = malloc(strlen(ptrtok) + 1);
+ idx->pos = strdup(ptrtok);
assert(idx->pos);
- strcpy(idx->pos, ptrtok);
/* get the collins count */
ptrtok=strtok(NULL," \n");
@@ -154,7 +144,12 @@ IndexPtr parse_index(long offset, int dbase, char *lin
/* get the number of pointers types */
ptrtok=strtok(NULL," \n");
idx->ptruse_cnt = atoi(ptrtok);
-
+
+ if (idx->ptruse_cnt < 0 || (unsigned int)idx->ptruse_cnt > UINT_MAX/sizeof(int)) {
+ free_index(idx);
+ return(NULL);
+ }
+
if (idx->ptruse_cnt) {
idx->ptruse = (int *) malloc(idx->ptruse_cnt * (sizeof(int)));
assert(idx->ptruse);
@@ -173,9 +168,14 @@ IndexPtr parse_index(long offset, int dbase, char *lin
/* get the number of senses that are tagged */
ptrtok=strtok(NULL," \n");
idx->tagged_cnt = atoi(ptrtok);
-
+
+ if (idx->off_cnt < 0 || (unsigned long)idx->off_cnt > ULONG_MAX/sizeof(long)) {
+ free_index(idx);
+ return(NULL);
+ }
+
/* make space for the offsets */
- idx->offset = (long *) malloc(idx->off_cnt * (sizeof(long)));
+ idx->offset = (unsigned long *) malloc(idx->off_cnt * sizeof(long));
assert(idx->offset);
/* get the offsets */
@@ -197,15 +197,21 @@ IndexPtr getindex(char *searchstr, int dbase)
char strings[MAX_FORMS][WORDBUF]; /* vector of search strings */
static IndexPtr offsets[MAX_FORMS];
static int offset;
-
+
/* This works like strrok(): if passed with a non-null string,
prepare vector of search strings and offsets. If string
is null, look at current list of offsets and return next
one, or NULL if no more alternatives for this word. */
if (searchstr != NULL) {
+ /* Bail out if the input is too long for us to handle */
+ if (strlen(searchstr) > (WORDBUF - 1)) {
+ strcpy(msgbuf, "WordNet library error: search term is too long\n");
+ display_message(msgbuf);
+ return(NULL);
+ }
- offset = 0;
+ offset = 0;
strtolower(searchstr);
for (i = 0; i < MAX_FORMS; i++) {
strcpy(strings[i], searchstr);
@@ -229,11 +235,11 @@ IndexPtr getindex(char *searchstr, int dbase)
/* Get offset of first entry. Then eliminate duplicates
and get offsets of unique strings. */
- if (strings[0][0] != NULL)
+ if (strings[0] != NULL)
offsets[0] = index_lookup(strings[0], dbase);
for (i = 1; i < MAX_FORMS; i++)
- if ((strings[i][0]) != NULL && (strcmp(strings[0], strings[i])))
+ if (strings[i] != NULL && (strcmp(strings[0], strings[i])))
offsets[i] = index_lookup(strings[i], dbase);
}
@@ -272,7 +278,7 @@ SynsetPtr read_synset(int dbase, long boffset, char *w
SynsetPtr parse_synset(FILE *fp, int dbase, char *word)
{
static char line[LINEBUF];
- char tbuf[SMLINEBUF];
+ char tbuf[SMLINEBUF] = "";
char *ptrtok;
char *tmpptr;
int foundpert = 0;
@@ -286,33 +292,11 @@ SynsetPtr parse_synset(FILE *fp, int dbase, char *word
if ((tmpptr = fgets(line, LINEBUF, fp)) == NULL)
return(NULL);
- synptr = (SynsetPtr)malloc(sizeof(Synset));
+ synptr = (SynsetPtr)calloc(1, sizeof(Synset));
assert(synptr);
-
- synptr->hereiam = 0;
+
synptr->sstype = DONT_KNOW;
- synptr->fnum = 0;
- synptr->pos = '\0';
- synptr->wcount = 0;
- synptr->words = '\0';
- synptr->whichword = 0;
- synptr->ptrcount = 0;
- synptr->ptrtyp = '\0';
- synptr->ptroff = '\0';
- synptr->ppos = '\0';
- synptr->pto = '\0';
- synptr->pfrm = '\0';
- synptr->fcount = 0;
- synptr->frmid = '\0';
- synptr->frmto = '\0';
- synptr->defn = '\0';
- synptr->key = 0;
- synptr->nextss = NULL;
- synptr->nextform = NULL;
synptr->searchtype = -1;
- synptr->ptrlist = NULL;
- synptr->headword = NULL;
- synptr->headsense = 0;
ptrtok = line;
@@ -322,7 +306,7 @@ SynsetPtr parse_synset(FILE *fp, int dbase, char *word
/* sanity check - make sure starting file offset matches first field */
if (synptr->hereiam != loc) {
- sprintf(msgbuf, "WordNet library error: no synset at location %d\n",
+ sprintf(msgbuf, "WordNet library error: no synset at location %ld\n",
loc);
display_message(msgbuf);
free(synptr);
@@ -335,16 +319,20 @@ SynsetPtr parse_synset(FILE *fp, int dbase, char *word
/* looking at POS */
ptrtok = strtok(NULL, " \n");
- synptr->pos = malloc(strlen(ptrtok) + 1);
+ synptr->pos = strdup(ptrtok);
assert(synptr->pos);
- strcpy(synptr->pos, ptrtok);
if (getsstype(synptr->pos) == SATELLITE)
synptr->sstype = INDIRECT_ANT;
/* looking at numwords */
ptrtok = strtok(NULL, " \n");
synptr->wcount = strtol(ptrtok, NULL, 16);
-
+
+ if (synptr->wcount < 0 || (unsigned int)synptr->wcount > UINT_MAX/sizeof(char *)) {
+ free_syns(synptr);
+ return(NULL);
+ }
+
synptr->words = (char **)malloc(synptr->wcount * sizeof(char *));
assert(synptr->words);
synptr->wnsns = (int *)malloc(synptr->wcount * sizeof(int));
@@ -354,9 +342,8 @@ SynsetPtr parse_synset(FILE *fp, int dbase, char *word
for (i = 0; i < synptr->wcount; i++) {
ptrtok = strtok(NULL, " \n");
- synptr->words[i] = malloc(strlen(ptrtok) + 1);
+ synptr->words[i] = strdup(ptrtok);
assert(synptr->words[i]);
- strcpy(synptr->words[i], ptrtok);
/* is this the word we're looking for? */
@@ -371,6 +358,12 @@ SynsetPtr parse_synset(FILE *fp, int dbase, char *word
ptrtok = strtok(NULL," \n");
synptr->ptrcount = atoi(ptrtok);
+ /* Should we check for long here as well? */
+ if (synptr->ptrcount < 0 || (unsigned int)synptr->ptrcount > UINT_MAX/sizeof(int)) {
+ free_syns(synptr);
+ return(NULL);
+ }
+
if (synptr->ptrcount) {
/* alloc storage for the pointers */
@@ -455,21 +448,23 @@ SynsetPtr parse_synset(FILE *fp, int dbase, char *word
ptrtok = strtok(NULL," \n");
if (ptrtok) {
ptrtok = strtok(NULL," \n");
- sprintf(tbuf, "");
while (ptrtok != NULL) {
+ if (strlen(ptrtok) + strlen(tbuf) + 1 + 1 > sizeof(tbuf)) {
+ free_syns(synptr);
+ return(NULL);
+ }
strcat(tbuf,ptrtok);
ptrtok = strtok(NULL, " \n");
if(ptrtok)
strcat(tbuf," ");
}
- assert((1 + strlen(tbuf)) < sizeof(tbuf));
- synptr->defn = malloc(strlen(tbuf) + 4);
+ synptr->defn = malloc(strlen(tbuf) + 3);
assert(synptr->defn);
sprintf(synptr->defn,"(%s)",tbuf);
}
if (keyindexfp) { /* we have unique keys */
- sprintf(tmpbuf, "%c:%8.8d", partchars[dbase], synptr->hereiam);
+ sprintf(tmpbuf, "%c:%8.8ld", partchars[dbase], synptr->hereiam);
synptr->key = GetKeyForOffset(tmpbuf);
}
@@ -635,7 +630,7 @@ static void traceptrs(SynsetPtr synptr, int ptrtyp, in
if ((ptrtyp == PERTPTR || ptrtyp == PPLPTR) &&
synptr->pto[i] != 0) {
- sprintf(tbuf, " (Sense %d)\n",
+ snprintf(tbuf, sizeof(tbuf), " (Sense %d)\n",
cursyn->wnsns[synptr->pto[i] - 1]);
printsynset(prefix, cursyn, tbuf, DEFOFF, synptr->pto[i],
SKIP_ANTS, PRINT_MARKER);
@@ -656,7 +651,7 @@ static void traceptrs(SynsetPtr synptr, int ptrtyp, in
traceptrs(cursyn, HYPERPTR, getpos(cursyn->pos), 0);
}
} else if (ptrtyp == ANTPTR && dbase != ADJ && synptr->pto[i] != 0) {
- sprintf(tbuf, " (Sense %d)\n",
+ snprintf(tbuf, sizeof(tbuf), " (Sense %d)\n",
cursyn->wnsns[synptr->pto[i] - 1]);
printsynset(prefix, cursyn, tbuf, DEFOFF, synptr->pto[i],
SKIP_ANTS, PRINT_MARKER);
@@ -817,7 +812,7 @@ static void tracenomins(SynsetPtr synptr, int dbase)
cursyn = read_synset(synptr->ppos[i], synptr->ptroff[i], "");
- sprintf(tbuf, "#%d\n",
+ snprintf(tbuf, sizeof(tbuf), "#%d\n",
cursyn->wnsns[synptr->pto[i] - 1]);
printsynset(prefix, cursyn, tbuf, DEFOFF, synptr->pto[i],
SKIP_ANTS, SKIP_MARKER);
@@ -989,12 +984,12 @@ void getexample(char *offset, char *wd)
char sentbuf[512];
if (vsentfilefp != NULL) {
- if (line = bin_search(offset, vsentfilefp)) {
+ if ((line = bin_search(offset, vsentfilefp)) != NULL) {
while(*line != ' ')
line++;
printbuffer(" EX: ");
- sprintf(sentbuf, line, wd);
+ snprintf(sentbuf, sizeof(sentbuf), line, wd);
printbuffer(sentbuf);
}
}
@@ -1011,7 +1006,7 @@ int findexample(SynsetPtr synptr)
if (vidxfilefp != NULL) {
wdnum = synptr->whichword - 1;
- sprintf(tbuf,"%s%%%-1.1d:%-2.2d:%-2.2d::",
+ snprintf(tbuf, sizeof(tbuf), "%s%%%-1.1d:%-2.2d:%-2.2d::",
synptr->words[wdnum],
getpos(synptr->pos),
synptr->fnum,
@@ -1124,7 +1119,7 @@ static void freq_word(IndexPtr index)
if (cnt >= 17 && cnt <= 32) familiar = 6;
if (cnt > 32 ) familiar = 7;
- sprintf(tmpbuf,
+ snprintf(tmpbuf, sizeof(tmpbuf),
"\n%s used as %s is %s (polysemy count = %d)\n",
index->wd, a_an[getpos(index->pos)], freqcats[familiar], cnt);
printbuffer(tmpbuf);
@@ -1147,6 +1142,9 @@ void wngrep (char *word_passed, int pos) {
}
rewind(inputfile);
+ if (strlen(word_passed) + 1 > sizeof(word))
+ return;
+
strcpy (word, word_passed);
ToLowerCase(word); /* map to lower case for index file search */
strsubst (word, ' ', '_'); /* replace spaces with underscores */
@@ -1169,7 +1167,7 @@ void wngrep (char *word_passed, int pos) {
((line[loc + wordlen] == '-') || (line[loc + wordlen] == '_')))
) {
strsubst (line, '_', ' ');
- sprintf (tmpbuf, "%s\n", line);
+ snprintf (tmpbuf, sizeof(tmpbuf), "%s\n", line);
printbuffer (tmpbuf);
break;
}
@@ -1570,7 +1568,8 @@ char *findtheinfo(char *searchstr, int dbase, int ptrt
bufstart[0] = '\n';
bufstart++;
}
- strncpy(bufstart, tmpbuf, strlen(tmpbuf));
+ /* Don't include the \0 */
+ memcpy(bufstart, tmpbuf, strlen(tmpbuf));
bufstart = searchbuffer + strlen(searchbuffer);
}
}
@@ -1683,9 +1682,8 @@ SynsetPtr traceptrs_ds(SynsetPtr synptr, int ptrtyp, i
cursyn = read_synset(synptr->ppos[i],
synptr->ptroff[i],
"");
- synptr->headword = malloc(strlen(cursyn->words[0]) + 1);
+ synptr->headword = strdup(cursyn->words[0]);
assert(synptr->headword);
- strcpy(synptr->headword, cursyn->words[0]);
synptr->headsense = cursyn->lexid[0];
free_synset(cursyn);
break;
@@ -2013,7 +2011,7 @@ static int getsearchsense(SynsetPtr synptr, int whichw
strsubst(strcpy(wdbuf, synptr->words[whichword - 1]), ' ', '_');
strtolower(wdbuf);
- if (idx = index_lookup(wdbuf, getpos(synptr->pos))) {
+ if ((idx = index_lookup(wdbuf, getpos(synptr->pos))) != NULL) {
for (i = 0; i < idx->off_cnt; i++)
if (idx->offset[i] == synptr->hereiam) {
free_index(idx);
@@ -2037,7 +2035,7 @@ static void printsynset(char *head, SynsetPtr synptr,
by flags */
if (offsetflag) /* print synset offset */
- sprintf(tbuf + strlen(tbuf),"{%8.8d} ", synptr->hereiam);
+ sprintf(tbuf + strlen(tbuf),"{%8.8ld} ", synptr->hereiam);
if (fileinfoflag) { /* print lexicographer file information */
sprintf(tbuf + strlen(tbuf), "<%s> ", lexfiles[synptr->fnum]);
prlexid = 1; /* print lexicographer id after word */
@@ -2072,7 +2070,7 @@ static void printantsynset(SynsetPtr synptr, char *tai
tbuf[0] = '\0';
if (offsetflag)
- sprintf(tbuf,"{%8.8d} ", synptr->hereiam);
+ sprintf(tbuf,"{%8.8ld} ", synptr->hereiam);
if (fileinfoflag) {
sprintf(tbuf + strlen(tbuf),"<%s> ", lexfiles[synptr->fnum]);
prlexid = 1;