0
0
mirror of https://github.com/vim/vim.git synced 2025-07-26 11:04:33 -04:00

updated for version 7.0126

This commit is contained in:
Bram Moolenaar 2005-08-10 07:51:35 +00:00
parent 8af244281c
commit 329cc7e429

View File

@ -50,6 +50,16 @@
* See ":help develop-spell". * See ":help develop-spell".
*/ */
/* Use SPELL_PRINTTREE for debugging: dump the word tree after adding a word.
* Only use for small word lists!
* SPELL_COMPRESS_CNT is in how many words we compress the tree. */
#if 0
# define SPELL_PRINTTREE
# define SPELL_COMPRESS_CNT 1
#else
# define SPELL_COMPRESS_CNT 1000000
#endif
/* /*
* Use this to adjust the score after finding suggestions, based on the * Use this to adjust the score after finding suggestions, based on the
* suggested word sounding like the bad word. This is much faster than doing * suggested word sounding like the bad word. This is much faster than doing
@ -719,6 +729,7 @@ static linenr_T apply_prefixes __ARGS((slang_T *slang, char_u *word, int round,
static char *e_format = N_("E759: Format error in spell file"); static char *e_format = N_("E759: Format error in spell file");
static char *e_spell_trunc = N_("E758: Truncated spell file"); static char *e_spell_trunc = N_("E758: Truncated spell file");
static char *msg_compressing = N_("Compressing word tree...");
/* /*
* Main spell-checking function. * Main spell-checking function.
@ -3106,7 +3117,7 @@ struct wordnode_S
{ {
union /* shared to save space */ union /* shared to save space */
{ {
char_u hashkey[6]; /* room for the hash key */ char_u hashkey[6]; /* the hash key, only used while compressing */
int index; /* index in written nodes (valid after first int index; /* index in written nodes (valid after first
round) */ round) */
} wn_u1; } wn_u1;
@ -3118,11 +3129,16 @@ struct wordnode_S
wordnode_T *wn_child; /* child (next byte in word) */ wordnode_T *wn_child; /* child (next byte in word) */
wordnode_T *wn_sibling; /* next sibling (alternate byte in word, wordnode_T *wn_sibling; /* next sibling (alternate byte in word,
always sorted) */ always sorted) */
int wn_refs; /* nr of references to this node */
char_u wn_byte; /* Byte for this node. NUL for word end */ char_u wn_byte; /* Byte for this node. NUL for word end */
short_u wn_flags; /* when wn_byte is NUL: WF_ flags */ char_u wn_prefixID; /* when "wn_byte" is NUL: supported/required
short wn_region; /* when wn_byte is NUL: region mask; for prefix ID or 0 */
short_u wn_flags; /* when "wn_byte" is NUL: WF_ flags */
short wn_region; /* when "wn_byte" is NUL: region mask; for
PREFIXTREE it's the prefcondnr */ PREFIXTREE it's the prefcondnr */
char_u wn_prefixID; /* supported/required prefix ID or 0 */ #ifdef SPELL_PRINTTREE
int wn_nr; /* sequence nr for printing */
#endif
}; };
#define WN_MASK 0xffff /* mask relevant bits of "wn_flags" */ #define WN_MASK 0xffff /* mask relevant bits of "wn_flags" */
@ -3180,7 +3196,9 @@ static char_u *getroom_save __ARGS((sblock_T **blp, char_u *s));
static void free_blocks __ARGS((sblock_T *bl)); static void free_blocks __ARGS((sblock_T *bl));
static wordnode_T *wordtree_alloc __ARGS((sblock_T **blp)); static wordnode_T *wordtree_alloc __ARGS((sblock_T **blp));
static int store_word __ARGS((char_u *word, spellinfo_T *spin, int flags, int region, char_u *pfxlist)); static int store_word __ARGS((char_u *word, spellinfo_T *spin, int flags, int region, char_u *pfxlist));
static int tree_add_word __ARGS((char_u *word, wordnode_T *tree, int flags, int region, int prefixID, sblock_T **blp)); static int tree_add_word __ARGS((char_u *word, wordnode_T *tree, int flags, int region, int prefixID, spellinfo_T *spin));
static wordnode_T *get_wordnode __ARGS((sblock_T **blp));
static void free_wordnode __ARGS((wordnode_T *n));
static void wordtree_compress __ARGS((wordnode_T *root, spellinfo_T *spin)); static void wordtree_compress __ARGS((wordnode_T *root, spellinfo_T *spin));
static int node_compress __ARGS((wordnode_T *node, hashtab_T *ht, int *tot)); static int node_compress __ARGS((wordnode_T *node, hashtab_T *ht, int *tot));
static int node_equal __ARGS((wordnode_T *n1, wordnode_T *n2)); static int node_equal __ARGS((wordnode_T *n1, wordnode_T *n2));
@ -3195,6 +3213,109 @@ static void init_spellfile __ARGS((void));
* Use a negative number with the lower 8 bits zero. */ * Use a negative number with the lower 8 bits zero. */
#define PFX_FLAGS -256 #define PFX_FLAGS -256
static int words_added = 0; /* number of words added to tree */
#ifdef SPELL_PRINTTREE
/*
* For debugging the tree code: print the current tree in a (more or less)
* readable format, so that we can see what happens when adding a word and/or
* compressing the tree.
* Based on code from Olaf Seibert.
*/
#define PRINTLINESIZE 1000
#define PRINTWIDTH 6
#define PRINTSOME(l, depth, fmt, a1, a2) vim_snprintf(l + depth * PRINTWIDTH, \
PRINTLINESIZE - PRINTWIDTH * depth, fmt, a1, a2)
static char line1[PRINTLINESIZE];
static char line2[PRINTLINESIZE];
static char line3[PRINTLINESIZE];
static void
spell_clear_flags(wordnode_T *node)
{
wordnode_T *np;
for (np = node; np != NULL; np = np->wn_sibling)
{
np->wn_u1.index = FALSE;
spell_clear_flags(np->wn_child);
}
}
static void
spell_print_node(wordnode_T *node, int depth)
{
if (node->wn_u1.index)
{
/* Done this node before, print the reference. */
PRINTSOME(line1, depth, "(%d)", node->wn_nr, 0);
PRINTSOME(line2, depth, " ", 0, 0);
PRINTSOME(line3, depth, " ", 0, 0);
msg(line1);
msg(line2);
msg(line3);
}
else
{
node->wn_u1.index = TRUE;
if (node->wn_byte != NUL)
{
if (node->wn_child != NULL)
PRINTSOME(line1, depth, " %c -> ", node->wn_byte, 0);
else
/* Cannot happen? */
PRINTSOME(line1, depth, " %c ???", node->wn_byte, 0);
}
else
PRINTSOME(line1, depth, " $ ", 0, 0);
PRINTSOME(line2, depth, "%d/%d ", node->wn_nr, node->wn_refs);
if (node->wn_sibling != NULL)
PRINTSOME(line3, depth, " | ", 0, 0);
else
PRINTSOME(line3, depth, " ", 0, 0);
if (node->wn_byte == NUL)
{
msg(line1);
msg(line2);
msg(line3);
}
/* do the children */
if (node->wn_byte != NUL && node->wn_child != NULL)
spell_print_node(node->wn_child, depth + 1);
/* do the siblings */
if (node->wn_sibling != NULL)
{
/* get rid of all parent details except | */
STRCPY(line1, line3);
STRCPY(line2, line3);
spell_print_node(node->wn_sibling, depth);
}
}
}
static void
spell_print_tree(wordnode_T *root)
{
if (root != NULL)
{
/* Clear the "wn_u1.index" fields, used to remember what has been
* done. */
spell_clear_flags(root);
/* Recursively print the tree. */
spell_print_node(root, 0);
}
}
#endif /* SPELL_PRINTTREE */
/* /*
* Read the affix file "fname". * Read the affix file "fname".
* Returns an afffile_T, NULL for complete failure. * Returns an afffile_T, NULL for complete failure.
@ -3611,7 +3732,7 @@ spell_read_aff(fname, spin)
if (upper) if (upper)
n |= WFP_UP; n |= WFP_UP;
tree_add_word(p, spin->si_prefroot, n, tree_add_word(p, spin->si_prefroot, n,
idx, cur_aff->ah_newID, &spin->si_blocks); idx, cur_aff->ah_newID, spin);
} }
} }
} }
@ -4583,7 +4704,7 @@ store_word(word, spin, flags, region, pfxlist)
for (p = pfxlist; res == OK; ++p) for (p = pfxlist; res == OK; ++p)
{ {
res = tree_add_word(foldword, spin->si_foldroot, ct | flags, res = tree_add_word(foldword, spin->si_foldroot, ct | flags,
region, p == NULL ? 0 : *p, &spin->si_blocks); region, p == NULL ? 0 : *p, spin);
if (p == NULL || *p == NUL) if (p == NULL || *p == NUL)
break; break;
} }
@ -4594,7 +4715,7 @@ store_word(word, spin, flags, region, pfxlist)
for (p = pfxlist; res == OK; ++p) for (p = pfxlist; res == OK; ++p)
{ {
res = tree_add_word(word, spin->si_keeproot, flags, res = tree_add_word(word, spin->si_keeproot, flags,
region, p == NULL ? 0 : *p, &spin->si_blocks); region, p == NULL ? 0 : *p, spin);
if (p == NULL || *p == NUL) if (p == NULL || *p == NUL)
break; break;
} }
@ -4610,27 +4731,63 @@ store_word(word, spin, flags, region, pfxlist)
* Returns FAIL when out of memory. * Returns FAIL when out of memory.
*/ */
static int static int
tree_add_word(word, root, flags, region, prefixID, blp) tree_add_word(word, root, flags, region, prefixID, spin)
char_u *word; char_u *word;
wordnode_T *root; wordnode_T *root;
int flags; int flags;
int region; int region;
int prefixID; int prefixID;
sblock_T **blp; spellinfo_T *spin;
{ {
wordnode_T *node = root; wordnode_T *node = root;
wordnode_T *np; wordnode_T *np;
wordnode_T *copyp, **copyprev;
wordnode_T **prev = NULL; wordnode_T **prev = NULL;
int i; int i;
sblock_T **blp = &spin->si_blocks;
/* Add each byte of the word to the tree, including the NUL at the end. */ /* Add each byte of the word to the tree, including the NUL at the end. */
for (i = 0; ; ++i) for (i = 0; ; ++i)
{ {
/* When there is more than one reference to this node we need to make
* a copy, so that we can modify it. Copy the whole list of siblings
* (we don't optimize for a partly shared list of siblings). */
if (node != NULL && node->wn_refs > 1)
{
--node->wn_refs;
copyprev = prev;
for (copyp = node; copyp != NULL; copyp = copyp->wn_sibling)
{
/* Allocate a new node and copy the info. */
np = get_wordnode(blp);
if (np == NULL)
return FAIL;
np->wn_child = copyp->wn_child;
if (np->wn_child != NULL)
++np->wn_child->wn_refs; /* child gets extra ref */
np->wn_byte = copyp->wn_byte;
if (np->wn_byte == NUL)
{
np->wn_flags = copyp->wn_flags;
np->wn_region = copyp->wn_region;
np->wn_prefixID = copyp->wn_prefixID;
}
/* Link the new node in the list, there will be one ref. */
np->wn_refs = 1;
*copyprev = np;
copyprev = &np->wn_sibling;
/* Let "node" point to the head of the copied list. */
if (copyp == node)
node = np;
}
}
/* Look for the sibling that has the same character. They are sorted /* Look for the sibling that has the same character. They are sorted
* on byte value, thus stop searching when a sibling is found with a * on byte value, thus stop searching when a sibling is found with a
* higher byte value. For zero bytes (end of word) the sorting is * higher byte value. For zero bytes (end of word) the sorting is
* done on flags and then on prefixID * done on flags and then on prefixID. */
*/
while (node != NULL while (node != NULL
&& (node->wn_byte < word[i] && (node->wn_byte < word[i]
|| (node->wn_byte == NUL || (node->wn_byte == NUL
@ -4651,10 +4808,22 @@ tree_add_word(word, root, flags, region, prefixID, blp)
|| node->wn_prefixID != prefixID))) || node->wn_prefixID != prefixID)))
{ {
/* Allocate a new node. */ /* Allocate a new node. */
np = (wordnode_T *)getroom(blp, sizeof(wordnode_T), TRUE); np = get_wordnode(blp);
if (np == NULL) if (np == NULL)
return FAIL; return FAIL;
np->wn_byte = word[i]; np->wn_byte = word[i];
/* If "node" is NULL this is a new child or the end of the sibling
* list: ref count is one. Otherwise use ref count of sibling and
* make ref count of sibling one (matters when inserting in front
* of the list of siblings). */
if (node == NULL)
np->wn_refs = 1;
else
{
np->wn_refs = node->wn_refs;
node->wn_refs = 1;
}
*prev = np; *prev = np;
np->wn_sibling = node; np->wn_sibling = node;
node = np; node = np;
@ -4670,10 +4839,73 @@ tree_add_word(word, root, flags, region, prefixID, blp)
prev = &node->wn_child; prev = &node->wn_child;
node = *prev; node = *prev;
} }
#ifdef SPELL_PRINTTREE
smsg("Added \"%s\"", word);
spell_print_tree(root->wn_sibling);
#endif
/*
* Every so many words compress the tree, so that we don't use too much
* memory.
*/
if (++words_added >= SPELL_COMPRESS_CNT)
{
words_added = 0;
msg_start();
msg_puts((char_u *)_(msg_compressing));
msg_clr_eos();
msg_didout = FALSE;
msg_col = 0;
out_flush();
wordtree_compress(root->wn_sibling, spin);
}
return OK; return OK;
} }
/* We keep a list of nodes that have been freed during compression. They are
* re-used when adding a new node. The "wn_child" fields links them. */
static wordnode_T *first_free_node = NULL;
#ifdef SPELL_PRINTTREE
static int wordnode_nr = 0;
#endif
/*
* Get a wordnode_T, either from the list of previously freed nodes or
* allocate a new one.
*/
static wordnode_T *
get_wordnode(blp)
sblock_T **blp;
{
wordnode_T *n;
if (first_free_node == NULL)
n = (wordnode_T *)getroom(blp, sizeof(wordnode_T), TRUE);
else
{
n = first_free_node;
first_free_node = n->wn_child;
vim_memset(n, 0, sizeof(wordnode_T));
}
#ifdef SPELL_PRINTTREE
n->wn_nr = ++wordnode_nr;
#endif
return n;
}
/*
* Free a wordnode_T for re-use later.
*/
static void
free_wordnode(n)
wordnode_T *n;
{
n->wn_child = first_free_node;
first_free_node = n;
}
/* /*
* Compress a tree: find tails that are identical and can be shared. * Compress a tree: find tails that are identical and can be shared.
*/ */
@ -4685,20 +4917,31 @@ wordtree_compress(root, spin)
hashtab_T ht; hashtab_T ht;
int n; int n;
int tot = 0; int tot = 0;
int perc;
if (root != NULL) if (root != NULL)
{ {
hash_init(&ht); hash_init(&ht);
n = node_compress(root, &ht, &tot); n = node_compress(root, &ht, &tot);
#ifndef SPELL_PRINTTREE
if (spin->si_verbose || p_verbose > 2) if (spin->si_verbose || p_verbose > 2)
#endif
{ {
if (!spin->si_verbose) if (!spin->si_verbose)
verbose_enter(); verbose_enter();
if (tot > 1000000)
perc = (tot - n) / (tot / 100);
else
perc = (tot - n) * 100 / tot;
smsg((char_u *)_("Compressed %d of %d nodes; %d%% remaining"), smsg((char_u *)_("Compressed %d of %d nodes; %d%% remaining"),
n, tot, (tot - n) * 100 / tot); n, tot, perc);
if (p_verbose > 2) if (p_verbose > 2)
verbose_leave(); verbose_leave();
} }
#ifdef SPELL_PRINTTREE
spell_print_tree(root);
#endif
hash_clear(&ht); hash_clear(&ht);
} }
} }
@ -4749,8 +4992,17 @@ node_compress(node, ht, tot)
if (node_equal(child, tp)) if (node_equal(child, tp))
{ {
/* Found one! Now use that child in place of the /* Found one! Now use that child in place of the
* current one. This means the current child is * current one. This means the current child and all
* dropped from the tree. */ * its siblings is unlinked from the tree. */
++tp->wn_refs;
--child->wn_refs;
if (child->wn_refs == 0)
for (; child != NULL; child = child->wn_sibling)
{
if (child->wn_child != NULL)
--child->wn_child->wn_refs;
free_wordnode(child);
}
np->wn_child = tp; np->wn_child = tp;
++compressed; ++compressed;
break; break;
@ -5023,11 +5275,11 @@ write_vim_spell(fname, spin)
for (round = 1; round <= 3; ++round) for (round = 1; round <= 3; ++round)
{ {
if (round == 1) if (round == 1)
tree = spin->si_foldroot; tree = spin->si_foldroot->wn_sibling;
else if (round == 2) else if (round == 2)
tree = spin->si_keeproot; tree = spin->si_keeproot->wn_sibling;
else else
tree = spin->si_prefroot; tree = spin->si_prefroot->wn_sibling;
/* Clear the index and wnode fields in the tree. */ /* Clear the index and wnode fields in the tree. */
clear_node(tree); clear_node(tree);
@ -5271,6 +5523,11 @@ mkspell(fcount, fnames, ascii, overwrite, added_word)
ga_init2(&spin.si_sal, (int)sizeof(fromto_T), 20); ga_init2(&spin.si_sal, (int)sizeof(fromto_T), 20);
ga_init2(&spin.si_map, (int)sizeof(char_u), 100); ga_init2(&spin.si_map, (int)sizeof(char_u), 100);
ga_init2(&spin.si_prefcond, (int)sizeof(char_u *), 50); ga_init2(&spin.si_prefcond, (int)sizeof(char_u *), 50);
first_free_node = NULL;
#ifdef SPELL_PRINTTREE
wordnode_nr = 0;
#endif
words_added = 0;
/* default: fnames[0] is output file, following are input files */ /* default: fnames[0] is output file, following are input files */
innames = &fnames[1]; innames = &fnames[1];
@ -5366,7 +5623,7 @@ mkspell(fcount, fnames, ascii, overwrite, added_word)
|| spin.si_keeproot == NULL || spin.si_keeproot == NULL
|| spin.si_prefroot == NULL) || spin.si_prefroot == NULL)
{ {
error = TRUE; free_blocks(spin.si_blocks);
return; return;
} }
@ -5421,13 +5678,6 @@ mkspell(fcount, fnames, ascii, overwrite, added_word)
if (!error) if (!error)
{ {
/*
* Remove the dummy NUL from the start of the tree root.
*/
spin.si_foldroot = spin.si_foldroot->wn_sibling;
spin.si_keeproot = spin.si_keeproot->wn_sibling;
spin.si_prefroot = spin.si_prefroot->wn_sibling;
/* /*
* Combine tails in the tree. * Combine tails in the tree.
*/ */
@ -5435,14 +5685,14 @@ mkspell(fcount, fnames, ascii, overwrite, added_word)
{ {
if (added_word) if (added_word)
verbose_enter(); verbose_enter();
MSG(_("Compressing word tree...")); MSG(_(msg_compressing));
out_flush(); out_flush();
if (added_word) if (added_word)
verbose_leave(); verbose_leave();
} }
wordtree_compress(spin.si_foldroot, &spin); wordtree_compress(spin.si_foldroot->wn_sibling, &spin);
wordtree_compress(spin.si_keeproot, &spin); wordtree_compress(spin.si_keeproot->wn_sibling, &spin);
wordtree_compress(spin.si_prefroot, &spin); wordtree_compress(spin.si_prefroot->wn_sibling, &spin);
} }
if (!error) if (!error)