From b683b1f3ec7e5e624fd667498d24deaa6beeb120 Mon Sep 17 00:00:00 2001 From: Neil Date: Thu, 2 Feb 2023 21:31:24 -0800 Subject: [PATCH] Too complex substring. --- Makefile | 1 + src/helper.h | 13 ++- src/lorem.c | 35 ++++++ src/lorem.h | 4 + src/not-used.c | 242 ----------------------------------------- src/source.re.c | 18 ++- test/test_substrings.c | 20 ++++ 7 files changed, 87 insertions(+), 246 deletions(-) create mode 100644 src/lorem.c create mode 100644 src/lorem.h create mode 100644 test/test_substrings.c diff --git a/Makefile b/Makefile index be80ea6..14dfb01 100644 --- a/Makefile +++ b/Makefile @@ -36,6 +36,7 @@ default: $(projects) # success bin/test-text: build/text.o build/test_text.o +bin/test-strings: build/substrings.o build/test_substrings.o bin/test-journal: build/text.o build/journal.o build/test_journal.o bin/test-source: build/text.o build/journal.o build/source.o build/test_source.o bin/test-kjv: build/text.o build/kjv.o build/test_kjv.o diff --git a/src/helper.h b/src/helper.h index f4b9efc..937ab42 100644 --- a/src/helper.h +++ b/src/helper.h @@ -1,8 +1,9 @@ +#include #include #include /** `printf`-compatible substring. */ -struct substring { const char *sub; int size; }; +struct substring { const char *sub; size_t size; }; /** Parse unsigned; [`s`,`e`) => `n`. */ static int helper_natural(const char *s, const char *const e, uint32_t *const n) @@ -18,7 +19,15 @@ static int helper_natural(const char *s, const char *const e, uint32_t *const n) return 1; } +/** djb2 */ +static uint32_t djb2(const char *s) { + const unsigned char *str = (const unsigned char *)s; + uint32_t hash = 5381, c; + while(c = *str++) hash = ((hash << 5) + hash) + c; /* hash * 33 + c */ + return hash; +} + static void unused_helper_coda(void); static void unused_helper(void) - { helper_natural(0, 0, 0); unused_helper_coda(); } + { helper_natural(0, 0, 0); djb2(0); unused_helper_coda(); } static void unused_helper_coda(void) { unused_helper(); } diff --git a/src/lorem.c b/src/lorem.c new file mode 100644 index 0000000..8439d55 --- /dev/null +++ b/src/lorem.c @@ -0,0 +1,35 @@ +/** @license 2023 Neil Edelman, distributed under the terms of the + [MIT License](https://opensource.org/licenses/MIT). + + Temporary strings duplicated from substrings. + + @std C89 */ + +#include "lorem.h" +#include +#include + +/*#define ARRAY_NAME block +#define ARRAY_TYPE char * +#include "array.h"*/ + +#define POOL_NAME char +#define POOL_TYPE char +#include "pool.h" + +static struct char_pool pool; + +const char *lorem_dup(const struct substring sub) { + char *string; + if(!sub.sub) { errno = EDOM; return 0; } + if(sub.size == ~(size_t)0) { errno = ERANGE; return 0; } + if(!char_pool_buffer(&pool, sub.size + 1)) return 0; + assert(pool.capacity0 - pool.slots.data[0].size >= sub.size + 1); + string = pool.slots.data[0].slab + pool.slots.data[0].size; + memcpy(string, sub.sub, sub.size); + string[sub.size] = '\0'; + pool.slots.data[0].size += sub.size + 1; + return string; +} + +void lorem_(void) { char_pool_(&pool); } diff --git a/src/lorem.h b/src/lorem.h new file mode 100644 index 0000000..6704ab1 --- /dev/null +++ b/src/lorem.h @@ -0,0 +1,4 @@ +#include "helper.h" + +const char *lorem_dup(const struct substring sub); +void lorem_(void); diff --git a/src/not-used.c b/src/not-used.c index 1913d9c..a907cf7 100644 --- a/src/not-used.c +++ b/src/not-used.c @@ -60,64 +60,6 @@ set datafile separator comma plot $Data u 2:($3) smooth cumulative with steps lw 2 lc "red" ti "cumulative count" ### end of code */ -static int bible_graph(/*const*/ struct page_tree *const journal) { - enum { CHILL, BOOK, CHAPTER, WORD, NEXT } state = CHILL; - struct page_tree_entry entry = { 0, 0 }; - struct lex *lex = 0; - size_t count = 0; - for(struct page_tree_iterator p_it = page_tree_begin(journal); - (entry = page_tree_next(&p_it)).key; ) { - struct page *const page = entry.value; - for(struct lex_array_iterator l_it = lex_array_iterator(&page->meaning); - (lex = lex_array_next(&l_it)); ) { - switch(lex->symbol) { - case KJV_BOOK: - if(state != CHILL && state != WORD) goto catch; - if(state == WORD) printf("\n"); - fprintf(stderr, "%d-%.2d-%.2d: \"%.*s\", ", - entry.key->year, entry.key->month, entry.key->day, - (int)(lex->s1 - lex->s0), lex->s0); - state = BOOK; - break; - case KJV_CHAPTER_VERSE: - if(state != BOOK) goto catch; - printf("\"%.*s\", \"", (int)(lex->s1 - lex->s0), lex->s0); - state = CHAPTER; - break; - case KJV_NEXT: - if(state != WORD) goto catch; - printf("\"\n"); - break; - case KJV_TEXT: - if(state != WORD && state != CHAPTER && state != NEXT) - goto catch; - printf("%s%.*s<%d>", state == WORD ? "*" : "", - (int)(lex->s1 - lex->s0 < 0 ? 10 : lex->s1 - lex->s0), - lex->s0, (int)(lex->s1 - lex->s0)); - count++; - state = WORD; - break; - default: - if(state != CHILL && state != WORD) goto catch; - if(state == WORD) printf("\"\n"), state = CHILL; - break; - } - } - if(state != CHILL && state != WORD) goto catch; - if(state == WORD) printf("\n"), state = CHILL; - } - printf("Count: %lu.\n", (unsigned long)count); - return 1; -catch: - fprintf(stderr, "Bible error.\n"); - if(entry.key) { - fprintf(stderr, "On date: %d-%.2d-%.2d.\n", - entry.key->year, entry.key->month, entry.key->day); - if(lex) fprintf(stderr, "At line %lu.\n", (unsigned long)lex->line); - } - errno = EILSEQ; - return 0; -} #define C_BLACK "\033[0;30m" #define C_RED "\033[0;31m" @@ -139,190 +81,6 @@ catch: #define C_RESET "\033[0m" -int main(int argc, char **argv) { - int success = EXIT_SUCCESS; - char *intent = 0; - - /* For reading in files, overwritten. */ - DIR *dir = 0; - struct dirent *de; - struct int_array years = int_array(), months = int_array(), - days = int_array(); - int *y, *y_end; - - struct page_tree journal = page_tree(); - - errno = 0; - if(argc != 2) { intent = "needs journal location, which should" - " contain //.txt"; goto catch; } - - /* Get the years list as directories matching a year. */ - if(chdir(argv[1]) == -1 || !(dir = opendir("."))) goto catch; - while((de = readdir(dir))) { - struct stat st; - int year, *p; - if(!lex_looks_like_year(de->d_name, &year)) continue; - if(stat(de->d_name, &st)) goto catch; - if(!S_ISDIR(st.st_mode)) continue; - if(!(p = int_array_new(&years))) goto catch; - *p = year; - } - closedir(dir), dir = 0; - /* Sort the years for sensible ordering of parsing. */ - qsort(years.data, years.size, sizeof *years.data, &void_int_cmp); - fprintf(stderr, "Years in <<%s>>: %s.\n", - argv[1], int_array_to_string(&years)); - - /* Go though each year. */ - for(y = years.data, y_end = y + years.size; y < y_end; y++) { - char fn[64]; - int *m, *m_end; - sprintf(fn, "%d", *y); - - /* Get the months as directories. */ - if(chdir(fn) == -1 || !(dir = opendir("."))) goto catch; - while((de = readdir(dir))) { - struct stat st; - int month, *p; - if(!(month = lex_looks_like_month(de->d_name))) continue; - if(stat(de->d_name, &st)) goto catch; - if(!S_ISDIR(st.st_mode)) continue; - if(!(p = int_array_new(&months))) goto catch; - *p = month; - } - closedir(dir), dir = 0; - qsort(months.data, months.size, sizeof *months.data, &void_int_cmp); - fprintf(stderr, "Months in <<%s>>: %s.)\n", - fn, int_array_to_string(&months)); - - /* Go though each month. */ - for(m = months.data, m_end = m + months.size; m < m_end; m++) { - int *d, *d_end; - sprintf(fn, "%.2d", *m); - - /* Get the days as files. */ - if(chdir(fn) == -1 || !(dir = opendir("."))) goto catch; - while((de = readdir(dir))) { - struct stat st; - int day, *p; - /* fixme: Have yyyy-mm-dd to figure out how many days. */ - if(!(day = lex_looks_like_day(de->d_name))) continue; - if(stat(de->d_name, &st)) goto catch; - if(S_ISDIR(st.st_mode)) continue; - if(!(p = int_array_new(&days))) goto catch; - *p = day; - } - closedir(dir), dir = 0; - qsort(days.data, days.size, sizeof *days.data, &void_int_cmp); - fprintf(stderr, "Days in <<%s>>: %s.\n", - fn, int_array_to_string(&days)); - - for(d = days.data, d_end = d + days.size; d < d_end; d++) { - struct lex *lex = 0; - struct page *page = 0; - union date32 d32; - if(!(d32 = date_to_32(*y, *m, *d)).year) { errno = EILSEQ; - intent = "date parse error"; goto syntax; } - sprintf(fn, "%.2d.txt", *d); - if(page_tree_bulk_add(&journal, d32, &page) != TREE_UNIQUE) { - if(!errno) intent = "not unique", errno = EDOM; - goto syntax; - } - page->entry = char_array(); - page->meaning = lex_array(); - if(!append_file(&page->entry, fn)) goto syntax; - int first = 1; - for(lex_reset(page->entry.data); ; ) { - if(!(lex = lex_array_new(&page->meaning))) goto syntax; - if(!lex_next(lex)) { - if(lex->symbol != END) { errno = EILSEQ; goto syntax; } - break; /* Terminated successfully. */ - } - switch(lex->symbol) { - case TEXT: printf("%s%.*s", - first ? "" : " ", (int)(lex->s1 - lex->s0), lex->s0); - first = 0; break; - case PARAGRAPH: printf("\n" C_RESET); break; - case KJV_BOOK: printf(C_YELLOW "%.*s", - (int)(lex->s1 - lex->s0), lex->s0); break; - case KJV_CHAPTER_VERSE: printf(" ch. %.*s", - (int)(lex->s1 - lex->s0), lex->s0); break; - case KJV_TEXT: printf("%.*s", - (int)(lex->s1 - lex->s0), lex->s0); break; - case KJV_NEXT: printf("(next)\n"); break; - default: - fprintf(stderr, "%lu: %s", - (unsigned long)lex->line, lex_symbols[lex->symbol]); - if(lex->s0 && lex->s1) { - if(lex->s0 + INT_MAX < lex->s1) - intent = "line too long", errno = EILSEQ; - else - fprintf(stderr, " <<%.*s>>", - (int)(lex->s1 - lex->s0), lex->s0); - } - fprintf(stderr, ".\n"); - break; - } - } - continue; -syntax: - fprintf(stderr, "On date: %d-%.2d-%.2d.\n", *y, *m, *d); - if(!page) goto catch; - if(!lex) { fprintf(stderr, "While parsing <<<\n%s>>>.\n", - page->entry.data); goto catch; } - for(struct lex_array_iterator it - = lex_array_iterator(&page->meaning); - lex = lex_array_next(&it); ) { - fprintf(stderr, "%lu: %s", - (unsigned long)lex->line, lex_symbols[lex->symbol]); - if(lex->s0 && lex->s1) { - if(lex->s0 + INT_MAX < lex->s1) - intent = "line too long", errno = EILSEQ; - else - fprintf(stderr, " <<%.*s>>", - (int)(lex->s1 - lex->s0), lex->s0); - } - fprintf(stderr, ".\n"); - } - goto catch; - } - - int_array_clear(&days); - if(chdir("..") == -1) goto catch; - } - - int_array_clear(&months); - if(chdir("..") == -1) goto catch; - /* fixme: Expand, contact is the next thing that it doesn't get. */ - if(*y == 1993/*1996*/) break; - } - page_tree_bulk_finish(&journal); - int_array_(&years), int_array_(&months), int_array_(&days); - fprintf(stderr, "Journal has entries: %s\n", page_tree_to_string(&journal)); - - /* Do something interesting? */ - if(!bible_graph(&journal)) goto catch; - - goto finally; -catch: - success = EXIT_FAILURE; - perror("interpret"); - if(intent) fprintf(stderr, "Further explanation: %s.\n", intent); -finally: - if(dir && closedir(dir)) success = EXIT_FAILURE, perror("dir"); - int_array_(&years), int_array_(&months), int_array_(&days); - struct page_tree_entry entry; - for(struct page_tree_iterator it = page_tree_begin(&journal); - (entry = page_tree_next(&it)).key; ) { - struct page *const page = entry.value; - char z[12]; - date32_to_string(*entry.key, &z); - lex_array_(&page->meaning); - char_array_(&page->entry); - } - return success; -} - /* Popular KJV 738137 words. */ struct scan scan(const char *const buffer) { diff --git a/src/source.re.c b/src/source.re.c index 45e3332..2b11c7e 100644 --- a/src/source.re.c +++ b/src/source.re.c @@ -5,11 +5,25 @@ #include "../src/source.h" /* base */ #include "../src/journal.h" #include +#include #include #include - - +/* This is a lookup table for source strings ("2000glider") to substring the + first description ("Glider pilot log book"). */ +static int lookup_is_equal(const char *const x, const char *const y) + { return !strcmp(x, y); } +static uint32_t lookup_hash(const char *const x) { return djb2(x); } +static void lookup_to_string(const char *x, const struct substring desc, + char (*const a)[12]) { (void)desc; sprintf(*a, "%.11s", x); } +static struct substring lookup_default = { 0, 0 }; +#define TABLE_NAME lookup +#define TABLE_KEY char * +#define TABLE_UINT uint32_t +#define TABLE_VALUE struct substring +#define TABLE_DEFAULT lookup_default +#define TABLE_TO_STRING +#include "../src/table.h" static void source_to_string(const union line64 line, const struct substring *u, diff --git a/test/test_substrings.c b/test/test_substrings.c new file mode 100644 index 0000000..01fc59d --- /dev/null +++ b/test/test_substrings.c @@ -0,0 +1,20 @@ +#include "../src/lorem.h" +#include +#include + +int main(void) { + int success = EXIT_SUCCESS; + const char lorem[] = "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Aenean tincidunt leo neque. Integer vel bibendum lectus, a vulputate dolor. Vivamus vestibulum quam ut euismod aliquet. Vivamus vel pulvinar felis, eu dictum lorem. Integer scelerisque lobortis orci nec tincidunt. Mauris vulputate ipsum non tempus tincidunt. Pellentesque nec iaculis dolor. Curabitur bibendum pretium dui euismod tincidunt. In cursus, libero et porta placerat, ante ante accumsan lacus, nec sollicitudin ex elit nec lectus. Sed nisi sem, rhoncus sed nulla et, faucibus feugiat eros."; + struct substring s; + const char *a; + s.sub = lorem + 6, s.size = 5; + if(!(a = lorem_dup(s))) goto catch; + printf("lorem: <%s>\n", a); + goto finally; +catch: + success = EXIT_FAILURE; + perror("text"); +finally: + lorem_(); + return success; +}