/* ~~~~~~~~~~~~~~~~ llm - The little LISP machinery ~~~~~~~~~~~~~ * * Copyright (c) 2019 Christian Barthel * All rights reserved. * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the * following conditions * are met: * 1. Redistributions of source code must retain the above * copyright * notice, this list of conditions and the * following disclaimer. * 2. Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT * SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY * OF SUCH DAMAGE. * * Goal: evaluate simple LISP forms in an * read-eval-print stylized loop. supports: * * Data types: * # Functions * t,nil boolean (nand ) -> boolean * "str" Strings (streq ) -> boolean * abc Symboles (symeq ) -> boolean * 1234 Number/Integer (inv ) -> -num * (add ) -> num * (lt ) -> boolean * (num2bool ) -> boolean * * Special Forms: * (def ..) ()) * (if ) * (quote a) * * * Compile: flex tokenize.l && cc lex.yy.c -lfl -o llm * Or: make; ./llm */ %{ #define _POSIX_C_SOURCE 200809L #define _XOPEN_SOURCE 600 #include #include #include #include struct token; struct token { int type; union { char *str; int num; } v; struct token *next; }; struct ast; struct ast { int type; union { struct token *token; struct ast *list; } v; struct ast *next; }; struct env; struct env { char *name; struct ast *a; struct env *parent; struct env *next; }; enum asttype { AST_TOK = 400, AST_LIST = 401, AST_QUOTE = 402 }; enum yytokentype { NUM = 258, LPAR = 259, RPAR = 260, SYM = 261, STR = 262, EOL = 263, BOOL = 264 }; struct token* make_token(enum yytokentype, int, char*); struct ast* eval(struct ast *a, struct env *e, struct env **w); int yylval; char *yystr; %} %% "(" { return LPAR; } ")" { return RPAR; } t { yylval = 1; return BOOL;} nil { yylval = 0; return BOOL; } [0-9]+ { yylval = atoi(yytext); return NUM; } ;.* { /* comment */ } [ \t\n] { /* ignore white space */ } \"[^\"]*\" { return STR; } [a-zA-Z][_a-zA-Z0-9]* { return SYM; } . { err(1, "invalid symbol: %s\n", yytext); } %% void reassure(int promised, const char *fmt, ...) { va_list ap; va_start(ap, fmt); if (!promised) { vfprintf(stderr, fmt, ap); fputc('\n', stderr); exit(1); } va_end(ap); } void debug_token(struct token *x, int indent) { for (int i = 0; i < indent; i++) putc(' ', stderr); if (x == NULL) { fprintf(stderr, "NULL TOKEN\n"); return; } fprintf(stderr, "%d: (%p)", x->type, x); if (x->type == NUM) fprintf(stderr, "%d @ %p", x->v.num, x); if (x->type == STR || x->type == SYM) fprintf(stderr, "%s", x->v.str); putc('\n', stderr); } void debug_ast(struct ast *a, int indent) { while (a) { for (int i = 0; i < indent; i++) putc(' ', stderr); fprintf(stderr, "AST: %s,a=%p,list=%p,token=%p,next=%p", (a->type == AST_TOK) ? "AST_TOK" : "AST_LIST", a, a->v.list, a->v.token, a->next); putc('\n', stderr); if (a->type == AST_LIST) { fprintf(stderr, "enter %p\n", a->v.list); fprintf(stderr, "==\n"); debug_ast(a->v.list, indent+4); fprintf(stderr, "==\n"); } else if(a->type == AST_TOK) debug_token(a->v.token, indent); a = a->next; } } struct token* make_token(enum yytokentype type, int num, char* str) { struct token *t = (struct token*) calloc(1, sizeof(struct token)); if (t == NULL) err(1, "malloc failed"); t->type = type; if (type == NUM || type == BOOL) t->v.num = num; else if (type == SYM || type == STR) t->v.str = strdup(str); return t; } struct ast* make_ast(enum asttype type, struct token *t) { struct ast *a = (struct ast*) calloc(1, sizeof(struct ast)); if (a == NULL) err(1, "malloc failed"); a->type = type; if (type == AST_TOK) a->v.token = t; /* else if (type == AST_LIST) */ /* a->v.list = n; */ return a; } struct env* make_env(char *name, struct ast *a) { struct env *e = (struct env*) calloc(1, sizeof(struct env)); if (e == NULL) err(1, "malloc failed"); e->name = name; e->a = a; return e; } int token_is_num(struct token *t) {return t->type == NUM;} int token_is_sym(struct token *t) {return t->type == SYM;} int token_is_str(struct token *t) {return t->type == STR;} int token_is_bool(struct token *t) {return t->type == BOOL;} int token_is_internal(struct token *t) { if (strcmp(t->v.str, "def") == 0) return 1; else if (strcmp(t->v.str, "lm") == 0) return 1; else if (strcmp(t->v.str, "if") == 0) return 1; else if (strcmp(t->v.str, "inv") == 0) return 1; else if (strcmp(t->v.str, "add") == 0) return 1; else if (strcmp(t->v.str, "lt") == 0) return 1; else if (strcmp(t->v.str, "nand") == 0) return 1; else if (strcmp(t->v.str, "q") == 0) return 1; else if (strcmp(t->v.str, "num2bool") == 0) return 1; else return 0; } /* upon parsing a list of tokens like: * "(" -> "def" -> "(" -> "b" -> ")" -> "c" -> ")" * care must be taken when entering a new sub-list (b), * because the processing in the upper level must continue * where the sub-list ended, i.e. "c" must be the next * token on this level. I've solved this with a global * variable `next` and reset the current symbol after * creating a sublist. */ struct token *next; struct ast* parse(struct token *t) { if (t == NULL) err(1, "Unexpected token: "); if (t->type == LPAR) { struct ast *head = make_ast(AST_LIST, NULL); struct ast *a, *p; t = t->next; while (t->type != RPAR) { reassure(t != NULL, "%s: unexpected NULL", __func__); a = parse(t); if (a == NULL) err(1, "syntax error"); if (head->v.list == NULL) p = head->v.list = a; else p->next = a; p = a; t = next; /* skip all tokens that are processed */ reassure(t != NULL, "%s: unexpected NULL(2)", __func__); } reassure(t->type == RPAR, "%s: RPAR expected but given: %d", __func__, t->type); t = t->next; next = t; return head; } else if (t->type == NUM) { next = t->next; return make_ast(AST_TOK, t); } else if (t->type == BOOL) { next = t->next; return make_ast(AST_TOK, t); } else if (t->type == SYM) { next = t->next; return make_ast(AST_TOK, t); } else if (t->type == STR) { next = t->next; return make_ast(AST_TOK, t); } return NULL; } void append(struct env *e, struct env *a) { if (e->next == NULL) { e->next = a; return; } else append(e->next, a); } struct ast * lookup(struct env *e, char *name) { if (e == NULL) return NULL; for (struct env *t = e; t != NULL; t = t->next) { /* fprintf(stderr, "env: %s\n", t->name); */ if (strcmp(t->name, name) == 0) return t->a; } //fprintf(stderr, "====\n"); return lookup(e->parent, name); } /* (def a ) */ struct ast * eval_def(struct ast *a, struct env *e) { a = a->next; /* skip `def` */ //debug_ast(a, 0); reassure(a->v.token->type == SYM, /* must be a symbol */ "%s: expected SYM, given: %d", __func__, a->v.token->type); reassure(a != NULL && a->next != NULL, /* need sth to bind..*/ "%s: unexpected NULL", __func__); reassure((a->next->type == AST_TOK && /* check type */ a->next->v.token->type== STR) || (a->next->type == AST_TOK && a->next->v.token->type == BOOL) || (a->next->type == AST_TOK && a->next->v.token->type == NUM) || (a->next->type == AST_LIST) || (a->next->type == AST_TOK && a->next->v.token->type== SYM), "%s: expected STR,BOOL,NUM,SYM as assigned expression", __func__); char *new = a->v.token->v.str; for (struct env *x = e; x; x = x->next) reassure(strcmp(x->name, new) != 0, "%s: %s already assigned in env=%p", __func__, new, e); append(e, make_env(new, eval(a->next, e, NULL))); reassure(a->next->next == NULL, "%s: ended with unexpected token", __func__); return NULL; } /* (if ) */ struct ast * eval_if(struct ast *a, struct env *e) { //debug_ast(a, 0); a = a->next; reassure(a != NULL && a->next != NULL, "%s: unexpected NULL", __func__); struct ast *condition = eval(a, e, NULL); reassure(condition != NULL && condition->type == AST_TOK && condition->v.token->type == BOOL, "%s: expected boolean token", __func__); reassure(a->next != NULL && a->next->next != NULL, "%s: unexpected expressions", __func__); if (condition->v.token->v.num == 1) return eval(a->next, e, NULL); else return eval(a->next->next, e, NULL); } /* (nand ) -> BOOL */ struct ast * eval_nand(struct ast *a, struct env *e) { reassure(a != NULL && a->next != NULL && a->next->next != NULL && a->next->next->next == NULL, "%s: unexpected NULL", __func__); struct ast *op1 = eval(a->next, e, NULL); struct ast *op2 = eval(a->next->next, e, NULL); reassure(op1 != NULL && op1->type == AST_TOK && op1->v.token->type == BOOL && op2 != NULL && op2->type == AST_TOK && op2->v.token->type == BOOL, "%s: expected operands", __func__); return make_ast(AST_TOK, make_token(BOOL, !( op1->v.token->v.num & op2->v.token->v.num) , NULL)); } /* (add ) -> NUM */ struct ast * eval_add(struct ast *a, struct env *e) { reassure(a != NULL && a->next != NULL && a->next->next != NULL && a->next->next->next == NULL, "%s: expected 2 operands", __func__); struct ast *op1 = eval(a->next, e, NULL); struct ast *op2 = eval(a->next->next, e, NULL); reassure(op1 != NULL && op1->type == AST_TOK && op1->v.token->type == NUM && op2 != NULL && op2->type == AST_TOK && op2->v.token->type == NUM, "%s: expected operands", __func__); return make_ast(AST_TOK, make_token(NUM, op1->v.token->v.num + op2->v.token->v.num, NULL)); } /* (lt ) -> BOOL*/ struct ast * eval_lt(struct ast *a, struct env *e) { reassure (a != NULL && a->next != NULL && a->next->next != NULL && a->next->next->next == NULL, "%s: expected 2 operands", __func__); struct ast *op1 = eval(a->next, e, NULL); struct ast *op2 = eval(a->next->next, e, NULL); reassure(op1 != NULL && op1->type == AST_TOK && op1->v.token->type == NUM && op2 != NULL && op2->type == AST_TOK && op2->v.token->type == NUM, "%s: expected operands", __func__); return make_ast(AST_TOK, make_token(BOOL, op1->v.token->v.num < op2->v.token->v.num, NULL)); } /* (inv ) -> NUM */ struct ast * eval_inv(struct ast *a, struct env *e) { reassure(a != NULL && a->next != NULL && a->next->next == NULL, "%s: expected one argument", __func__); struct ast *op = eval(a->next, e, NULL); reassure(op != NULL && op->type == AST_TOK && op->v.token->type == NUM && op->next == NULL, "%s: expected argument", __func__); return make_ast(AST_TOK, make_token(NUM, 0 - op->v.token->v.num, NULL)); } /* (num2bool ) -> bool */ struct ast * eval_num2bool(struct ast *a, struct env *e) { reassure(a != NULL && a->next != NULL && a->next->next == NULL, "%s: expected one operand", __func__); struct ast *op = eval(a->next, e, NULL); reassure(op != NULL && op->type == AST_TOK && op->v.token->type == NUM && op->next == NULL, "%s: expected operand", __func__); return make_ast(AST_TOK, make_token(BOOL, op->v.token->v.num != 0, NULL)); } struct ast * eval_internal_sym(struct ast *x, struct env *e) { struct ast *a = x->v.list; if (strcmp(a->v.token->v.str, "def") == 0) return eval_def(a, e); else if (strcmp(a->v.token->v.str, "lm") == 0) { //new->v.token = a->v.token; return x; struct ast *l = make_ast(AST_LIST, NULL); struct ast *head = make_ast(AST_TOK, NULL); l->v.list = head; l->next = a->next; fprintf(stderr, "%p, %p\n", l->next, a->next); l->next->next = a->next->next; //debug_ast(l, 0); head->v.token = a->v.token; //fprintf(stderr, "returning AST:\n"); //fprintf(stderr, "------------------------\n"); //debug_ast(l, 0); //fprintf(stderr, "------------------------\n"); return l; } else if (strcmp(a->v.token->v.str, "if") == 0) { return eval_if(a, e); } else if (strcmp(a->v.token->v.str, "inv") == 0) { return eval_inv(a, e); } else if (strcmp(a->v.token->v.str, "add") == 0) { return eval_add(a, e); } else if (strcmp(a->v.token->v.str, "lt") == 0) { return eval_lt(a, e); } else if (strcmp(a->v.token->v.str, "nand") == 0) { return eval_nand(a, e); } else if (strcmp(a->v.token->v.str, "q") == 0) { //return make_ast(AST_QUOTE, a->next); return a->next; } else if (strcmp(a->v.token->v.str, "num2bool") == 0) { return eval_num2bool(a, e); } return NULL; } struct ast * eval_defined_sym(struct ast *a, struct env *e) { struct ast *var = lookup(e, a->v.token->v.str); reassure(var != NULL, "%s: symbol %s not found", __func__, a->v.token->v.str); return var; } struct env *lastenv; struct env * bind_args(struct token *args, struct ast *fnargs_values, struct env *e) { reassure(args && args->type == LPAR, "%s: first argument: %d %s", __func__, args->type, args->v.str); args = args->next; struct ast *a = fnargs_values; struct token *t = args; struct env *newenv = (struct env*) calloc(1, sizeof(struct env)); newenv->name = strdup("__fn"); newenv->parent = e; lastenv = newenv; while (t != NULL && t->type != RPAR) { reassure(t != NULL && t->type == SYM && t->v.str != NULL, "%s: wrong args param found: %s", __func__, t->v.str); reassure(a != NULL && (a->type == AST_TOK || a->type == AST_LIST), "%s: arg=%s given, but no value present", __func__, t->v.str); struct ast *new = eval(a, e, NULL); append(newenv, make_env(t->v.str, new)); t = t->next; a = a->next; } reassure (t->type == RPAR, "%s: args formulation suspicious: %d", __func__, args->type); reassure (a == NULL, "%s: actual value given but no arg left", __func__); return newenv; } struct ast * eval_fn_name(struct ast *a, struct env *e, struct env **w) { if (a->type == AST_LIST) { //fprintf(stderr, "eval list\n"); //debug_ast(a, 0); //reassure(0, "not implemented\n"); //return eval(a, e, NULL); return a; } else if (a->type == AST_TOK) { //fprintf(stderr, "eval token: %s\n", a->v.token->v.str); struct ast *fn = lookup(e, a->v.token->v.str); return eval(fn, e, NULL); } } struct ast * eval_fn_call(struct ast *a, struct env *e, struct env **w) { //fprintf(stderr, "~~~~~~~~~~~~~~~~~~~~~~~~ eval call\n"); struct ast *fn = eval_fn_name(a, e, NULL); struct ast *fn_body = fn->v.list->next->next; struct ast *fn_args = fn->v.list->next; struct token *args_head = fn->v.list->v.token->next; struct ast *fn_args_values = a->next; //debug_ast(fn, 0); struct env *newenv = bind_args(args_head, fn_args_values, e); return eval(fn_body, newenv, NULL); } struct ast * eval_fn_call_old(struct ast *a, struct env *e, struct env **w) { struct ast *fnargs_values; struct ast *fn_head; struct ast *fn_body; struct env *wanted = NULL; fnargs_values = a->next; reassure(fnargs_values != NULL, "%s: expected RPAR or function args", __func__); if (a->type == AST_TOK) { reassure((AST_TOK == a->type) && a->v.token->type == SYM, "%s: expected SYM, is: %d", __func__, a->type); char *fnstr = a->v.token->v.str; struct ast *fn = lookup(e, fnstr); debug_ast(fn, 0); reassure(fn != NULL && fn->type == AST_LIST, "%s: lookup on `%s` failed", __func__, fnstr); /* struct ast *fn2 = eval(fn, e, &wanted); */ /* fprintf(stderr, "===========\n"); */ debug_ast(fn, 0); /* fn_head = fn2->v.list; */ /* fn_body = fn2->next->next; */ fn_head = fn->v.list; fn_body = fn->next->next; } else if (a->type == AST_LIST && strcmp(a->v.list->v.token->v.str, "lm") == 0) { /* ((lm () ) args) */ fn_head = a->v.list; fn_body = a->v.list->next->next; } else { /* */ struct ast *fn = eval(a, e, &wanted); fn_head = fn->v.list; fn_body = fn->next->next; } reassure(fn_body != NULL && fn_body->type == AST_LIST, "%s: expected function body as list", __func__); reassure(fn_head != NULL && fn_head->type == AST_TOK && fn_head->v.token->type == SYM && strcmp(fn_head->v.token->v.str, "lm") == 0, "%s: expected #fn", __func__); struct token *fnargs = fn_head->v.token->next; reassure(fnargs != NULL && fnargs->type == LPAR, "%s: args expected to be a list", __func__); struct env *newenv = bind_args(fnargs, fnargs_values, e); if (wanted != NULL) { /* fprintf(stderr, "==lastenv\n"); */ /* for (struct env *e = lastenv; e; e=e->next) */ /* fprintf(stderr, "e: %s\n", e->name); */ /* fprintf(stderr, "==newenv:\n"); */ /* for (struct env *e = newenv; e; e=e->next) */ /* fprintf(stderr, "e: %s\n", e->name); */ newenv->parent = wanted; } if (w != NULL) { *w = newenv; } reassure(newenv != NULL, "%s: setup for newenv failed", __func__); return eval(fn_body, newenv, NULL); } struct ast * eval_list(struct ast *a, struct env *e, struct env **w) { if (token_is_internal(a->v.list->v.token)) return eval_internal_sym(a, e); else return eval_fn_call(a->v.list, e, w); } struct ast * eval(struct ast *a, struct env *e, struct env **w) { if (a == NULL) return NULL; switch (a->type) { case AST_TOK: if (token_is_num(a->v.token)) return make_ast(AST_TOK, a->v.token); else if (token_is_sym(a->v.token)) return eval_defined_sym(a, e); else if (token_is_str(a->v.token)) return make_ast(AST_TOK, a->v.token); else if (token_is_bool(a->v.token)) return make_ast(AST_TOK, a->v.token); case AST_LIST: reassure ((a->v.list->type == AST_TOK && a->v.list->v.token->type == SYM) || (a->v.list->type == AST_LIST), "%s: expected LIST or TOK", __func__); return eval_list(a, e, w); default: reassure(0, "%s: wrong AST Type", __func__); } return NULL; } void pr(struct ast *a) { if (a == NULL) { //fprintf(stderr, "nil %p\n", a); return; } switch (a->type) { case AST_TOK: if (a->v.token->type == NUM) printf("%d\n", a->v.token->v.num); else if (a->v.token->type == STR) printf("%s\n", a->v.token->v.str); else if (a->v.token->type == SYM) printf("%s\n", a->v.token->v.str); else if (a->v.token->type == BOOL) { if (a->v.token->v.num) printf("t\n"); else printf("nil\n"); } break; case AST_LIST: printf("#fn\n"); struct token *t = a->v.list->v.token; debug_ast(a, 0); while (t != NULL) { printf("%d ", t->type); t = t->next; } putchar('\n'); break; default: err(1, "don't know how to print"); } } struct token* read_form() { int tok, open = 0; struct token *t, *u = NULL, *start = NULL; while((tok = yylex())) { if (tok == LPAR) open++; else if (tok == RPAR) open--; t = make_token(tok, yylval, yytext); if (start == NULL) start = t; if (u == NULL) u = t; else u->next = t; u = t; if (!open) { return start; } } exit(0); } int main(void) { struct env default_env = { 0 }, *env; env = &default_env; env->name = "__llm"; while (1) pr(eval(parse(read_form()), env, NULL)); return 0; } /* * Local Variables: * mode: c; * eval: (message "main()") * fill-column: 65 * comment-column: 40 * indent-tabs-mode: nil * tab-width: 2 * c-basic-offset: 2 * End: */