/* ~~~~~~~~~~~~~~~~ llm - The little LISP machinery ~~~~~~~~~~~~~ * * Copyright (c) 2019 Christian Barthel * All rights reserved. * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the * following conditions * are met: * 1. Redistributions of source code must retain the above * copyright * notice, this list of conditions and the * following disclaimer. * 2. Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT * SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY * OF SUCH DAMAGE. * * Goal: evaluate simple LISP forms (strong type checking) * with a read-eval-print loop. * * Data types: * #fn Functions * t,nil boolean (nand ) -> boolean * "str" Strings * abc Symboles * 1234 Number/Integer (inv ) -> -num * (add ) -> num * (lt ) -> boolean * (num2bool ) -> boolean * * Special Forms: * (def ) * Bind ARG to name SYM in current environment. * (lm () ()) * create function with one or more PARAMs and * BODY. * (if ) * Evaluate BOOL and execute CASETRUE if true, * otherwise evaulate CASEFALSE * (quote a) * Return a. * * Compile: flex tokenize.l && cc lex.yy.c -lfl -o llm * Or: make; ./llm * (tested on FreeBSD, should run on most POSIX * systems) * BUGS: * no freeing of memory (yet) * * Literature: * [1] https://news.ycombinator.com/item?id=8714988 * Discussion about "minimal" set of primitives * for a LISP interpreter. * [2] https://stackoverflow.com/questions/3482389/ * How many primitives does it take to build a * LISP machine? Ten, seven or five? * [3] http://www.cs.cmu.edu/Groups/AI/html/faqs/lang/ \ * lisp/part1/faq-doc-6.html * Discussion about the minimal set of primitives * (see attachment) * [4] https://www.cs.cmu.edu/afs/cs/project/ai-repository \ * /ai/lang/lisp/impl/awk/0.html * LISP Interpreter in AWK. (see attachments) * [5] http://norvig.com/lispy.html * Similar project, with Python instead of C * [6] http://norvig.com/lispy2.html * Improved version of [5] in Python. (attachment) * [7] Structure and Interpretation of Computer Programs, * by Abelson, Sussman, and Sussman * [8] FernUniversität Hagen: Logisches und funktionales * Programmieren, by Prof.Dr.Beierle. * [9] http://norvig.com/jscheme.html * Peter Norvig JScheme */ %{ #define _POSIX_C_SOURCE 200809L #define _XOPEN_SOURCE 600 #include #include #include #include struct token; struct token { int type; union { char *str; int num; } v; struct token *next; }; struct ast; struct env; struct env { char *name; struct ast *a; struct env *parent; struct env *next; }; struct ast { int type; union { struct token *token; struct ast *list; } v; struct ast *next; struct env *env; }; enum asttype { AST_TOK = 400, AST_LIST = 401, AST_QUOTE = 402 }; enum yytokentype { NUM = 258, LPAR = 259, RPAR = 260, SYM = 261, STR = 262, EOL = 263, BOOL = 264 }; struct token* make_token(enum yytokentype, int, char*); struct ast* eval(struct ast *a, struct env *e); struct ast * lookup(struct env *e, char *name); void debug_ast(struct ast *a, int indent); int yylval; char *yystr; %} %% "(" { return LPAR; } ")" { return RPAR; } t { yylval = 1; return BOOL;} nil { yylval = 0; return BOOL; } [0-9]+ { yylval = atoi(yytext); return NUM; } ;.* { /* comment */ } [ \t\n] { /* ignore white space */ } \"[^\"]*\" { return STR; } [a-zA-Z][_a-zA-Z0-9]* { return SYM; } . { err(1, "invalid symbol: %s\n", yytext); } %% /* ------------------------------------------------------------ */ void reassure(int promised, const char *fmt, ...) { va_list ap; va_start(ap, fmt); if (!promised) { vfprintf(stderr, fmt, ap); fputc('\n', stderr); exit(1); } va_end(ap); } void debug_token(struct token *x, int indent) { for (int i = 0; i < indent; i++) putc(' ', stderr); if (x == NULL) { fprintf(stderr, "NULL TOKEN\n"); return; } fprintf(stderr, "%d: (%p)", x->type, x); if (x->type == NUM) fprintf(stderr, "%d @ %p", x->v.num, x); if (x->type == STR || x->type == SYM) fprintf(stderr, "%s", x->v.str); putc('\n', stderr); } int debug_tval(struct env *e, char *name) { struct ast *a = lookup(e, name); if (a != NULL && a->type == AST_TOK) return a->v.token->v.num; else return -1; } void debug_env(struct env* e) { if (e== NULL) return; for (struct env* cu = e; cu; cu = cu->next) fprintf(stderr, "e: %s %d %p|%p\n", cu->name, debug_tval(e,cu->name), cu, e); fprintf(stderr, "-p-\n"); debug_env(e->parent); } void debug_ast(struct ast *a, int indent) { while (a) { for (int i = 0; i < indent; i++) putc(' ', stderr); fprintf(stderr, "AST: %s,a=%p,list=%p,to=%p,next=%p,env=%p", (a->type == AST_TOK) ? "AST_TOK" : "AST_LST", a, a->v.list, a->v.token, a->next, a->env); putc('\n', stderr); if (a->env != NULL) debug_env(a->env); if (a->type == AST_LIST) { fprintf(stderr, "enter %p\n", a->v.list); fprintf(stderr, "==\n"); debug_ast(a->v.list, indent+4); fprintf(stderr, "==\n"); } else if(a->type == AST_TOK) debug_token(a->v.token, indent); a = a->next; } } /* ------------------------------------------------------------ */ struct token* make_token(enum yytokentype type, int num, char* str) { struct token *t = (struct token*) calloc(1, sizeof(struct token)); if (t == NULL) err(1, "malloc failed"); t->type = type; if (type == NUM || type == BOOL) t->v.num = num; else if (type == SYM || type == STR) t->v.str = strdup(str); return t; } struct ast* make_ast(enum asttype type, struct token *t) { struct ast *a = (struct ast*) calloc(1, sizeof(struct ast)); if (a == NULL) err(1, "malloc failed"); a->type = type; if (type == AST_TOK) a->v.token = t; else if (type == AST_LIST) return a;//reassure(0, "XXX unexpected state /*a->v.list=n*/"); return a; } struct env* make_env(char *name, struct ast *a) { struct env *e = (struct env*) calloc(1, sizeof(struct env)); if (e == NULL) err(1, "malloc failed"); e->name = name; e->a = a; return e; } /* ------------------------------------------------------------ */ int token_is_num(struct token *t) {return t->type == NUM;} int token_is_sym(struct token *t) {return t->type == SYM;} int token_is_str(struct token *t) {return t->type == STR;} int token_is_bool(struct token *t) {return t->type == BOOL;} int token_is_internal(struct token *t) { if (strcmp(t->v.str, "def") == 0) return 1; else if (strcmp(t->v.str, "lm") == 0) return 1; else if (strcmp(t->v.str, "if") == 0) return 1; else if (strcmp(t->v.str, "inv") == 0) return 1; else if (strcmp(t->v.str, "add") == 0) return 1; else if (strcmp(t->v.str, "lt") == 0) return 1; else if (strcmp(t->v.str, "nand") == 0) return 1; else if (strcmp(t->v.str, "q") == 0) return 1; else if (strcmp(t->v.str, "num2bool") == 0) return 1; else return 0; } /* ------------------------------------------------------------ */ /* Global Variable alarm: * upon parsing a list of tokens like: * "(" -> "def" -> "(" -> "b" -> ")" -> "c" -> ")" * care must be taken when entering a new sub-list (b), * because the processing in the upper level must continue * where the sub-list ended, i.e. "c" must be the next * token on this level. I've solved this with a global * variable `next` and reset the current symbol after * creating a sublist. Not the ideal way, though. */ struct token *next; struct ast* parse(struct token *t) { if (t == NULL) err(1, "Unexpected token: "); if (t->type == LPAR) { struct ast *head = make_ast(AST_LIST, NULL); struct ast *a, *p; t = t->next; while (t->type != RPAR) { reassure(t != NULL, "%s: unexpected NULL", __func__); a = parse(t); reassure(a != NULL, "syntax error"); if (head->v.list == NULL) p = head->v.list = a; else p->next = a; p = a; t = next; /* skip all tokens that are processed */ reassure(t != NULL, "%s: unexpected NULL(2)", __func__); } reassure(t->type == RPAR, "%s: RPAR expected but given: %d", __func__, t->type); t = t->next; next = t; return head; } else if (t->type == NUM) { next = t->next; return make_ast(AST_TOK, t); } else if (t->type == BOOL) { next = t->next; return make_ast(AST_TOK, t); } else if (t->type == SYM) { next = t->next; return make_ast(AST_TOK, t); } else if (t->type == STR) { next = t->next; return make_ast(AST_TOK, t); } return NULL; } void append(struct env *e, struct env *a) { if (e->next == NULL) { e->next = a; return; } else append(e->next, a); } struct ast * lookup(struct env *e, char *name) { if (e == NULL) return NULL; for (struct env *t = e; t != NULL; t = t->next) if (strcmp(t->name, name) == 0) return t->a; return lookup(e->parent, name); } /* LISP Form: (def a ) */ struct ast * eval_def(struct ast *a, struct env *e) { a = a->next; /* skip `def` */ reassure(a->v.token->type == SYM, /* must be a symbol */ "%s: expected SYM, given: %d", __func__, a->v.token->type); reassure(a != NULL && a->next != NULL, /* need sth to bind..*/ "%s: unexpected NULL", __func__); reassure((a->next->type == AST_TOK && /* check type */ a->next->v.token->type== STR) || (a->next->type == AST_TOK && a->next->v.token->type == BOOL) || (a->next->type == AST_TOK && a->next->v.token->type == NUM) || (a->next->type == AST_LIST) || (a->next->type == AST_TOK && a->next->v.token->type== SYM), "%s: expected STR,BOOL,NUM,SYM as assigned expression", __func__); char *new = a->v.token->v.str; for (struct env *x = e; x; x = x->next) reassure(strcmp(x->name, new) != 0, "%s: %s already assigned in env=%p", __func__, new, e); append(e, make_env(new, eval(a->next, e))); reassure(a->next->next == NULL, "%s: ended with unexpected token", __func__); return NULL; } /* LISP form: (if ) */ struct ast * eval_if(struct ast *a, struct env *e) { a = a->next; reassure(a != NULL && a->next != NULL, "%s: unexpected NULL", __func__); struct ast *condition = eval(a, e); reassure(condition != NULL && condition->type == AST_TOK && condition->v.token->type == BOOL, "%s: expected boolean token", __func__); reassure(a->next != NULL && a->next->next != NULL, "%s: unexpected expressions", __func__); if (condition->v.token->v.num == 1) return eval(a->next, e); else return eval(a->next->next, e); } /* LISP form: (nand ) -> BOOL */ struct ast * eval_nand(struct ast *a, struct env *e) { reassure(a != NULL && a->next != NULL && a->next->next != NULL && a->next->next->next == NULL, "%s: unexpected NULL", __func__); struct ast *op1 = eval(a->next, e); struct ast *op2 = eval(a->next->next, e); reassure(op1 != NULL && op1->type == AST_TOK && op1->v.token->type == BOOL && op2 != NULL && op2->type == AST_TOK && op2->v.token->type == BOOL, "%s: expected operands", __func__); return make_ast(AST_TOK, make_token(BOOL, !(op1->v.token->v.num & op2->v.token->v.num), NULL)); } /* LISP form (add ) -> NUM */ struct ast * eval_add(struct ast *a, struct env *e) { reassure(a != NULL && a->next != NULL && a->next->next != NULL && a->next->next->next == NULL, "%s: expected 2 operands", __func__); struct ast *op1 = eval(a->next, e); struct ast *op2 = eval(a->next->next, e); reassure(op1 != NULL && op1->type == AST_TOK && op1->v.token->type == NUM && op2 != NULL && op2->type == AST_TOK && op2->v.token->type == NUM, "%s: expected operands", __func__); return make_ast(AST_TOK, make_token(NUM, op1->v.token->v.num + op2->v.token->v.num, NULL)); } /* (lt ) -> BOOL*/ struct ast * eval_lt(struct ast *a, struct env *e) { reassure (a != NULL && a->next != NULL && a->next->next != NULL && a->next->next->next == NULL, "%s: expected 2 operands", __func__); struct ast *op1 = eval(a->next, e); struct ast *op2 = eval(a->next->next, e); reassure(op1 != NULL && op1->type == AST_TOK && op1->v.token->type == NUM && op2 != NULL && op2->type == AST_TOK && op2->v.token->type == NUM, "%s: expected operands", __func__); return make_ast(AST_TOK, make_token(BOOL, op1->v.token->v.num < op2->v.token->v.num, NULL)); } /* LISP form: (inv ) -> NUM */ struct ast * eval_inv(struct ast *a, struct env *e) { reassure(a != NULL && a->next != NULL && a->next->next == NULL, "%s: expected one argument", __func__); struct ast *op = eval(a->next, e); reassure(op != NULL && op->type == AST_TOK && op->v.token->type == NUM && op->next == NULL, "%s: expected argument", __func__); return make_ast(AST_TOK, make_token(NUM, 0 - op->v.token->v.num, NULL)); } /* (num2bool ) -> bool */ struct ast * eval_num2bool(struct ast *a, struct env *e) { reassure(a != NULL && a->next != NULL && a->next->next == NULL, "%s: expected one operand", __func__); struct ast *op = eval(a->next, e); reassure(op != NULL && op->type == AST_TOK && op->v.token->type == NUM && op->next == NULL, "%s: expected operand", __func__); return make_ast(AST_TOK, make_token(BOOL, op->v.token->v.num != 0, NULL)); } struct ast * /* */ eval_lm(struct ast *x, struct env *e) { //debug_ast(x, 0); reassure(x != NULL && x->type == AST_TOK && x->v.token != NULL && x->v.token->type == SYM, "%s: expected lambda lm", __func__); struct ast *l = make_ast(AST_LIST, NULL); l->v.list = x; l->env = e; return l; } struct ast * eval_internal_sym(struct ast *x, struct env *e) { struct ast *a = x->v.list; if (strcmp(a->v.token->v.str, "def") == 0) return eval_def(a, e); else if (strcmp(a->v.token->v.str, "lm") == 0) return eval_lm(a, e); else if (strcmp(a->v.token->v.str, "if") == 0) return eval_if(a, e); else if (strcmp(a->v.token->v.str, "inv") == 0) return eval_inv(a, e); else if (strcmp(a->v.token->v.str, "add") == 0) return eval_add(a, e); else if (strcmp(a->v.token->v.str, "lt") == 0) return eval_lt(a, e); else if (strcmp(a->v.token->v.str, "nand") == 0) return eval_nand(a, e); else if (strcmp(a->v.token->v.str, "q") == 0) return a->next; else if (strcmp(a->v.token->v.str, "num2bool") == 0) return eval_num2bool(a, e); return NULL; } struct ast * eval_defined_sym(struct ast *a, struct env *e) { struct ast *var = lookup(e, a->v.token->v.str); if (var == NULL) debug_env(e); reassure(var != NULL, "%s: symbol %s not found", __func__, a->v.token->v.str); return var; } struct env * bind_args(struct token *args, struct ast *fnargs_values, struct env *e1, struct env *e) { reassure(args && args->type == LPAR, "%s: first argument: %d %s", __func__, args->type, args->v.str); args = args->next; struct ast *a = fnargs_values; struct token *t = args; struct env *newenv = (struct env*)calloc(1, sizeof(struct env)); reassure(newenv != NULL, "%s: allocation failed for newenv", __func__); newenv->name = strdup("__fn"); newenv->parent = e; while (t != NULL && t->type != RPAR) { reassure(t != NULL && t->type == SYM && t->v.str != NULL, "%s: wrong args param found: %s", __func__, t->v.str); reassure(a != NULL && (a->type == AST_TOK || a->type == AST_LIST), "%s: arg=%s given, but no value present", __func__, t->v.str); append(newenv, make_env(t->v.str, eval(a, e1))); t = t->next; a = a->next; } reassure (t->type == RPAR, "%s: args formulation suspicious: %d", __func__, args->type); reassure (a == NULL, "%s: actual value given but no arg left", __func__); return newenv; } struct ast * eval_fn_call(struct ast *a, struct env *e) { struct ast *fn = eval(a, e); reassure(fn != NULL && fn->type == AST_LIST && fn->v.list != NULL && fn->v.list->next != NULL && fn->v.list->next->next != NULL && fn->v.list->type == AST_TOK && fn->v.list->v.token != NULL && fn->v.list->v.token->next != NULL, "%s: function evaluation returned or" "wrong form, expected: (lm )", __func__); //struct ast *fn_args = fn->v.list->next; struct ast *fn_body = fn->v.list->next->next; struct token *args_head = fn->v.list->v.token->next; struct ast *fn_args_values = a->next; struct env *newenv = bind_args(args_head, fn_args_values, e, fn->env); return eval(fn_body, newenv); } struct ast * eval_list(struct ast *a, struct env *e) { if (token_is_internal(a->v.list->v.token)) return eval_internal_sym(a, e); else return eval_fn_call(a->v.list, e); } struct ast * eval(struct ast *a, struct env *e) { if (a == NULL) return NULL; switch (a->type) { case AST_TOK: if (token_is_num(a->v.token)) return make_ast(AST_TOK, a->v.token); else if (token_is_sym(a->v.token)) return eval_defined_sym(a, e); else if (token_is_str(a->v.token)) return make_ast(AST_TOK, a->v.token); else if (token_is_bool(a->v.token)) return make_ast(AST_TOK, a->v.token); reassure(0, "Unexptected token found"); case AST_LIST: reassure ((a->v.list->type == AST_TOK && a->v.list->v.token->type == SYM) || (a->v.list->type == AST_LIST), "%s: expected LIST or TOK", __func__); return eval_list(a, e); default: reassure(0, "%s: wrong AST Type", __func__); } return NULL; } void pr(struct ast *a) { if (a == NULL) return; switch (a->type) { case AST_TOK: if (a->v.token->type == NUM) printf("%d\n", a->v.token->v.num); else if (a->v.token->type == STR) printf("%s\n", a->v.token->v.str); else if (a->v.token->type == SYM) printf("%s\n", a->v.token->v.str); else if (a->v.token->type == BOOL) { if (a->v.token->v.num) printf("t\n"); else printf("nil\n"); } break; case AST_LIST: printf("#fn\n"); break; default: err(1, "don't know how to print"); } } struct token* read_form() { int tok, open = 0; struct token *t, *u = NULL, *start = NULL; while((tok = yylex())) { if (tok == LPAR) open++; else if (tok == RPAR) open--; t = make_token(tok, yylval, yytext); if (start == NULL) start = t; if (u == NULL) u = t; else u->next = t; u = t; if (!open) return start; } exit(0); } int main(void) { struct env default_env = { 0 }, *env; env = &default_env; env->name = "__llm"; while (1) pr(eval(parse(read_form()), env)); return 0; } /* * Local Variables: * mode: c; * eval: (message "main()") * fill-column: 65 * comment-column: 40 * indent-tabs-mode: nil * tab-width: 2 * c-basic-offset: 2 * End: */