llm/llm.l

803 lines
22 KiB
Plaintext

/* ~~~~~~~~~~~~~~~~ llm - The little LISP machinery ~~~~~~~~~~~~~
*
* Copyright (c) 2019 Christian Barthel <bch@online.de>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the
* following conditions
* are met:
* 1. Redistributions of source code must retain the above
* copyright * notice, this list of conditions and the
* following disclaimer.
* 2. Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS
* IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
* FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
* SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
* THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
* OF SUCH DAMAGE.
*
* Goal: evaluate simple LISP forms (strong type checking)
* with a read-eval-print loop.
*
* Data types:
* #fn Functions
* t,nil boolean (nand <bool> <bool>) -> boolean
* "str" Strings
* abc Symbols
* 1234 Number/Integer (inv <num>) -> -num
* (add <num> <num>) -> num
* (lt <num> <num>) -> boolean
* (num2bool <num>) -> boolean
*
* Special Forms:
* (def <SYM> <ARG:symbol,number,#function>)
* Bind ARG to name SYM in current environment.
* (lm (<PARAM>) (<BODY>))
* create function with one or more PARAMs and
* BODY.
* (if <BOOL> <CASE_TRUE> <CASE_FALSE>)
* Evaluate BOOL and evaluate CASE_TRUE if it is true,
* otherwise evaluate CASE_FALSE
* (q a)
* Return a without evaluating it.
*
* Compile: flex tokenize.l && cc lex.yy.c -lfl -o llm
* Or: make; ./llm
* (tested on FreeBSD, should run on most POSIX
* systems)
* BUGS:
* no freeing of memory (yet)
*
* Literature:
* [1] https://news.ycombinator.com/item?id=8714988
* Discussion about "minimal" set of primitives
* for a LISP interpreter.
* [2] https://stackoverflow.com/questions/3482389/
* How many primitives does it take to build a
* LISP machine? Ten, seven or five?
* [3] http://www.cs.cmu.edu/Groups/AI/html/faqs/lang/ \
* lisp/part1/faq-doc-6.html
* Discussion about the minimal set of primitives
* (see attachment)
* [4] https://www.cs.cmu.edu/afs/cs/project/ai-repository \
* /ai/lang/lisp/impl/awk/0.html
* LISP Interpreter in AWK. (see attachments)
* [5] http://norvig.com/lispy.html
* Similar project, with Python instead of C
* [6] http://norvig.com/lispy2.html
* Improved version of [5] in Python. (attachment)
* [7] Structure and Interpretation of Computer Programs,
* by Abelson, Sussman, and Sussman
* [8] FernUniversität Hagen: Logisches und funktionales
* Programmieren, by Prof.Dr.Beierle.
* [9] http://norvig.com/jscheme.html
* Peter Norvig JScheme
*/
%{
#define _POSIX_C_SOURCE 200809L
#define _XOPEN_SOURCE 600
#include <stdio.h>
#include <stdarg.h>
#include <string.h>
#include <err.h>
struct token;
/* One lexical token; the tokens of a form are chained through `next`. */
struct token {
int type; /* token kind; make_token() stores an enum yytokentype here */
union {
char *str; /* text copy, set by make_token() for SYM and STR */
int num; /* value, set by make_token() for NUM and BOOL */
} v;
struct token *next; /* following token, linked by read_form() */
};
struct ast;
struct env;
/* One name binding. The bindings of a scope are chained through
 * `next`; lookup() continues with `parent` of the scope's first
 * entry when the name is not found in that chain. */
struct env {
char *name; /* bound name, compared with strcmp() in lookup() */
struct ast *a; /* bound value, returned by lookup() */
struct env *parent; /* enclosing scope */
struct env *next; /* next binding in the same scope */
};
/* Node of a parsed form; siblings are chained through `next`. */
struct ast {
int type; /* enum asttype value, set by make_ast() */
union {
struct token *token; /* the wrapped token, for AST_TOK nodes */
struct ast *list; /* first child, for AST_LIST nodes */
} v;
struct ast *next; /* next sibling in the enclosing list */
struct env *env; /* set by eval_lm() to the environment the lambda was created in */
};
/* Node kinds stored in struct ast.type. */
enum asttype {
AST_TOK = 400, /* leaf wrapping a single token */
AST_LIST = 401, /* parenthesised list; also the value produced by eval_lm() */
AST_QUOTE = 402 /* NOTE(review): not referenced in the code visible here */
};
enum yytokentype {
NUM = 258,
LPAR = 259,
RPAR = 260,
SYM = 261,
STR = 262,
EOL = 263,
BOOL = 264
};
/* Forward declarations for functions that are used before their
 * definition further down. */
struct token* make_token(enum yytokentype, int, char*);
struct ast* eval(struct ast *a, struct env *e);
struct ast * lookup(struct env *e, char *name);
void debug_ast(struct ast *a, int indent);
int yylval; /* value of the last NUM (atoi of the text) or BOOL (1 for t, 0 for nil) token */
char *yystr; /* NOTE(review): not used in the code visible here */
%}
%%
"(" { return LPAR; }
")" { return RPAR; }
t { /* boolean true */ yylval = 1; return BOOL; }
nil { /* boolean false */ yylval = 0; return BOOL; }
[0-9]+ { /* unsigned decimal literal */ yylval = atoi(yytext); return NUM; }
;.* { /* comment */ }
[ \t\n] { /* ignore white space */ }
\"[^\"]*\" { /* yytext still contains the surrounding quotes */ return STR; }
[a-zA-Z][_a-zA-Z0-9]* { return SYM; }
. { /* errx, not err: errno carries no information about a bad character */ errx(1, "invalid symbol: %s", yytext); }
%%
/* ------------------------------------------------------------ */
/*
 * Assertion-style guard used throughout the interpreter.
 *
 * When `promised` is non-zero nothing happens.  Otherwise the
 * printf-style message `fmt` is written to stderr, followed by a
 * newline, and the process terminates with exit status 1.
 */
void
reassure(int promised, const char *fmt, ...)
{
  va_list args;

  if (promised)
    return;

  va_start(args, fmt);
  vfprintf(stderr, fmt, args);
  va_end(args);
  fputc('\n', stderr);
  exit(1);
}
/*
 * Dump one token to stderr, preceded by `indent` spaces.
 * Prints "NULL TOKEN" when `x` is NULL; otherwise the numeric token
 * type and address, plus the number for NUM tokens or the text for
 * STR and SYM tokens.
 */
void
debug_token(struct token *x, int indent)
{
  for (int i = 0; i < indent; i++)
    putc(' ', stderr);
  if (x == NULL) {
    fprintf(stderr, "NULL TOKEN\n");
    return;
  }
  /* %p requires a pointer to void, hence the explicit casts */
  fprintf(stderr, "%d: (%p)", x->type, (void *)x);
  if (x->type == NUM)
    fprintf(stderr, "%d @ %p", x->v.num, (void *)x);
  if (x->type == STR || x->type == SYM)
    fprintf(stderr, "%s", x->v.str);
  putc('\n', stderr);
}
/*
 * Debug helper: look `name` up in `e` and return the `v.num` field of
 * the bound token.  Returns -1 when the name is unbound or the bound
 * node is not an AST_TOK.  The token kind is not inspected, so the
 * result is only meaningful for NUM and BOOL bindings.
 */
int debug_tval(struct env *e, char *name)
{
  struct ast *bound = lookup(e, name);

  if (bound == NULL || bound->type != AST_TOK)
    return -1;
  return bound->v.token->v.num;
}
/*
 * Dump an environment to stderr: every binding of `e`, then a "-p-"
 * separator, then the same for each enclosing (parent) scope.
 * Does nothing when `e` is NULL.
 */
void
debug_env(struct env* e) {
  for (struct env *scope = e; scope != NULL; scope = scope->parent) {
    for (struct env *cu = scope; cu != NULL; cu = cu->next)
      /* %p requires a pointer to void, hence the explicit casts */
      fprintf(stderr, "e: %s %d %p|%p\n",
              cu->name, debug_tval(scope, cu->name),
              (void *)cu, (void *)scope);
    fprintf(stderr, "-p-\n");
  }
}
/*
 * Dump `a` and all of its following siblings to stderr, each line
 * preceded by `indent` spaces.  For every node the attached
 * environment (if any) is dumped; list nodes are descended into with
 * the indent increased by 4, token nodes are printed with
 * debug_token().
 */
void
debug_ast(struct ast *a, int indent)
{
  for (; a != NULL; a = a->next) {
    for (int i = 0; i < indent; i++)
      putc(' ', stderr);
    /* %p requires a pointer to void, hence the explicit casts */
    fprintf(stderr, "AST: %s,a=%p,list=%p,to=%p,next=%p,env=%p",
            (a->type == AST_TOK) ? "AST_TOK" : "AST_LST",
            (void *)a, (void *)a->v.list, (void *)a->v.token,
            (void *)a->next, (void *)a->env);
    putc('\n', stderr);
    if (a->env != NULL)
      debug_env(a->env);
    if (a->type == AST_LIST) {
      fprintf(stderr, "enter %p\n", (void *)a->v.list);
      fprintf(stderr, "==\n");
      debug_ast(a->v.list, indent + 4);
      fprintf(stderr, "==\n");
    } else if (a->type == AST_TOK) {
      debug_token(a->v.token, indent);
    }
  }
}
/* ------------------------------------------------------------ */
/*
 * Allocate a zero-initialised token of kind `type`.
 *
 * NUM and BOOL tokens store `num`; SYM and STR tokens store a private
 * copy of `str`.  Other kinds (LPAR, RPAR, ...) carry no payload.
 * Terminates the process when memory cannot be allocated or when a
 * SYM/STR token is requested without text.
 */
struct token*
make_token(enum yytokentype type, int num, char* str)
{
  struct token *t = calloc(1, sizeof *t);
  if (t == NULL)
    err(1, "malloc failed");
  t->type = type;
  if (type == NUM || type == BOOL) {
    t->v.num = num;
  } else if (type == SYM || type == STR) {
    if (str == NULL)
      errx(1, "%s: missing text for token type %d", __func__, (int)type);
    t->v.str = strdup(str);
    /* an unchecked NULL here would crash much later, in strcmp()/printf() */
    if (t->v.str == NULL)
      err(1, "strdup failed");
  }
  return t;
}
/*
 * Allocate a zero-initialised AST node of kind `type`.
 * Only AST_TOK nodes store `t`; for every other kind the union is
 * left zeroed and the caller fills in `v.list` itself.
 * Terminates the process when the allocation fails.
 */
struct ast*
make_ast(enum asttype type, struct token *t)
{
  struct ast *node = (struct ast*) calloc(1, sizeof *node);

  if (node == NULL)
    err(1, "malloc failed");
  node->type = type;
  if (type == AST_TOK)
    node->v.token = t;
  return node;
}
/*
 * Allocate a single binding `name` -> `a`.  The name pointer is
 * stored as given (not copied); `parent` and `next` start out NULL.
 * Terminates the process when the allocation fails.
 */
struct env*
make_env(char *name, struct ast *a)
{
  struct env *binding = (struct env*) calloc(1, sizeof *binding);

  if (binding == NULL)
    err(1, "malloc failed");
  binding->a = a;
  binding->name = name;
  return binding;
}
/* ------------------------------------------------------------ */
/* Token kind predicates: each returns non-zero when `t` has that kind. */
int
token_is_num(struct token *t)
{
  return t->type == NUM;
}
int
token_is_sym(struct token *t)
{
  return t->type == SYM;
}
int
token_is_str(struct token *t)
{
  return t->type == STR;
}
int
token_is_bool(struct token *t)
{
  return t->type == BOOL;
}
/*
 * Return 1 when the text of `t` names one of the built-in forms
 * handled by eval_internal_sym(), 0 otherwise.  Reads `t->v.str`, so
 * the token must carry text.
 */
int
token_is_internal(struct token *t)
{
  static const char *const builtins[] = {
    "def", "lm", "if", "inv", "add", "lt", "nand", "q", "num2bool"
  };

  for (size_t i = 0; i < sizeof builtins / sizeof builtins[0]; i++)
    if (strcmp(t->v.str, builtins[i]) == 0)
      return 1;
  return 0;
}
/* ------------------------------------------------------------ */
/* Global variable alarm:
 * upon parsing a list of tokens like:
 * "(" -> "def" -> "(" -> "b" -> ")" -> "c" -> ")"
 * care must be taken when entering a new sub-list (b),
 * because the processing in the upper level must continue
 * where the sub-list ended, i.e. "c" must be the next
 * token on this level.  This is solved with the global
 * variable `next`: every successful parse() call leaves in it
 * the first token it did not consume.  Not the ideal way, though.
 */
struct token *next;
/*
 * Build the AST for the form starting at token `t`.
 *
 * NUM, BOOL, SYM and STR tokens become AST_TOK leaves.  An LPAR opens
 * an AST_LIST whose children are parsed recursively up to the
 * matching RPAR.  Any other token yields NULL.  On success the global
 * `next` points at the token following the parsed form.
 * Terminates the process on a NULL token or a malformed list.
 */
struct ast*
parse(struct token *t)
{
  if (t == NULL)
    errx(1, "Unexpected token: <NULL>");

  switch (t->type) {
  case NUM:
  case BOOL:
  case SYM:
  case STR:
    next = t->next;
    return make_ast(AST_TOK, t);
  case LPAR:
    break;
  default:
    return NULL;
  }

  struct ast *head = make_ast(AST_LIST, NULL);
  struct ast *tail = NULL;

  t = t->next;
  /* check before the loop condition dereferences t */
  reassure(t != NULL, "%s: unexpected NULL", __func__);
  while (t->type != RPAR) {
    struct ast *child = parse(t);
    reassure(child != NULL, "syntax error");
    if (tail == NULL)
      head->v.list = child;
    else
      tail->next = child;
    tail = child;
    t = next; /* skip all tokens that are processed */
    reassure(t != NULL, "%s: unexpected NULL(2)", __func__);
  }
  next = t->next; /* step over the closing parenthesis */
  return head;
}
/*
 * Link binding `a` behind the last entry of the chain that starts at
 * `e`.  `e` must not be NULL.
 */
void
append(struct env *e, struct env *a)
{
  struct env *last = e;

  while (last->next != NULL)
    last = last->next;
  last->next = a;
}
/*
 * Find the value bound to `name`, searching the scope `e` first and
 * then each enclosing scope in turn.  Returns NULL when no scope
 * contains the name (or when `e` is NULL).
 */
struct ast *
lookup(struct env *e, char *name)
{
  for (struct env *scope = e; scope != NULL; scope = scope->parent)
    for (struct env *binding = scope; binding != NULL; binding = binding->next)
      if (strcmp(binding->name, name) == 0)
        return binding->a;
  return NULL;
}
/*
 * LISP form: (def <sym> <expr>)
 *
 * Evaluates <expr> in `e` and appends the binding <sym> -> value to
 * the scope `e`.  `a` points at the `def` node.  Always returns NULL.
 * Terminates the process when the form is malformed or when <sym> is
 * already bound in the scope `e` (enclosing scopes are not checked).
 */
struct ast *
eval_def(struct ast *a, struct env *e)
{
  a = a->next; /* skip `def` */
  /* presence first: the checks below dereference both nodes */
  reassure(a != NULL && a->next != NULL, /* need sth to bind.. */
           "%s: unexpected NULL", __func__);
  /* a list in name position has no token to inspect */
  reassure(a->type == AST_TOK && a->v.token != NULL,
           "%s: expected SYM as name", __func__);
  reassure(a->v.token->type == SYM, /* must be a symbol */
           "%s: expected SYM, given: %d",
           __func__, a->v.token->type);

  struct ast *value = a->next;
  reassure(value->type == AST_LIST ||
           (value->type == AST_TOK &&
            (value->v.token->type == STR ||
             value->v.token->type == BOOL ||
             value->v.token->type == NUM ||
             value->v.token->type == SYM)),
           "%s: expected STR,BOOL,NUM,SYM as assigned expression",
           __func__);

  char *new = a->v.token->v.str;
  for (struct env *x = e; x; x = x->next)
    reassure(strcmp(x->name, new) != 0,
             "%s: %s already assigned in env=%p",
             __func__, new, (void *)e);
  append(e, make_env(new, eval(value, e)));
  reassure(value->next == NULL,
           "%s: ended with unexpected token", __func__);
  return NULL;
}
/*
 * LISP form: (if <cond::bool> <expr> <expr>)
 *
 * Evaluates the condition, which must yield a BOOL token, and then
 * evaluates and returns the first expression when the condition is
 * true, the second one otherwise.  `a` points at the `if` node.
 * Terminates the process on a malformed form.
 */
struct ast *
eval_if(struct ast *a, struct env *e)
{
  struct ast *test = a->next;

  reassure(test != NULL && test->next != NULL,
           "%s: unexpected NULL", __func__);

  struct ast *condition = eval(test, e);
  reassure(condition != NULL &&
           condition->type == AST_TOK &&
           condition->v.token->type == BOOL,
           "%s: expected boolean token", __func__);

  struct ast *on_true = test->next;
  struct ast *on_false = on_true->next;
  reassure(on_true != NULL && on_false != NULL,
           "%s: unexpected expressions",
           __func__);

  struct ast *chosen =
      (condition->v.token->v.num == 1) ? on_true : on_false;
  return eval(chosen, e);
}
/*
 * LISP form: (nand <expr::bool> <expr::bool>) -> BOOL
 *
 * Evaluates both operands, which must yield BOOL tokens, and returns
 * a fresh BOOL node holding the negated bitwise AND of their values.
 * `a` points at the `nand` node; exactly two operands are required.
 */
struct ast *
eval_nand(struct ast *a, struct env *e)
{
  reassure(a != NULL &&
           a->next != NULL &&
           a->next->next != NULL &&
           a->next->next->next == NULL,
           "%s: unexpected NULL", __func__);

  struct ast *lhs = eval(a->next, e);
  struct ast *rhs = eval(a->next->next, e);
  reassure(lhs != NULL &&
           lhs->type == AST_TOK &&
           lhs->v.token->type == BOOL &&
           rhs != NULL &&
           rhs->type == AST_TOK &&
           rhs->v.token->type == BOOL,
           "%s: expected <bool> <bool> operands", __func__);

  int both = lhs->v.token->v.num & rhs->v.token->v.num;
  struct token *result = make_token(BOOL, !both, NULL);
  return make_ast(AST_TOK, result);
}
/*
 * LISP form: (add <expr::num> <expr::num>) -> NUM
 *
 * Evaluates both operands, which must yield NUM tokens, and returns a
 * fresh NUM node holding their sum.  `a` points at the `add` node;
 * exactly two operands are required.
 */
struct ast *
eval_add(struct ast *a, struct env *e)
{
  reassure(a != NULL &&
           a->next != NULL &&
           a->next->next != NULL &&
           a->next->next->next == NULL,
           "%s: expected 2 operands", __func__);
  struct ast *op1 = eval(a->next, e);
  struct ast *op2 = eval(a->next->next, e);
  reassure(op1 != NULL &&
           op1->type == AST_TOK &&
           op1->v.token->type == NUM &&
           op2 != NULL &&
           op2->type == AST_TOK &&
           op2->v.token->type == NUM,
           "%s: expected <num> <num> operands", __func__);
  /* Add in unsigned arithmetic: signed overflow is undefined
   * behaviour, unsigned addition wraps.  Converting the result back
   * to int is implementation-defined when it is out of range. */
  unsigned int sum = (unsigned int)op1->v.token->v.num +
                     (unsigned int)op2->v.token->v.num;
  return make_ast(AST_TOK, make_token(NUM, (int)sum, NULL));
}
/*
 * LISP form: (lt <expr::num> <expr::num>) -> BOOL
 *
 * Evaluates both operands, which must yield NUM tokens, and returns a
 * fresh BOOL node that is true when the first value is smaller than
 * the second.  `a` points at the `lt` node; exactly two operands are
 * required.
 */
struct ast *
eval_lt(struct ast *a, struct env *e)
{
  reassure(a != NULL &&
           a->next != NULL &&
           a->next->next != NULL &&
           a->next->next->next == NULL,
           "%s: expected 2 operands", __func__);

  struct ast *lhs = eval(a->next, e);
  struct ast *rhs = eval(a->next->next, e);
  reassure(lhs != NULL &&
           lhs->type == AST_TOK &&
           lhs->v.token->type == NUM &&
           rhs != NULL &&
           rhs->type == AST_TOK &&
           rhs->v.token->type == NUM,
           "%s: expected <NUM> <NUM> operands", __func__);

  int is_less = lhs->v.token->v.num < rhs->v.token->v.num;
  struct token *result = make_token(BOOL, is_less, NULL);
  return make_ast(AST_TOK, result);
}
/*
 * LISP form: (inv <expr::num>) -> NUM
 *
 * Evaluates the operand, which must yield a single NUM token, and
 * returns a fresh NUM node holding its negation.  `a` points at the
 * `inv` node; exactly one operand is required.
 */
struct ast *
eval_inv(struct ast *a, struct env *e)
{
  reassure(a != NULL &&
           a->next != NULL &&
           a->next->next == NULL,
           "%s: expected one argument", __func__);
  struct ast *op = eval(a->next, e);
  reassure(op != NULL &&
           op->type == AST_TOK &&
           op->v.token->type == NUM &&
           op->next == NULL,
           "%s: expected <NUM> argument", __func__);
  /* Negate in unsigned arithmetic: 0 - INT_MIN overflows a signed
   * int, which is undefined behaviour.  Converting the result back
   * to int is implementation-defined when it is out of range. */
  unsigned int negated = 0u - (unsigned int)op->v.token->v.num;
  return make_ast(AST_TOK, make_token(NUM, (int)negated, NULL));
}
/*
 * LISP form: (num2bool <expr::num>) -> BOOL
 *
 * Evaluates the operand, which must yield a single NUM token, and
 * returns a fresh BOOL node that is true when the number is not
 * zero.  `a` points at the `num2bool` node; exactly one operand is
 * required.
 */
struct ast *
eval_num2bool(struct ast *a, struct env *e)
{
  reassure(a != NULL &&
           a->next != NULL &&
           a->next->next == NULL,
           "%s: expected one operand", __func__);

  struct ast *value = eval(a->next, e);
  reassure(value != NULL &&
           value->type == AST_TOK &&
           value->v.token->type == NUM &&
           value->next == NULL,
           "%s: expected <num> operand", __func__);

  int truth = value->v.token->v.num != 0;
  struct token *result = make_token(BOOL, truth, NULL);
  return make_ast(AST_TOK, result);
}
/*
 * LISP form: (lm (<param>...) <body>)
 *
 * Builds the function value for a lambda: a new AST_LIST node whose
 * list is the unevaluated form starting at the `lm` symbol node `x`,
 * and whose `env` records the environment `e` in which the lambda
 * was created.  Parameters and body are not inspected here.
 */
struct ast *
eval_lm(struct ast *x, struct env *e)
{
  int starts_with_symbol = x != NULL &&
                           x->type == AST_TOK &&
                           x->v.token != NULL &&
                           x->v.token->type == SYM;
  reassure(starts_with_symbol, "%s: expected lambda lm", __func__);

  struct ast *closure = make_ast(AST_LIST, NULL);
  closure->env = e;
  closure->v.list = x;
  return closure;
}
/*
 * Dispatch a built-in form.  `x` is the AST_LIST node of the form;
 * its first child must be a token carrying the built-in's name.
 * `q` returns the node following it unevaluated; every other
 * built-in is handed to its eval_* function.  Returns NULL when the
 * name is not a built-in.
 */
struct ast *
eval_internal_sym(struct ast *x, struct env *e)
{
  struct ast *head = x->v.list;
  const char *name = head->v.token->v.str;

  if (!strcmp(name, "def"))
    return eval_def(head, e);
  if (!strcmp(name, "lm"))
    return eval_lm(head, e);
  if (!strcmp(name, "if"))
    return eval_if(head, e);
  if (!strcmp(name, "inv"))
    return eval_inv(head, e);
  if (!strcmp(name, "add"))
    return eval_add(head, e);
  if (!strcmp(name, "lt"))
    return eval_lt(head, e);
  if (!strcmp(name, "nand"))
    return eval_nand(head, e);
  if (!strcmp(name, "q"))
    return head->next;
  if (!strcmp(name, "num2bool"))
    return eval_num2bool(head, e);
  return NULL;
}
/*
 * Resolve the symbol node `a` in environment `e` and return the bound
 * value.  When the symbol is unbound, the environment is dumped to
 * stderr and the process terminates.
 */
struct ast *
eval_defined_sym(struct ast *a, struct env *e)
{
  char *name = a->v.token->v.str;
  struct ast *bound = lookup(e, name);

  if (bound == NULL) {
    debug_env(e);
    reassure(0,
             "%s: symbol %s not found",
             __func__, name);
  }
  return bound;
}
struct env *
bind_args(struct token *args,
struct ast *fnargs_values, struct env *e1, struct env *e)
{
reassure(args && args->type == LPAR,
"%s: first argument: %d %s", __func__,
args->type, args->v.str);
args = args->next;
struct ast *a = fnargs_values;
struct token *t = args;
struct env *newenv = (struct env*)calloc(1, sizeof(struct env));
reassure(newenv != NULL,
"%s: allocation failed for newenv", __func__);
newenv->name = strdup("__fn");
newenv->parent = e;
while (t != NULL && t->type != RPAR) {
reassure(t != NULL &&
t->type == SYM &&
t->v.str != NULL,
"%s: wrong args param found: %s",
__func__, t->v.str);
reassure(a != NULL &&
(a->type == AST_TOK || a->type == AST_LIST),
"%s: arg=%s given, but no value present",
__func__, t->v.str);
append(newenv,
make_env(t->v.str, eval(a, e1)));
t = t->next;
a = a->next;
}
reassure (t->type == RPAR,
"%s: args formulation suspicious: %d",
__func__, args->type);
reassure (a == NULL,
"%s: actual value given but no arg left",
__func__);
return newenv;
}
/*
 * Call a user-defined function.
 *
 * `a` is the first node of the call form: it evaluates to the
 * function value, and its siblings are the argument expressions.
 * The function value must be a list of the shape
 * (lm <param> <body>) as produced by eval_lm().  The arguments are
 * evaluated in `e` and bound in a new scope whose parent is the
 * environment stored in the function value; the body is evaluated in
 * that scope and its result returned.
 */
struct ast *
eval_fn_call(struct ast *a, struct env *e)
{
  struct ast *fn = eval(a, e);
  reassure(fn != NULL &&
           fn->type == AST_LIST &&
           fn->v.list != NULL &&
           fn->v.list->next != NULL &&
           fn->v.list->next->next != NULL &&
           fn->v.list->type == AST_TOK &&
           fn->v.list->v.token != NULL &&
           fn->v.list->v.token->next != NULL,
           "%s: function evaluation returned <NULL> or "
           "wrong form, expected: (lm <param> <body>)", __func__);
  struct ast *fn_body = fn->v.list->next->next;
  /* the parameter names are read from the token chain behind `lm` */
  struct token *args_head = fn->v.list->v.token->next;
  struct ast *fn_args_values = a->next;
  struct env *newenv =
      bind_args(args_head, fn_args_values, e, fn->env);
  return eval(fn_body, newenv);
}
struct ast *
eval_list(struct ast *a, struct env *e)
{
if (token_is_internal(a->v.list->v.token))
return eval_internal_sym(a, e);
else
return eval_fn_call(a->v.list, e);
}
struct ast *
eval(struct ast *a, struct env *e)
{
if (a == NULL)
return NULL;
switch (a->type) {
case AST_TOK:
if (token_is_num(a->v.token))
return make_ast(AST_TOK, a->v.token);
else if (token_is_sym(a->v.token))
return eval_defined_sym(a, e);
else if (token_is_str(a->v.token))
return make_ast(AST_TOK, a->v.token);
else if (token_is_bool(a->v.token))
return make_ast(AST_TOK, a->v.token);
reassure(0, "Unexptected token found");
case AST_LIST:
reassure ((a->v.list->type == AST_TOK &&
a->v.list->v.token->type == SYM) ||
(a->v.list->type == AST_LIST),
"%s: expected LIST or TOK", __func__);
return eval_list(a, e);
default:
reassure(0, "%s: wrong AST Type", __func__);
}
return NULL;
}
void
pr(struct ast *a)
{
if (a == NULL)
return;
switch (a->type) {
case AST_TOK:
if (a->v.token->type == NUM)
printf("%d\n", a->v.token->v.num);
else if (a->v.token->type == STR)
printf("%s\n", a->v.token->v.str);
else if (a->v.token->type == SYM)
printf("%s\n", a->v.token->v.str);
else if (a->v.token->type == BOOL) {
if (a->v.token->v.num)
printf("t\n");
else
printf("nil\n");
}
break;
case AST_LIST:
printf("#fn\n");
break;
default:
err(1, "don't know how to print");
}
}
/*
 * Read the tokens of the next complete form from the scanner and
 * return them as a chain linked through `next`.  A form is either a
 * single token or everything from an opening parenthesis up to its
 * matching closing parenthesis.
 *
 * At end of input the process exits: with status 0 when no form was
 * in progress, with an error when the input ended inside a form.
 * A closing parenthesis without a matching opening one is an error.
 */
struct token*
read_form(void)
{
  int tok, open = 0;
  struct token *start = NULL, *last = NULL;

  while ((tok = yylex())) {
    if (tok == LPAR) {
      open++;
    } else if (tok == RPAR) {
      /* a negative depth would glue unrelated forms together */
      reassure(open > 0, "%s: unexpected )", __func__);
      open--;
    }
    struct token *t = make_token(tok, yylval, yytext);
    if (start == NULL)
      start = t;
    else
      last->next = t;
    last = t;
    if (open == 0)
      return start;
  }
  reassure(start == NULL,
           "%s: unexpected end of input inside a form", __func__);
  exit(0);
}
/*
 * Read-eval-print loop: read one form, parse it, evaluate it in the
 * global environment and print the result.  The loop is left only
 * through exit(), called by read_form() at end of input or by the
 * error paths.
 */
int
main(void)
{
  struct env default_env = { 0 };
  struct env *env = &default_env;

  env->name = "__llm";
  for (;;) {
    struct token *form = read_form();
    struct ast *tree = parse(form);
    pr(eval(tree, env));
  }
  return 0;
}
/*
* Local Variables:
* mode: c;
* eval: (message "main()")
* fill-column: 65
* comment-column: 40
* indent-tabs-mode: nil
* tab-width: 2
* c-basic-offset: 2
* End:
*/