From ca33a1096ce9dd8823b476b9244c8c133dee2a38 Mon Sep 17 00:00:00 2001 From: Christian Barthel Date: Sat, 29 Jun 2019 12:06:43 +0200 Subject: [PATCH] parsing and environment infrastructure . add env struct and parameter, . add lookup() and append() functions . add `q` and `add` in eval, --- tokenize.l | 243 +++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 171 insertions(+), 72 deletions(-) diff --git a/tokenize.l b/tokenize.l index b47c405..f08cccc 100644 --- a/tokenize.l +++ b/tokenize.l @@ -1,6 +1,7 @@ -/* - * flex tokenize.l - * cc lex.yy.c -lfl +/* llm - litle lisp machine: + * Author: Christian Barthel + * Compile: flex tokenize.l + * cc lex.yy.c -lfl */ %{ #include @@ -11,26 +12,34 @@ struct token; struct token { int type; union { - char *str; - int num; + char *str; + int num; } v; struct token *next; }; struct ast; struct ast { - int type; + int type; union { struct token *token; - struct ast *list; + struct ast *list; } v; - struct ast *next; + struct ast *next; +}; + +struct env; +struct env { + char *name; + struct ast *a; + struct env *parent; + struct env *next; }; - enum asttype { AST_TOK = 400, - AST_LIST = 401 + AST_LIST = 401, + AST_QUOTE = 402 }; enum yytokentype { NUM = 258, @@ -38,7 +47,8 @@ enum yytokentype { RPAR = 260, SYM = 261, STR = 262, - EOL = 263 + EOL = 263, + QUOTE = 264 }; struct token *make_token(enum yytokentype, int, char*); @@ -47,32 +57,62 @@ char *yystr; %} %% -"(" { return LPAR; } -")" { return RPAR; } -[0-9]+ { yylval = atoi(yytext); return NUM; } -\n { return EOL; } -[ \t\n] { /* ignore white space */ } -\".*\" { return STR; } -[a-zA-Z][a-zA-Z0-9]* { return SYM; } -. { err(1, "invalid symbol: %s\n", yytext); } +"(" { return LPAR; } +")" { return RPAR; } +[0-9]+ { yylval = atoi(yytext); return NUM; } +\n { return EOL; } +[ \t\n] { /* ignore white space */ } +\".*\" { return STR; } +[a-zA-Z][a-zA-Z0-9]* { return SYM; } +. { err(1, "invalid symbol: %s\n", yytext); } %% /* * Goal: write LISP read-eval-print Loop and support - * @ Function - * 0,1 boolean (nand 0 1) -> boolean - * (eq 0 0) -> boolean - * "str" Strings (streq str1 str2) -> boolean - * abc Symboles (symeq s1 s2) -> boolean - * 1234 Number/Integer [add,sub,div,mul,mod] -> number - * [lt, numeq] -> boolean - * [num2bool] -> boolean + * @ Function + * 0,1 boolean (nand 0 1) -> boolean + * (eq 0 0) -> boolean + * "str" Strings (streq str1 str2) -> boolean + * abc Symboles (symeq s1 s2) -> boolean + * 1234 Number/Integer [add,sub,div,mul,mod] -> num + * [lt, numeq] -> boolean + * [num2bool] -> boolean * * (def ..) ( )) * (if ) + * (quote a) -> a */ +void +debug_token(struct token *x, int indent) { + for (int i = 0; i < indent; i++) putc(' ', stderr); + fprintf(stderr, "%d: (%p)", x->type, x); + if (x->type == NUM) fprintf(stderr, "%d @ %p", x->v.num, x); + if (x->type == STR || x->type == SYM) + fprintf(stderr, "%s", x->v.str); + putc('\n', stderr); +} + +void +debug_ast(struct ast *a, int indent) { + while (a) { + for (int i = 0; i < indent; i++) putc(' ', stderr); + fprintf(stderr, "AST: %s,a=%p,list=%p,token=%p,next=%p", (a->type == AST_TOK) ? "AST_TOK" : + "AST_LIST", a, a->v.list, a->v.token, a->next); + putc('\n', stderr); + if (a->type == AST_LIST) { + fprintf(stderr, "enter %p\n", a->v.list); + fprintf(stderr, "==\n"); + debug_ast(a->v.list, indent+4); + fprintf(stderr, "==\n"); + } + else if(a->type == AST_TOK) + debug_token(a->v.token, indent); + a = a->next; + } +} + struct token* make_token(enum yytokentype type, int num, char* str) { @@ -91,7 +131,8 @@ make_token(enum yytokentype type, int num, char* str) struct ast* make_ast(enum asttype type, struct token *t) { - struct ast *a = (struct ast*) calloc(1, sizeof(struct ast)); + struct ast *a = + (struct ast*) calloc(1, sizeof(struct ast)); if (a == NULL) err(1, "malloc failed"); a->type = type; @@ -102,6 +143,25 @@ make_ast(enum asttype type, struct token *t) return a; } +struct env* +make_env(char *name, struct ast *a) +{ + struct env *e = + (struct env*) calloc(1, sizeof(struct env)); + if (e == NULL) + err(1, "malloc failed"); + e->name = name; + e->a = a; + return e; +} + +/* upon parsing a list of tokens like: + * "(" -> "def" -> "(" -> "b" -> ")" -> "c" -> ")" + * care must be taken when entering a new sub-list (b), + * because the processing in the upper level must continue + * where the sub-list ended, i.e. "c" must be the next + * token on this level. + */ struct token *next; struct ast* parse(struct token *t) @@ -118,85 +178,113 @@ parse(struct token *t) a = parse(t); if (a == NULL) err(1, "syntax error"); - if (head->v.list == NULL) { + if (head->v.list == NULL) p = head->v.list = a; - } else { + else p->next = a; - } p = a; - t = next; /* skip all tokens that are processed */ + t = next; /* skip all tokens that are processed */ assert(t != NULL); } - fprintf(stderr, "a=%p, t=%p\n", a, t); assert(t->type == RPAR); t = t->next; next = t; return head; } else if (t->type == NUM) { - printf("is a number %d\n", t->v.num); next = t->next; return make_ast(AST_TOK, t); } else if (t->type == SYM) { - printf("is a sym: %s (%p)\n", t->v.str, t); next = t->next; return make_ast(AST_TOK, t); } else if (t->type == STR) { next = t->next; - printf("is a str: %s\n", t->v.str); return make_ast(AST_TOK, t); } return NULL; } - - void -debug_token(struct token *x, indent) { - for (int i = 0; i < indent; i++) putc(' ', stderr); - fprintf(stderr, "%d: (%p)", x->type, x); - if (x->type == NUM) fprintf(stderr, "%d @ %p", x->v.num, x); - if (x->type == STR || x->type == SYM) - fprintf(stderr, "%s", x->v.str); - putc('\n', stderr); -} - -void -debug_ast(struct ast *a, indent) { - while (a) { - for (int i = 0; i < indent; i++) putc(' ', stderr); - fprintf(stderr, "AST: %s,a=%p,list=%p,token=%p,next=%p", (a->type == AST_TOK) ? "AST_TOK" : - "AST_LIST", a, a->v.list, a->v.token, a->next); - putc('\n', stderr); - if (a->type == AST_LIST) { - fprintf(stderr, "enter %p\n", a->v.list); - fprintf(stderr, "==\n"); - debug_ast(a->v.list, indent+4); - fprintf(stderr, "==\n"); - } - else if(a->type == AST_TOK) - debug_token(a->v.token, indent); - a = a->next; - } +append(struct env *e, struct env *a) +{ + if (e->next == NULL) { + e->next = a; + return; + } else + append(e->next, a); } struct ast * -eval(struct ast *a) +lookup(struct env *e, char *name) { + if (e == NULL) + return NULL; + for (struct env *t = e; t != NULL; t = t->next) + if (strcmp(t->name, name) == 0) + return t->a; + return lookup(e->parent, name); +} + +struct ast * +eval(struct ast *a, struct env *e) +{ + if (a == NULL) return NULL; + if (a->type == AST_TOK) { if (a->v.token->type == NUM) { return make_ast(AST_TOK, a->v.token); } else if (a->v.token->type == SYM) { + fprintf(stderr, "process: %s\n", a->v.token->v.str); if (strcmp(a->v.token->v.str, "def") == 0) { - printf("define: %s\n", a->next->v.token->v.str); - /* x = eval a->next->next */ - /* install: var:=x */ + a = a->next; + assert((a->next->type == AST_TOK && + a->next->v.token->type== STR) || + (a->next->type == AST_TOK && + a->next->v.token->type == NUM) || + (a->next->type == AST_LIST) || + (a->next->type == AST_TOK && + a->next->v.token->type== SYM)); + append(e, make_env(a->v.token->v.str, eval(a->next, e))); + return NULL; + } else if (strcmp(a->v.token->v.str, "lm") == 0) { + } else if (strcmp(a->v.token->v.str, "if") == 0) { + } else if (strcmp(a->v.token->v.str, "add") == 0) { + assert (a != NULL && + a->next != NULL && + a->next->next != NULL && + a->next->next->next == NULL); + + struct ast *op1 = eval(a->next, e); + struct ast *op2 = eval(a->next->next, e); + + assert (op1 != NULL && + op1->type == AST_TOK && + op1->v.token->type == NUM && + op2 != NULL && + op2->type == AST_TOK && + op2->v.token->type == NUM); + return make_ast(AST_TOK, + make_token(NUM, + op1->v.token->v.num + + op2->v.token->v.num, + NULL)); + } else if (strcmp(a->v.token->v.str, "q") == 0) { + //return make_ast(AST_QUOTE, a->next); + return a->next; + } else { + // fprintf(stderr, "Lookup: %s\n", a->v.token->v.str); + struct ast *var = lookup(e, a->v.token->v.str); + assert(var != NULL); /* variable found? */ + return var; } + /* XXX: nand, eq, streq, symeq, add, sub, div, mul, + * mod, lt, numeq, num2bool + */ } else if (a->v.token->type == STR) - fprintf(stderr, "str\n"); + return make_ast(AST_TOK, a->v.token); } else if (a->type == AST_LIST) { - eval(a->v.list); + return eval(a->v.list, e); } else { fprintf(stderr, "%d\n", a->type); assert(0); @@ -207,10 +295,18 @@ eval(struct ast *a) void pr(struct ast *a) { + if (a == NULL) { + fprintf(stderr, "null %p\n", a); + return; + } switch (a->type) { case AST_TOK: if (a->v.token->type == NUM) printf("%d\n", a->v.token->v.num); + else if (a->v.token->type == STR) + printf("%s\n", a->v.token->v.str); + else if (a->v.token->type == SYM) + printf("%s\n", a->v.token->v.str); } } @@ -218,8 +314,11 @@ int main(void) { int tok; struct token *t, *u = NULL, *start = NULL; + struct env default_env = { 0 }, *env; + env = &default_env; + env->name = ""; - while(tok = yylex()) { + while((tok = yylex())) { /* read token and create list */ t = make_token(tok, yylval, yytext); if (start == NULL) start = t; @@ -234,7 +333,7 @@ int main(void) /* struct ast *tl = parse(start); */ /* eval(tl); */ - eval(parse(start)); + pr(eval(parse(start), env)); //debug_ast(tl, 1); /* start anew: */