llm/tokenize.l

450 lines
11 KiB
Plaintext

/* llm - little lisp machine:
* Author: Christian Barthel <bch@online.de>
* Compile: flex tokenize.l
* cc lex.yy.c -lfl
*/
%{
#include <assert.h>
#include <err.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* One lexical token; the tokens of a form are chained through `next`. */
struct token {
  int type;                   /* an enum yytokentype value */
  union {
    char *str;                /* text payload, filled in for SYM and STR */
    int num;                  /* integer payload, filled in for NUM and BOOL */
  } v;
  struct token *next;         /* following token, or NULL at the end */
};
/* Node of the parsed program: a leaf wrapping one token, or a list. */
struct ast {
  int type;                   /* an enum asttype value */
  union {
    struct token *token;      /* payload of an AST_TOK node */
    struct ast *list;         /* first child of an AST_LIST node, NULL if empty */
  } v;
  struct ast *next;           /* next sibling in the enclosing list, or NULL */
};
/* One name/value binding of an environment. */
struct env {
  char *name;                 /* name the value is bound to */
  struct ast *a;              /* bound value */
  struct env *parent;         /* enclosing scope, searched after this chain */
  struct env *next;           /* next binding of the same scope, or NULL */
};
/* Discriminator stored in struct ast.type. */
enum asttype {
  AST_TOK = 400,              /* leaf node holding a single token */
  AST_LIST,                   /* 401: parenthesized list of nodes */
  AST_QUOTE                   /* 402: not produced by the code visible in this file */
};
/* Token kinds returned by the scanner and stored in struct token.type. */
enum yytokentype {
  NUM = 258,                  /* decimal integer literal */
  LPAR,                       /* 259: "(" */
  RPAR,                       /* 260: ")" */
  SYM,                        /* 261: symbol */
  STR,                        /* 262: double-quoted string */
  EOL,                        /* 263: not returned by any scanner rule in this file */
  BOOL                        /* 264: t or nil */
};
/* Forward declarations for functions defined in the user-code section. */
struct token *make_token(enum yytokentype type, int num, char *str);
struct ast *eval(struct ast *a, struct env *e);

/* Integer value of the most recent NUM or BOOL token; set by the scanner. */
int yylval;
/* Not referenced anywhere in the visible part of this file. */
char *yystr;
%}
%%
  /* Scanner rules.  flex takes the longest match and, on a tie, the rule
   * listed first: a lone "t" or "nil" is a BOOL, while "tx" is a SYM. */
"("                     { return LPAR; }
")"                     { return RPAR; }
t                       { yylval = 1; return BOOL; }
nil                     { yylval = 0; return BOOL; }
[0-9]+                  { yylval = atoi(yytext); return NUM; }
[ \t\n]                 { /* ignore white space */ }
\"[^"\n]*\"             { /* stop at the first closing quote so that two
                           * strings on one line stay two tokens; yytext
                           * still includes both quote characters */
                          return STR; }
[a-zA-Z][a-zA-Z0-9]*    { return SYM; }
.                       { /* errx, not err: errno is unrelated to a bad
                           * input character */
                          errx(1, "invalid symbol: %s", yytext); }
%%
/*
* Goal: write a LISP read-eval-print loop and support
* @ Functions
* t,nil boolean (nand 0 1) -> boolean
* "str" Strings (streq str1 str2) -> boolean
* abc Symbols (symeq s1 s2) -> boolean
* 1234 Number/Integer [add,inv] -> num
* [lt] -> boolean
* [num-to-bool] -> boolean
*
* (def <var> <symbol,number,#function>)
* (lambda (<symbol> ..) (<sym..> ))
* (if <bool> <body> <else>)
* (quote a) -> a
*/
/*
 * Print one token to stderr, preceded by `indent` spaces:
 * its numeric type, its address, and its payload (the number for NUM,
 * the text for STR and SYM).  `x` must not be NULL.
 */
void
debug_token(struct token *x, int indent)
{
  for (int i = 0; i < indent; i++)
    putc(' ', stderr);
  /* %p requires a void * argument, hence the casts. */
  fprintf(stderr, "%d: (%p)", x->type, (void *)x);
  if (x->type == NUM)
    fprintf(stderr, "%d @ %p", x->v.num, (void *)x);
  if (x->type == STR || x->type == SYM)
    fprintf(stderr, "%s", x->v.str);
  putc('\n', stderr);
}
void
debug_ast(struct ast *a, int indent)
{
while (a) {
for (int i = 0; i < indent; i++) putc(' ', stderr);
fprintf(stderr, "AST: %s,a=%p,list=%p,token=%p,next=%p", (a->type == AST_TOK) ? "AST_TOK" :
"AST_LIST", a, a->v.list, a->v.token, a->next);
putc('\n', stderr);
if (a->type == AST_LIST) {
fprintf(stderr, "enter %p\n", a->v.list);
fprintf(stderr, "==\n");
debug_ast(a->v.list, indent+4);
fprintf(stderr, "==\n");
}
else if(a->type == AST_TOK)
debug_token(a->v.token, indent);
a = a->next;
}
}
/*
 * Allocate a zero-initialized token of kind `type`.
 *
 * NUM and BOOL tokens store `num`; SYM and STR tokens store a private copy
 * of `str`, which must then be non-NULL.  For every other kind both
 * payload arguments are ignored.  The process exits with a diagnostic if
 * memory cannot be obtained, so the result is never NULL.
 */
struct token*
make_token(enum yytokentype type, int num, char* str)
{
  struct token *t = calloc(1, sizeof *t);
  if (t == NULL)
    err(1, "malloc failed");
  t->type = type;
  if (type == NUM || type == BOOL) {
    t->v.num = num;
  } else if (type == SYM || type == STR) {
    /* strdup(NULL) is undefined; callers creating NUM/BOOL pass NULL. */
    if (str == NULL)
      errx(1, "make_token: missing text for token type %d", (int)type);
    t->v.str = strdup(str);
    if (t->v.str == NULL)
      err(1, "strdup failed");
  }
  return t;
}
/*
 * Allocate a zero-initialized ast node of kind `type`.  Only an AST_TOK
 * node records `t`; for any other kind `t` is ignored and the payload
 * stays NULL.  Exits the process if the allocation fails.
 */
struct ast*
make_ast(enum asttype type, struct token *t)
{
  struct ast *node = calloc(1, sizeof *node);

  if (!node)
    err(1, "malloc failed");
  node->type = type;
  if (type == AST_TOK)
    node->v.token = t;
  return node;
}
/*
 * Allocate a binding of `name` to `a`.  The name pointer is stored as
 * given (not copied); `parent` and `next` start out NULL.  Exits the
 * process if the allocation fails.
 */
struct env*
make_env(char *name, struct ast *a)
{
  struct env *binding = calloc(1, sizeof *binding);

  if (!binding)
    err(1, "malloc failed");
  binding->name = name;
  binding->a = a;
  return binding;
}
/*
 * Token kind predicates: each returns 1 when `t` has the named kind and
 * 0 otherwise.  `t` must not be NULL.
 */
int
token_is_num(struct token *t)
{
  return t->type == NUM;
}

int
token_is_sym(struct token *t)
{
  return t->type == SYM;
}

int
token_is_str(struct token *t)
{
  return t->type == STR;
}

int
token_is_bool(struct token *t)
{
  return t->type == BOOL;
}
/* upon parsing a list of tokens like:
* "(" -> "def" -> "(" -> "b" -> ")" -> "c" -> ")"
* care must be taken when entering a new sub-list (b),
* because the processing in the upper level must continue
* where the sub-list ended, i.e. "c" must be the next
* token on this level.
*/
struct token *next;   /* token following the expression parse() last built */

/*
 * Build the ast for the single expression that starts at token `t`.
 *
 * A NUM, BOOL, SYM or STR token becomes an AST_TOK leaf that refers to the
 * token itself.  An LPAR starts an AST_LIST whose children are parsed
 * recursively up to the matching RPAR.  On success the global `next` is
 * set to the token after the expression, which is how an enclosing list
 * resumes behind a nested one.
 *
 * Returns NULL, leaving `next` untouched, when `t` is any other kind of
 * token (for example a stray RPAR).  Exits with a diagnostic when `t` is
 * NULL, when a list element cannot be parsed, or when the token chain
 * ends before a list is closed.
 */
struct ast*
parse(struct token *t)
{
  if (t == NULL)
    errx(1, "Unexpected token: <NULL>");

  switch (t->type) {
  case NUM:
  case BOOL:
  case SYM:
  case STR:
    next = t->next;
    return make_ast(AST_TOK, t);
  case LPAR:
    break;
  default:
    return NULL;
  }

  struct ast *head = make_ast(AST_LIST, NULL);
  struct ast *tail = NULL;

  /* Test for NULL before looking at the type: the chain may end early. */
  for (t = t->next; t != NULL && t->type != RPAR; t = next) {
    struct ast *child = parse(t);
    if (child == NULL)
      errx(1, "syntax error");
    if (tail == NULL)
      head->v.list = child;
    else
      tail->next = child;
    tail = child;
  }
  if (t == NULL)
    errx(1, "syntax error: missing ')'");

  next = t->next;   /* step over the closing parenthesis */
  return head;
}
/*
 * Link binding `a` behind the last element of the chain that starts at
 * `e`.  `e` must not be NULL.
 */
void
append(struct env *e, struct env *a)
{
  struct env *last = e;

  while (last->next != NULL)
    last = last->next;
  last->next = a;
}
/*
 * Find the value bound to `name`.  The binding chain starting at `e` is
 * scanned front to back; if it has no match, the search continues with
 * the chain of e->parent, and so on outwards.  Returns the value of the
 * first match, or NULL when no scope binds the name.
 */
struct ast *
lookup(struct env *e, char *name)
{
  for (struct env *scope = e; scope != NULL; scope = scope->parent) {
    for (struct env *b = scope; b != NULL; b = b->next) {
      if (strcmp(b->name, name) == 0)
        return b->a;
    }
  }
  return NULL;
}
/*
 * (def <name> <expr>)
 *
 * `a` is the `def` symbol node; its two following siblings are the name
 * and the expression.  The expression is evaluated in `e` and the result
 * is bound to the name by appending a new binding to `e`.  Always returns
 * NULL.
 *
 * The shape of the form is checked with assertions before any node is
 * dereferenced and before the environment is modified.
 *
 * Because append() adds at the tail of the chain and lookup() returns the
 * first match, defining a name a second time does not replace the
 * earlier binding.
 */
struct ast *
eval_def(struct ast *a, struct env *e)
{
  assert(a != NULL);

  struct ast *name = a->next;           /* skip `def` */
  /* Only SYM and STR tokens carry text in v.str. */
  assert(name != NULL &&
         name->type == AST_TOK &&
         (name->v.token->type == SYM ||
          name->v.token->type == STR));

  struct ast *value = name->next;
  assert(value != NULL && value->next == NULL);   /* exactly one expression */
  assert(value->type == AST_LIST ||
         (value->type == AST_TOK &&
          (value->v.token->type == STR ||
           value->v.token->type == BOOL ||
           value->v.token->type == NUM ||
           value->v.token->type == SYM)));

  append(e, make_env(name->v.token->v.str, eval(value, e)));
  return NULL;
}
/*
 * (nand <expr::BOOL> <expr::BOOL>) -> BOOL
 *
 * `a` is the `nand` symbol node followed by exactly two operand nodes.
 * Both operands are evaluated, first then second, and must yield BOOL
 * tokens; the result is a fresh BOOL leaf.
 */
struct ast *
eval_nand(struct ast *a, struct env *e)
{
  assert(a != NULL);
  assert(a->next != NULL);
  assert(a->next->next != NULL);
  assert(a->next->next->next == NULL);

  struct ast *lhs = eval(a->next, e);
  struct ast *rhs = eval(a->next->next, e);

  assert(lhs != NULL && lhs->type == AST_TOK &&
         lhs->v.token->type == BOOL);
  assert(rhs != NULL && rhs->type == AST_TOK &&
         rhs->v.token->type == BOOL);

  int result = !(lhs->v.token->v.num & rhs->v.token->v.num);
  struct token *tok = make_token(BOOL, result, NULL);
  return make_ast(AST_TOK, tok);
}
/*
 * (add <expr::NUM> <expr::NUM>) -> NUM
 *
 * `a` is the `add` symbol node followed by exactly two operand nodes.
 * Both operands are evaluated, first then second, and must yield NUM
 * tokens; the result is a fresh NUM leaf holding their int sum.
 */
struct ast *
eval_add(struct ast *a, struct env *e)
{
  assert(a != NULL);
  assert(a->next != NULL);
  assert(a->next->next != NULL);
  assert(a->next->next->next == NULL);

  struct ast *lhs = eval(a->next, e);
  struct ast *rhs = eval(a->next->next, e);

  assert(lhs != NULL && lhs->type == AST_TOK &&
         lhs->v.token->type == NUM);
  assert(rhs != NULL && rhs->type == AST_TOK &&
         rhs->v.token->type == NUM);

  int sum = lhs->v.token->v.num + rhs->v.token->v.num;
  struct token *tok = make_token(NUM, sum, NULL);
  return make_ast(AST_TOK, tok);
}
/*
 * (inv <expr::NUM>) -> NUM
 *
 * `a` is the `inv` symbol node followed by exactly one operand node.  The
 * operand is evaluated and must yield a NUM leaf without a sibling; the
 * result is a fresh NUM leaf holding the negated value.
 */
struct ast *
eval_inv(struct ast *a, struct env *e)
{
  assert(a != NULL);
  assert(a->next != NULL);
  assert(a->next->next == NULL);

  struct ast *arg = eval(a->next, e);

  assert(arg != NULL && arg->type == AST_TOK);
  assert(arg->v.token->type == NUM);
  assert(arg->next == NULL);

  int negated = 0 - arg->v.token->v.num;
  struct token *tok = make_token(NUM, negated, NULL);
  return make_ast(AST_TOK, tok);
}
/*
 * Evaluate the symbol node `a` in environment `e`.
 *
 * The built-in names def, inv, add and nand are dispatched to their
 * evaluators with `a` itself, so the operands are a's siblings.  `q`
 * returns the sibling node unevaluated.  `lm` and `if` are recognized but
 * not implemented and yield NULL.  Every other name is looked up as a
 * variable; an unbound variable trips an assertion.
 *
 * XXX: nand, eq, streq, symeq, add, sub, div, mul,
 * mod, lt, numeq, num2bool
 */
struct ast *
eval_sym(struct ast *a, struct env *e)
{
  char *name = a->v.token->v.str;

  if (strcmp(name, "def") == 0)
    return eval_def(a, e);
  if (strcmp(name, "lm") == 0 || strcmp(name, "if") == 0)
    return NULL;                        /* not implemented yet */
  if (strcmp(name, "inv") == 0)
    return eval_inv(a, e);
  if (strcmp(name, "add") == 0)
    return eval_add(a, e);
  if (strcmp(name, "nand") == 0)
    return eval_nand(a, e);
  if (strcmp(name, "q") == 0)
    return a->next;

  struct ast *bound = lookup(e, name);
  assert(bound != NULL);                /* variable found? */
  return bound;
}
/*
 * Evaluate the ast node `a` in environment `e`.
 *
 * - NULL evaluates to NULL.
 * - A NUM, STR or BOOL leaf evaluates to a new leaf sharing the token.
 * - A SYM leaf is handed to eval_sym().
 * - A list is evaluated by evaluating its first element; a leading symbol
 *   consumes the remaining elements as its operands.
 *
 * A leaf with any other token kind, or a node of any other ast kind, is
 * reported on stderr and trips an assertion.  The leaf case ends
 * explicitly so that it cannot fall into the list case, where the token
 * pointer would be misread as an ast pointer.
 */
struct ast *
eval(struct ast *a, struct env *e)
{
  if (a == NULL)
    return NULL;

  switch (a->type) {
  case AST_TOK:
    if (token_is_sym(a->v.token))
      return eval_sym(a, e);
    if (token_is_num(a->v.token) ||
        token_is_str(a->v.token) ||
        token_is_bool(a->v.token))
      return make_ast(AST_TOK, a->v.token);
    fprintf(stderr, "%d\n", a->v.token->type);
    assert(0);
    return NULL;
  case AST_LIST:
    return eval(a->v.list, e);
  default:
    fprintf(stderr, "%d\n", a->type);
    assert(0);
  }
  return NULL;
}
void
pr(struct ast *a)
{
if (a == NULL) {
fprintf(stderr, "null %p\n", a);
return;
}
switch (a->type) {
case AST_TOK:
if (a->v.token->type == NUM)
printf("%d\n", a->v.token->v.num);
else if (a->v.token->type == STR)
printf("%s\n", a->v.token->v.str);
else if (a->v.token->type == SYM)
printf("%s\n", a->v.token->v.str);
else if (a->v.token->type == BOOL) {
if (a->v.token->v.num)
printf("t\n");
else
printf("nil\n");
}
}
}
/*
 * Read the tokens of one complete top-level form from the scanner.
 *
 * Tokens are collected until the parentheses balance, so the result is
 * either a single atom or everything from an opening parenthesis to its
 * matching closing one.  Returns the first token of the chain.
 *
 * Exits with status 0 when the input ends between forms.  Exits with a
 * diagnostic on a closing parenthesis that has no opening partner and
 * when the input ends inside an unfinished form.
 */
struct token*
read_form(void)
{
  int tok, depth = 0;
  struct token *head = NULL, *tail = NULL;

  while ((tok = yylex())) {
    if (tok == LPAR)
      depth++;
    else if (tok == RPAR && --depth < 0)
      errx(1, "syntax error: unexpected ')'");

    struct token *t = make_token(tok, yylval, yytext);
    if (head == NULL)
      head = t;
    else
      tail->next = t;
    tail = t;

    if (depth == 0)
      return head;
  }

  /* End of input: only a clean exit if no form was left half-read. */
  if (head != NULL)
    errx(1, "syntax error: unexpected end of input");
  exit(0);
}
int main(void)
{
struct env default_env = { 0 }, *env;
env = &default_env;
env->name = "__dummy";
while (1)
pr(eval(parse(read_form()), env));
return 0;
}
/*
* Local Variables:
* mode: c;
* eval: (message "main()")
* fill-column: 80
* comment-column: 40
* indent-tabs-mode: nil
* tab-width: 2
* c-basic-offset: 2
* End:
*/