llm/llm.l

/* ~~~~~~~~~~~~~~~~ llm - The little LISP machinery ~~~~~~~~~~~~~
 *
 *     Copyright (c) 2019  Christian Barthel <bch@online.de>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or
 * without modification, are permitted provided that the
 * following conditions
 * are met:
 * 1. Redistributions of source code must retain the above
 *    copyright *    notice, this list of conditions and the
 *    following disclaimer.
 * 2. Redistributions in binary form must reproduce the above
 *    copyright notice, this list of conditions and the following
 *    disclaimer in the documentation and/or other materials
 *    provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS
 * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
 * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT
 * SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
 * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
 * OF SUCH DAMAGE.
 *
 * Goal: evaluate simple LISP forms (strong type checking)
 *       with a read-eval-print loop.
 *
 *  Data types:
 *   #fn     Functions
 *   t,nil   boolean        (nand <bool> <bool>)      -> boolean
 *   "str"   Strings
 *   abc     Symbols
 *   1234    Number/Integer (inv <num>)               -> -num
 *                          (add <num> <num>)         -> num
 *                          (lt <num> <num>)          -> boolean
 *                          (num2bool <num>)          -> boolean
 *
 *   Special Forms:
 *     (def <SYM> <ARG:symbol,number,#function>)
 *            Bind ARG to name SYM in current environment.
 *     (lm (<PARAM>) (<BODY>))
 *            create function with one or more PARAMs and
 *            BODY.
 *     (if <BOOL> <CASE_TRUE> <CASE_FALSE>)
 *            Evaluate BOOL and evaluate CASE_TRUE if it is true,
 *            otherwise evaluate CASE_FALSE.
 *     (q a)
 *            Return a without evaluating it.
 *
 * Compile: flex tokenize.l && cc lex.yy.c -lfl -o llm
 *          Or: make; ./llm
 *          (tested on FreeBSD, should run on most POSIX
 *           systems)
 * BUGS:
 *     no freeing of memory (yet)
 *
 * Literature:
 *    [1] https://news.ycombinator.com/item?id=8714988
 *          Discussion about "minimal" set of primitives
 *          for a LISP interpreter.
 *    [2] https://stackoverflow.com/questions/3482389/
 *          How many primitives does it take to build a
 *          LISP machine? Ten, seven or five?
 *    [3] http://www.cs.cmu.edu/Groups/AI/html/faqs/lang/ \
 *        lisp/part1/faq-doc-6.html
 *          Discussion about the minimal set of primitives
 *          (see attachment)
 *    [4] https://www.cs.cmu.edu/afs/cs/project/ai-repository \
 *        /ai/lang/lisp/impl/awk/0.html
 *          LISP Interpreter in AWK. (see attachments)
 *    [5] http://norvig.com/lispy.html
 *          Similar project, with Python instead of C
 *    [6] http://norvig.com/lispy2.html
 *           Improved version of [5] in Python. (attachment)
 *    [7] Structure and Interpretation of Computer Programs,
 *        by Abelson, Sussman, and Sussman
 *    [8] FernUniversität Hagen: Logisches und funktionales
 *        Programmieren, by Prof.Dr.Beierle.
 *    [9] http://norvig.com/jscheme.html
 *        Peter Norvig JScheme
 */
%{
#define _POSIX_C_SOURCE 200809L
#define _XOPEN_SOURCE 600

#include <stdio.h>
#include <stdarg.h>
#include <string.h>
#include <err.h>

/* One lexical token.  The tokens of a form are chained through
 * `next` in the order in which they were read. */
struct token;
struct token {
  int           type;   /* token kind, an enum yytokentype value */
  union {
    char       *str;    /* token text; set for SYM and STR tokens */
    int         num;    /* value; set for NUM and BOOL tokens */
  } v;
  struct token *next;   /* following token, NULL for the last one */
};

struct ast;
struct env;
/* One name/value binding.  Bindings that belong to the same scope
 * are chained through `next`; `parent` leads to the enclosing
 * scope, which lookup() searches once this chain is exhausted. */
struct env {
  char       *name;     /* name of the binding */
  struct ast *a;        /* value bound to `name` */
  struct env *parent;   /* enclosing scope, NULL if there is none */
  struct env *next;     /* next binding in the same scope */
};

/* Node of the syntax tree: either a single token or a list of
 * child nodes.  Siblings are chained through `next`. */
struct ast {
  int             type;   /* AST_TOK or AST_LIST (enum asttype) */
  union {
    struct token *token;  /* the token, when type is AST_TOK */
    struct ast   *list;   /* first child, when type is AST_LIST */
  } v;
  struct ast     *next;   /* next sibling, NULL for the last one */
  struct env     *env;    /* set by eval_lm(): environment in which
                           * the function was created */
};

/* Kinds of syntax tree nodes (struct ast.type). */
enum asttype {
  AST_TOK  = 400,   /* node wraps a single token */
  AST_LIST = 401,   /* node holds a list of child nodes */
  AST_QUOTE = 402   /* NOTE(review): not referenced by the code in
                     * this file -- confirm whether still needed */
};
enum yytokentype {
  NUM    = 258,
  LPAR   = 259,
  RPAR   = 260,
  SYM    = 261,
  STR    = 262,
  EOL    = 263,
  BOOL   = 264
};

/* Forward declarations for functions that are used before their
 * definition further down in this file. */
struct token* make_token(enum yytokentype, int, char*);
struct ast*   eval(struct ast *a, struct env *e);
struct ast *  lookup(struct env *e, char *name);
void          debug_ast(struct ast *a, int indent);

/* Value of the most recent NUM or BOOL token; written by the
 * scanner rules and read by read_form(). */
int   yylval;
/* NOTE(review): not referenced by the code in this file --
 * confirm whether it is still needed. */
char *yystr;
%}

%%
"("                   { return LPAR; }
")"                   { return RPAR; }
t                     { yylval = 1; return BOOL; /* true literal */ }
nil                   { yylval = 0; return BOOL; /* false literal */ }
[0-9]+                { yylval = atoi(yytext); return NUM; }
;.*                   { /* comment up to the end of the line */ }
[ \t\n]               { /* ignore white space */ }
\"[^\"]*\"            { return STR; /* yytext keeps the quotes */ }
[a-zA-Z][_a-zA-Z0-9]* { return SYM; }
.                     { /* errx, not err: this is not an errno
                         * failure, so no strerror text belongs in
                         * the message; errx adds the newline. */
                        errx(1, "invalid symbol: %s", yytext); }
%%

/* ------------------------------------------------------------ */
/*
 * Assertion helper: when `promised` is zero, print the
 * printf-style message `fmt` followed by a newline to stderr and
 * terminate the process with exit status 1.  Returns normally
 * when `promised` is non-zero.
 */
void
reassure(int promised, const char *fmt, ...)
{
  va_list args;

  if (promised)
    return;

  va_start(args, fmt);
  vfprintf(stderr, fmt, args);
  va_end(args);
  fputc('\n', stderr);
  exit(1);
}

/*
 * Print a one-line description of token `x` to stderr, preceded
 * by `indent` spaces: its type and address, followed by the
 * number for NUM tokens or the text for STR/SYM tokens.  A NULL
 * token is reported as "NULL TOKEN".
 */
void
debug_token(struct token *x, int indent)
{
  for (int i = 0; i < indent; i++)
    putc(' ', stderr);

  if (x == NULL) {
    fprintf(stderr, "NULL TOKEN\n");
    return;
  }

  /* %p requires a `void *` argument, hence the casts. */
  fprintf(stderr, "%d: (%p)", x->type, (void *)x);
  if (x->type == NUM)
    fprintf(stderr, "%d @ %p", x->v.num, (void *)x);
  if (x->type == STR || x->type == SYM)
    fprintf(stderr, "%s", x->v.str);
  putc('\n', stderr);
}

int debug_tval(struct env *e, char *name)
{
  struct ast *a  = lookup(e, name);
  if (a != NULL && a->type == AST_TOK)
    return a->v.token->v.num;
  else
    return -1;
}

/*
 * Dump environment `e` and all of its parent scopes to stderr.
 * Each binding is printed as "e: <name> <value> <binding>|<scope>"
 * and every scope is terminated by a "-p-" line.
 */
void
debug_env(struct env* e) {
  for (struct env *scope = e; scope != NULL; scope = scope->parent) {
    for (struct env *cu = scope; cu != NULL; cu = cu->next)
      /* %p requires a `void *` argument, hence the casts. */
      fprintf(stderr, "e: %s %d %p|%p\n",
              cu->name, debug_tval(scope, cu->name),
              (void *)cu, (void *)scope);
    fprintf(stderr, "-p-\n");
  }
}

/*
 * Dump the node `a` and all of its following siblings to stderr,
 * each line preceded by `indent` spaces.  For every node the
 * attached environment (if any) is dumped, list nodes are
 * descended into with a larger indent, and token nodes print
 * their token.
 */
void
debug_ast(struct ast *a, int indent)
{
  for (; a != NULL; a = a->next) {
    for (int i = 0; i < indent; i++)
      putc(' ', stderr);

    /* %p requires a `void *` argument, hence the casts. */
    fprintf(stderr, "AST: %s,a=%p,list=%p,to=%p,next=%p,env=%p",
            (a->type == AST_TOK) ? "AST_TOK" : "AST_LST",
            (void *)a, (void *)a->v.list, (void *)a->v.token,
            (void *)a->next, (void *)a->env);
    putc('\n', stderr);

    if (a->env != NULL)
      debug_env(a->env);

    if (a->type == AST_LIST) {
      fprintf(stderr, "enter %p\n", (void *)a->v.list);
      fprintf(stderr, "==\n");
      debug_ast(a->v.list, indent+4);
      fprintf(stderr, "==\n");
    } else if (a->type == AST_TOK) {
      debug_token(a->v.token, indent);
    }
  }
}

/* ------------------------------------------------------------ */
/*
 * Allocate a zero-initialized token of kind `type`.
 *
 * NUM and BOOL tokens store `num`; SYM and STR tokens store a
 * private copy of `str`.  Any other kind (e.g. LPAR/RPAR) carries
 * no value.  Terminates the process if memory cannot be
 * allocated or if a SYM/STR token is requested without text.
 */
struct token*
make_token(enum yytokentype type, int num, char* str)
{
  struct token *t = calloc(1, sizeof *t);

  if (t == NULL)
    err(1, "malloc failed");

  t->type = type;
  if (type == NUM || type == BOOL) {
    t->v.num = num;
  } else if (type == SYM || type == STR) {
    if (str == NULL)
      errx(1, "%s: token text missing", __func__);
    t->v.str = strdup(str);
    if (t->v.str == NULL)     /* strdup allocates and may fail */
      err(1, "strdup failed");
  }
  return t;
}

/*
 * Allocate a zero-initialized syntax tree node of kind `type`.
 * For AST_TOK nodes the token `t` is attached; for every other
 * kind `t` is ignored and the node is returned empty.
 * Terminates the process if memory cannot be allocated.
 */
struct ast*
make_ast(enum asttype type, struct token *t)
{
  struct ast *node = calloc(1, sizeof *node);

  if (node == NULL)
    err(1, "malloc failed");

  node->type = type;
  if (type == AST_TOK)
    node->v.token = t;
  return node;
}

/*
 * Allocate a single binding of `name` to the value `a`.  The
 * name pointer is stored as is (not copied); `parent` and `next`
 * start out as NULL.  Terminates the process if memory cannot be
 * allocated.
 */
struct env*
make_env(char *name, struct ast *a)
{
  struct env *binding = calloc(1, sizeof *binding);

  if (binding == NULL)
    err(1, "malloc failed");

  binding->name = name;
  binding->a = a;
  return binding;
}

/* ------------------------------------------------------------ */
/* Token kind predicates: each returns 1 if `t` is of the named
 * kind and 0 otherwise.  `t` must not be NULL. */
int
token_is_num(struct token *t)
{
  return NUM == t->type;
}

int
token_is_sym(struct token *t)
{
  return SYM == t->type;
}

int
token_is_str(struct token *t)
{
  return STR == t->type;
}

int
token_is_bool(struct token *t)
{
  return BOOL == t->type;
}

/*
 * Return 1 if the text of `t` names one of the built-in forms or
 * primitives, 0 otherwise.  Reads `t->v.str`, so `t` must be a
 * token that carries text.
 */
int
token_is_internal(struct token *t)
{
  static const char *const builtins[] = {
    "def", "lm", "if", "inv", "add", "lt", "nand", "q", "num2bool"
  };
  const size_t count = sizeof builtins / sizeof builtins[0];

  for (size_t i = 0; i < count; i++) {
    if (strcmp(t->v.str, builtins[i]) == 0)
      return 1;
  }
  return 0;
}

/* ------------------------------------------------------------ */
/* Global variable alert:
 * when parsing a token list such as
 *    "(" -> "def" -> "(" -> "b" -> ")" -> "c" -> ")"
 * care must be taken on entering a sub-list (b), because
 * processing on the upper level has to continue where the
 * sub-list ended, i.e. "c" must be the next token on that
 * level.  This is solved with the global variable `next`:
 * every call of parse() stores the first token it did not
 * consume there, and the caller resumes from it.  Not the
 * ideal way, though.
 */
struct token *next;
/*
 * Parse one form starting at token `t` and return its syntax
 * tree.
 *
 * An LPAR opens a list whose elements are parsed recursively up
 * to the matching RPAR; NUM, BOOL, SYM and STR tokens become
 * single token nodes.  Any other token yields NULL.  On return
 * the global `next` points to the first token after the parsed
 * form.  Terminates the process on a NULL token or on a
 * malformed list.
 */
struct ast*
parse(struct token *t)
{
  if (t == NULL)
    errx(1, "Unexpected token: <NULL>");  /* not an errno failure */

  switch (t->type) {
  case LPAR: {
    struct ast *head = make_ast(AST_LIST, NULL);
    struct ast *tail = NULL;

    t = t->next;
    /* Check for the end of input before looking at t->type. */
    reassure(t != NULL, "%s: unexpected NULL", __func__);
    while (t->type != RPAR) {
      struct ast *elem = parse(t);

      reassure(elem != NULL, "syntax error");
      if (tail == NULL)
        head->v.list = elem;
      else
        tail->next = elem;
      tail = elem;

      t = next; /* skip all tokens consumed by the recursive call */
      reassure(t != NULL, "%s: unexpected NULL(2)", __func__);
    }
    next = t->next;   /* continue after the closing RPAR */
    return head;
  }
  case NUM:
  case BOOL:
  case SYM:
  case STR:
    next = t->next;
    return make_ast(AST_TOK, t);
  default:
    return NULL;
  }
}

/*
 * Append binding `a` to the end of the `next` chain that starts
 * at `e`.  `e` must not be NULL.
 */
void
append(struct env *e, struct env *a)
{
  struct env *last = e;

  while (last->next != NULL)
    last = last->next;
  last->next = a;
}

/*
 * Find the value bound to `name`.  The bindings of scope `e` are
 * searched first, then those of each parent scope in turn.
 * Returns the bound value, or NULL if no scope binds `name`.
 */
struct ast *
lookup(struct env *e, char *name)
{
  for (struct env *scope = e; scope != NULL; scope = scope->parent) {
    for (struct env *b = scope; b != NULL; b = b->next) {
      if (strcmp(b->name, name) == 0)
        return b->a;
    }
  }
  return NULL;
}

/*
 * LISP form:  (def <sym> <expression>)
 *
 * Evaluate <expression> in `e` and bind the result to <sym> in
 * the current scope.  `a` is the node of the `def` symbol.  The
 * whole form is validated before anything is evaluated or bound:
 * exactly two operands, the first a symbol that is not yet bound
 * in this scope, the second a string, boolean, number, symbol or
 * list.  Always returns NULL; terminates the process on a
 * malformed form.
 */
struct ast *
eval_def(struct ast *a, struct env *e)
{
  a = a->next;                             /* skip `def` */

  reassure(a != NULL && a->next != NULL,   /* need sth to bind..*/
           "%s: unexpected NULL", __func__);

  /* Only token nodes have a valid v.token; check the node kind
   * before looking at the token. */
  int is_tok = (a->type == AST_TOK && a->v.token != NULL);
  reassure(is_tok && a->v.token->type == SYM,
           "%s: expected SYM, given: %d",
           __func__, is_tok ? a->v.token->type : a->type);

  struct ast *value = a->next;
  reassure(value->next == NULL,
           "%s: ended with unexpected token", __func__);

  reassure(value->type == AST_LIST ||
           (value->type == AST_TOK &&
            (value->v.token->type == STR  ||
             value->v.token->type == BOOL ||
             value->v.token->type == NUM  ||
             value->v.token->type == SYM)),
           "%s: expected STR,BOOL,NUM,SYM as assigned expression",
           __func__);

  char *new = a->v.token->v.str;
  for (struct env *x = e; x; x = x->next)
    reassure(strcmp(x->name, new) != 0,
             "%s: %s already assigned in env=%p",
             __func__, new, (void *)e);

  append(e, make_env(new, eval(value, e)));
  return NULL;
}

/*
 * LISP form:  (if <cond:bool> <expression> <expression>)
 *
 * Evaluate the condition, which must yield a boolean, and return
 * the value of the first expression if it is true, otherwise the
 * value of the second one.  `a` is the node of the `if` symbol.
 */
struct ast *
eval_if(struct ast *a, struct env *e)
{
  struct ast *cond_expr = a->next;

  reassure(cond_expr != NULL && cond_expr->next != NULL,
           "%s: unexpected NULL", __func__);

  struct ast *verdict = eval(cond_expr, e);

  reassure(verdict != NULL &&
           verdict->type == AST_TOK &&
           verdict->v.token->type == BOOL,
           "%s: expected boolean token", __func__);

  struct ast *on_true = cond_expr->next;

  reassure(on_true != NULL && on_true->next != NULL,
           "%s: unexpected expressions",
           __func__);

  struct ast *chosen =
      (verdict->v.token->v.num == 1) ? on_true : on_true->next;
  return eval(chosen, e);
}

/*
 * LISP form:  (nand <expression::bool> <expression::bool>) -> bool
 *
 * Evaluate both operands, which must yield booleans, and return
 * a new boolean token holding their NAND.  `a` is the node of
 * the `nand` symbol.
 */
struct ast *
eval_nand(struct ast *a, struct env *e)
{
  struct ast *first  = (a != NULL) ? a->next : NULL;
  struct ast *second = (first != NULL) ? first->next : NULL;

  reassure(first != NULL && second != NULL && second->next == NULL,
           "%s: unexpected NULL", __func__);

  struct ast *lhs = eval(first, e);
  struct ast *rhs = eval(second, e);

  int lhs_ok = lhs != NULL && lhs->type == AST_TOK &&
               lhs->v.token->type == BOOL;
  int rhs_ok = rhs != NULL && rhs->type == AST_TOK &&
               rhs->v.token->type == BOOL;
  reassure(lhs_ok && rhs_ok,
           "%s: expected <bool> <bool> operands", __func__);

  int result = !(lhs->v.token->v.num & rhs->v.token->v.num);
  return make_ast(AST_TOK, make_token(BOOL, result, NULL));
}

/*
 * LISP form:  (add <expression::num> <expression::num>) -> num
 *
 * Evaluate both operands, which must yield numbers, and return a
 * new number token holding their sum.  `a` is the node of the
 * `add` symbol.
 */
struct ast *
eval_add(struct ast *a, struct env *e)
{
  struct ast *first  = (a != NULL) ? a->next : NULL;
  struct ast *second = (first != NULL) ? first->next : NULL;

  reassure(first != NULL && second != NULL && second->next == NULL,
           "%s: expected 2 operands", __func__);

  struct ast *lhs = eval(first, e);
  struct ast *rhs = eval(second, e);

  int lhs_ok = lhs != NULL && lhs->type == AST_TOK &&
               lhs->v.token->type == NUM;
  int rhs_ok = rhs != NULL && rhs->type == AST_TOK &&
               rhs->v.token->type == NUM;
  reassure(lhs_ok && rhs_ok,
           "%s: expected <num> <num> operands", __func__);

  int sum = lhs->v.token->v.num + rhs->v.token->v.num;
  return make_ast(AST_TOK, make_token(NUM, sum, NULL));
}

/*
 * LISP form:  (lt <expression::num> <expression::num>) -> bool
 *
 * Evaluate both operands, which must yield numbers, and return a
 * new boolean token telling whether the first is less than the
 * second.  `a` is the node of the `lt` symbol.
 */
struct ast *
eval_lt(struct ast *a, struct env *e)
{
  struct ast *first  = (a != NULL) ? a->next : NULL;
  struct ast *second = (first != NULL) ? first->next : NULL;

  reassure(first != NULL && second != NULL && second->next == NULL,
           "%s: expected 2 operands", __func__);

  struct ast *lhs = eval(first, e);
  struct ast *rhs = eval(second, e);

  int lhs_ok = lhs != NULL && lhs->type == AST_TOK &&
               lhs->v.token->type == NUM;
  int rhs_ok = rhs != NULL && rhs->type == AST_TOK &&
               rhs->v.token->type == NUM;
  reassure(lhs_ok && rhs_ok,
           "%s: expected <NUM> <NUM> operands", __func__);

  int is_less = lhs->v.token->v.num < rhs->v.token->v.num;
  return make_ast(AST_TOK, make_token(BOOL, is_less, NULL));
}

/*
 * LISP form:  (inv <expression::num>) -> num
 *
 * Evaluate the single operand, which must yield a number, and
 * return a new number token holding its negation.  `a` is the
 * node of the `inv` symbol.
 */
struct ast *
eval_inv(struct ast *a, struct env *e)
{
  struct ast *arg = (a != NULL) ? a->next : NULL;

  reassure(arg != NULL && arg->next == NULL,
           "%s: expected one argument", __func__);

  struct ast *value = eval(arg, e);

  int is_single_num = value != NULL &&
                      value->type == AST_TOK &&
                      value->v.token->type == NUM &&
                      value->next == NULL;
  reassure(is_single_num,
           "%s: expected <NUM> argument", __func__);

  int negated = 0 - value->v.token->v.num;
  return make_ast(AST_TOK, make_token(NUM, negated, NULL));
}

/*
 * LISP form:  (num2bool <expression::num>) -> bool
 *
 * Evaluate the single operand, which must yield a number, and
 * return a new boolean token that is true unless the number is
 * zero.  `a` is the node of the `num2bool` symbol.
 */
struct ast *
eval_num2bool(struct ast *a, struct env *e)
{
  struct ast *arg = (a != NULL) ? a->next : NULL;

  reassure(arg != NULL && arg->next == NULL,
           "%s: expected one operand", __func__);

  struct ast *value = eval(arg, e);

  int is_single_num = value != NULL &&
                      value->type == AST_TOK &&
                      value->v.token->type == NUM &&
                      value->next == NULL;
  reassure(is_single_num,
           "%s: expected <num> operand", __func__);

  int truth = value->v.token->v.num != 0;
  return make_ast(AST_TOK, make_token(BOOL, truth, NULL));
}

/*
 * Build a function value.  `x` must be a symbol token node (the
 * caller passes the `lm` node of the form).  The result is a new
 * list node whose list starts at `x` and which remembers `e` as
 * the environment the function was created in.
 */
struct ast *
eval_lm(struct ast *x, struct env *e)
{
  int is_sym_node = x != NULL &&
                    x->type == AST_TOK &&
                    x->v.token != NULL &&
                    x->v.token->type == SYM;
  reassure(is_sym_node, "%s: expected lambda lm", __func__);

  struct ast *fn = make_ast(AST_LIST, NULL);

  fn->env = e;
  fn->v.list = x;
  return fn;
}

struct ast *
eval_internal_sym(struct ast *x, struct env *e)
{
  struct ast *a = x->v.list;

  if (strcmp(a->v.token->v.str, "def") == 0)
    return eval_def(a, e);
  else if (strcmp(a->v.token->v.str, "lm") == 0)
    return eval_lm(a, e);
  else if (strcmp(a->v.token->v.str, "if") == 0)
    return eval_if(a, e);
  else if (strcmp(a->v.token->v.str, "inv") == 0)
    return eval_inv(a, e);
  else if (strcmp(a->v.token->v.str, "add") == 0)
    return eval_add(a, e);
  else if (strcmp(a->v.token->v.str, "lt") == 0)
    return eval_lt(a, e);
  else if (strcmp(a->v.token->v.str, "nand") == 0)
    return eval_nand(a, e);
  else if (strcmp(a->v.token->v.str, "q") == 0)
    return a->next;
  else if (strcmp(a->v.token->v.str, "num2bool") == 0)
    return eval_num2bool(a, e);
  return NULL;
}

/*
 * Resolve the symbol node `a` in environment `e` and return the
 * value bound to it.  If the symbol is unbound, the environment
 * is dumped to stderr and the process terminates.
 */
struct ast *
eval_defined_sym(struct ast *a, struct env *e)
{
  char *symbol = a->v.token->v.str;
  struct ast *bound = lookup(e, symbol);

  if (bound != NULL)
    return bound;

  debug_env(e);
  reassure(0,
           "%s: symbol %s not found",
           __func__, symbol);
  return bound;
}

struct env *
bind_args(struct token *args,
          struct ast *fnargs_values, struct env *e1, struct env *e)
{
  reassure(args && args->type == LPAR,
           "%s: first argument: %d %s", __func__,
           args->type, args->v.str);
  args = args->next;

  struct ast *a = fnargs_values;
  struct token *t = args;
  struct env *newenv = (struct env*)calloc(1, sizeof(struct env));

  reassure(newenv != NULL,
           "%s: allocation failed for newenv", __func__);

  newenv->name = strdup("__fn");
  newenv->parent = e;

  while (t != NULL && t->type != RPAR) {
    reassure(t != NULL &&
             t->type == SYM &&
             t->v.str != NULL,
             "%s: wrong args param found: %s",
             __func__, t->v.str);
    reassure(a != NULL &&
             (a->type == AST_TOK || a->type == AST_LIST),
             "%s: arg=%s given, but no value present",
             __func__, t->v.str);

    append(newenv,
           make_env(t->v.str, eval(a, e1)));

    t = t->next;
    a = a->next;
  }

  reassure (t->type == RPAR,
            "%s: args formulation suspicious: %d",
            __func__, args->type);
  reassure (a == NULL,
            "%s: actual value given but no arg left",
            __func__);
  return newenv;
}

/*
 * Call a user-defined function.  `a` is the first node of the
 * call form: it is evaluated to obtain the function, and its
 * following siblings are the argument expressions.
 *
 * The function value must be a list of the shape
 * (lm <param> <body>).  The arguments are evaluated in `e` and
 * bound in a new scope whose parent is the environment stored in
 * the function; the body is then evaluated in that scope and its
 * value returned.
 */
struct ast *
eval_fn_call(struct ast *a, struct env *e)
{
  struct ast *fn = eval(a, e);

  reassure(fn != NULL &&
           fn->type == AST_LIST &&
           fn->v.list != NULL &&
           fn->v.list->next != NULL &&
           fn->v.list->next->next != NULL &&
           fn->v.list->type == AST_TOK &&
           fn->v.list->v.token != NULL &&
           fn->v.list->v.token->next != NULL,
           "%s: function evaluation returned <NULL> or "
           "wrong form, expected: (lm <param> <body>)", __func__);

  struct ast   *fn_body        = fn->v.list->next->next;
  /* The parameters are read from the token list: the token that
   * follows `lm` opens the parameter list. */
  struct token *args_head      = fn->v.list->v.token->next;
  struct ast   *fn_args_values = a->next;

  struct env *newenv =
    bind_args(args_head, fn_args_values, e, fn->env);

  return eval(fn_body, newenv);
}

struct ast *
eval_list(struct ast *a, struct env *e)
{
  if (token_is_internal(a->v.list->v.token))
    return eval_internal_sym(a, e);
  else
    return eval_fn_call(a->v.list, e);
}

/*
 * Evaluate the node `a` in environment `e`.
 *
 * Number, string and boolean tokens evaluate to a fresh node
 * wrapping the same token; symbols evaluate to the value bound
 * to them; lists are evaluated as a built-in form or a function
 * call.  Returns NULL if `a` is NULL.  Terminates the process on
 * an empty list, on a list that starts with neither a symbol nor
 * a list, and on unknown node or token kinds.
 */
struct ast *
eval(struct ast *a, struct env *e)
{
  if (a == NULL)
    return NULL;

  switch (a->type) {
  case AST_TOK:
    if (token_is_sym(a->v.token))
      return eval_defined_sym(a, e);
    if (token_is_num(a->v.token) ||
        token_is_str(a->v.token) ||
        token_is_bool(a->v.token))
      return make_ast(AST_TOK, a->v.token);
    reassure(0, "Unexptected token found");
    break;
  case AST_LIST:
    /* `()` parses to a list without elements. */
    reassure(a->v.list != NULL,
             "%s: empty list cannot be evaluated", __func__);
    reassure ((a->v.list->type == AST_TOK &&
               a->v.list->v.token->type == SYM) ||
              (a->v.list->type == AST_LIST),
              "%s: expected LIST or TOK", __func__);
    return eval_list(a, e);
  default:
    reassure(0, "%s: wrong AST Type", __func__);
    break;
  }
  return NULL;
}

/*
 * Print the value `a` followed by a newline to stdout: numbers
 * in decimal, strings and symbols as their text, booleans as `t`
 * or `nil`, and list nodes as `#fn`.  Prints nothing for NULL.
 * Terminates the process on an unknown node kind.
 */
void
pr(struct ast *a)
{
  if (a == NULL)
    return;

  switch (a->type) {
  case AST_TOK:
    switch (a->v.token->type) {
    case NUM:
      printf("%d\n", a->v.token->v.num);
      break;
    case STR:
    case SYM:
      printf("%s\n", a->v.token->v.str);
      break;
    case BOOL:
      printf("%s\n", a->v.token->v.num ? "t" : "nil");
      break;
    default:
      break;
    }
    break;
  case AST_LIST:
    printf("#fn\n");
    break;
  default:
    /* errx, not err: no errno value is involved here. */
    errx(1, "don't know how to print");
  }
}

/*
 * Read the tokens of the next complete form from the scanner and
 * return them as a linked list.  A form is either a single token
 * or everything from an opening parenthesis up to the matching
 * closing one.
 *
 * At the end of the input the process exits with status 0.  A
 * closing parenthesis without a matching opening one, or the end
 * of the input in the middle of a form, terminates the process
 * with an error.
 */
struct token*
read_form(void)
{
  int tok, open = 0;
  struct token *head = NULL, *tail = NULL;

  while ((tok = yylex())) {
    if (tok == LPAR) {
      open++;
    } else if (tok == RPAR) {
      /* A negative depth would glue this token to whatever
       * follows it and mis-group the rest of the input. */
      reassure(open > 0, "%s: unexpected ')'", __func__);
      open--;
    }

    struct token *t = make_token(tok, yylval, yytext);

    if (head == NULL)
      head = t;
    else
      tail->next = t;
    tail = t;

    if (open == 0)
      return head;
  }

  if (head != NULL)
    errx(1, "%s: unexpected end of input inside a form", __func__);
  exit(0);
}

int
main(void)
{
  struct env default_env = { 0 }, *env;
  env = &default_env;
  env->name = "__llm";

  while (1)
    pr(eval(parse(read_form()), env));
  return 0;
}

/*
 * Local Variables:
 * mode: c;
 * eval: (message "main()")
 * fill-column: 65
 * comment-column: 40
 * indent-tabs-mode: nil
 * tab-width: 2
 * c-basic-offset: 2
 * End:
 */