/* See LICENSE file for copyright and license details. */ #include #include #include #include "utf.h" #include "util.h" /* token types for lexing/parsing * single character operators represent themselves */ enum { VAL = CHAR_MAX + 1, GE, LE, NE }; typedef struct { char *s; /* iff s is NULL, Val is an integer */ intmax_t n; } Val; static size_t intlen; static void enan(Val v) { if (v.s) enprintf(2, "syntax error: expected integer got `%s'\n", v.s); } static void ezero(intmax_t n) { if (n == 0) enprintf(2, "division by zero\n"); } static char * valstr(Val val, char *buf, size_t bufsiz) { if (val.s) return val.s; snprintf(buf, bufsiz, "%"PRIdMAX, val.n); return buf; } static int valcmp(Val a, Val b) { char buf1[intlen], buf2[intlen]; char *astr = valstr(a, buf1, sizeof(buf1)); char *bstr = valstr(b, buf2, sizeof(buf2)); if (!a.s && !b.s) return (a.n > b.n) - (a.n < b.n); return strcmp(astr, bstr); } /* match vstr against BRE vregx (treat both values as strings) * if there is at least one subexpression \(...\) * then return the text matched by it \1 (empty string for no match) * else return number of characters matched (0 for no match) */ static Val match(Val vstr, Val vregx) { regex_t re; regmatch_t matches[2]; intmax_t d; char *s, *p, buf1[intlen], buf2[intlen]; char *str = valstr(vstr, buf1, sizeof(buf1)); char *regx = valstr(vregx, buf2, sizeof(buf2));; char anchreg[strlen(regx) + 2]; /* expr(1p) "all patterns are anchored to the beginning of the string" */ snprintf(anchreg, sizeof(anchreg), "^%s", regx); enregcomp(3, &re, anchreg, 0); if (regexec(&re, str, 2, matches, 0)) { regfree(&re); return (Val){ (re.re_nsub ? "" : NULL), 0 }; } if (re.re_nsub) { regfree(&re); s = str + matches[1].rm_so; p = str + matches[1].rm_eo; *p = '\0'; d = strtoimax(s, &p, 10); if (*s && !*p) /* string matched by subexpression is an integer */ return (Val){ NULL, d }; /* FIXME? string is never free()d, worth fixing? * need to allocate as it could be in buf1 instead of vstr.s */ return (Val){ enstrdup(3, s), 0 }; } regfree(&re); str += matches[0].rm_so; return (Val){ NULL, utfnlen(str, matches[0].rm_eo - matches[0].rm_so) }; } /* ops points to a stack of operators, opp points to one past the last op * vals points to a stack of values , valp points to one past the last val * guaranteed that opp != ops * ops is unused here, but still included for parity with vals * pop operator, pop two values, apply operator, push result */ static void doop(int *ops, int **opp, Val *vals, Val **valp) { Val ret, a, b; int op; /* For an operation, we need a valid operator * and two values on the stack */ if ((*opp)[-1] == '(') enprintf(2, "syntax error: extra (\n"); if (*valp - vals < 2) enprintf(2, "syntax error: missing expression or extra operator\n"); a = (*valp)[-2]; b = (*valp)[-1]; op = (*opp)[-1]; switch (op) { case '|': if ( a.s && *a.s) ret = (Val){ a.s , 0 }; else if (!a.s && a.n) ret = (Val){ NULL, a.n }; else if ( b.s && *b.s) ret = (Val){ b.s , 0 }; else ret = (Val){ NULL, b.n }; break; case '&': if (((a.s && *a.s) || a.n) && ((b.s && *b.s) || b.n)) ret = a; else ret = (Val){ NULL, 0 }; break; case '=': ret = (Val){ NULL, valcmp(a, b) == 0 }; break; case '>': ret = (Val){ NULL, valcmp(a, b) > 0 }; break; case GE : ret = (Val){ NULL, valcmp(a, b) >= 0 }; break; case '<': ret = (Val){ NULL, valcmp(a, b) < 0 }; break; case LE : ret = (Val){ NULL, valcmp(a, b) <= 0 }; break; case NE : ret = (Val){ NULL, valcmp(a, b) != 0 }; break; case '+': enan(a); enan(b); ret = (Val){ NULL, a.n + b.n }; break; case '-': enan(a); enan(b); ret = (Val){ NULL, a.n - b.n }; break; case '*': enan(a); enan(b); ret = (Val){ NULL, a.n * b.n }; break; case '/': enan(a); enan(b); ezero(b.n); ret = (Val){ NULL, a.n / b.n }; break; case '%': enan(a); enan(b); ezero(b.n); ret = (Val){ NULL, a.n % b.n }; break; case ':': ret = match(a, b); break; } (*valp)[-2] = ret; (*opp)--; (*valp)--; } /* retrn the type of the next token, s * if it is a value, place the value in v for use by parser */ static int lex(char *s, Val *v) { intmax_t d; char *p, *ops = "|&=><+-*/%():"; /* clean integer */ d = strtoimax(s, &p, 10); if (*s && !*p) { *v = (Val){ NULL, d }; return VAL; } /* one-char operand */ if (*s && !s[1] && strchr(ops, *s)) return *s; /* two-char operand */ if (!strcmp(s, ">=")) return GE; if (!strcmp(s, "<=")) return LE; if (!strcmp(s, "!=")) return NE; /* nothing matched, treat as string */ *v = (Val){ s, 0 }; return VAL; } /* using shunting-yard to convert from infix to rpn * https://en.wikipedia.org/wiki/Shunting-yard_algorithm * instead of creating rpn output to evaluate later, evaluate it immediately as * it is created * vals is the value stack, valp points to one past last value on the stack * ops is the operator stack, opp points to one past last op on the stack */ static int parse(char *expr[], int exprlen) { Val vals[exprlen], *valp = vals, v; int ops[exprlen], *opp = ops; int i, type, lasttype = 0; char prec[] = { /* precedence of operators */ ['|'] = 1, ['&'] = 2, ['='] = 3, ['>'] = 3, [GE] = 3, ['<'] = 3, [LE] = 3, [NE] = 3, ['+'] = 4, ['-'] = 4, ['*'] = 5, ['/'] = 5, ['%'] = 5, [':'] = 6, }; for (i = 0; i < exprlen; i++) { switch ((type = lex(expr[i], &v))) { case VAL: *valp++ = v; break; case '(': *opp++ = '('; break; case ')': if (lasttype == '(') enprintf(2, "syntax error: empty ( )\n"); while (opp > ops && opp[-1] != '(') doop(ops, &opp, vals, &valp); if (opp == ops) enprintf(2, "syntax error: extra )\n"); opp--; break; default: /* operator */ if (prec[lasttype]) enprintf(2, "syntax error: extra operator\n"); while (opp > ops && prec[opp[-1]] >= prec[type]) doop(ops, &opp, vals, &valp); *opp++ = type; break; } lasttype = type; } while (opp > ops) doop(ops, &opp, vals, &valp); if (valp == vals) enprintf(2, "syntax error: missing expression\n"); if (--valp != vals) enprintf(2, "syntax error: extra expression\n"); if (valp->s) printf("%s\n", valp->s); else printf("%"PRIdMAX"\n", valp->n); return (valp->s && *valp->s) || valp->n; } /* the only way to get usage() is if the user didn't supply -- and expression * begins with a - * expr(1p): "... the conforming application must employ the -- construct ... * if there is any chance the first operand might be a negative integer (or any * string with a leading minus" */ static void usage(void) { enprintf(3, "usage: %s [--] expression\n" "note : the -- is mandatory if expression begins with a -\n", argv0); } int main(int argc, char *argv[]) { intmax_t n = INTMAX_MIN; /* Get the maximum number of digits (+ sign) */ for (intlen = (n < 0); n; n /= 10, ++intlen) ; ARGBEGIN { default: usage(); } ARGEND; return !parse(argv, argc); }