[BusyBox] Today's ash improvements

Sat Aug 4 10:41:03 MDT 2001

Aaron,
 
> This patch contains three distinct size optimizations:
> 
> 1) is_digit() uses only one comparison now. ((c)>='0' && (c)<='9') was
> replaced with ((unsigned)(c) - '0' <= 9). Neat trick, eh?

This optimization certainly works, provided, of course, that we use
a one-byte char.
But I do not like this approach.
 
> 2) The 257 byte is_type table was replaced by a few simple tests.
> is_special was changed from ((is_type+SYNBASE)[c] & (ISSPECL|ISDIGIT))
> to (is_digit(c)||c=='!'||c=='#'||c=='$'||c=='*'||c=='-').

You have missed two more symbols: '?' and '@'.
But your idea is excellent.

> 3) tokendlist was changed from a 30 byte array of booleans (chars) to
> a 30 bit bitmask.

This optimization assumes that the positions and the number of tokens will not
change.
Look at my variant. In total we gain 300 bytes without losing the ability
to modify the token table.


--w
vodz
-------------- next part --------------
diff -rbu busybox.orig/ash.c busybox/ash.c

--- busybox.orig/ash.c	Sat Aug  4 19:17:44 2001
+++ busybox/ash.c	Sat Aug  4 19:28:51 2001
@@ -156,18 +156,18 @@
 
 #define TEOF 0
 #define TNL 1
-#define TSEMI 2
-#define TBACKGND 3
-#define TAND 4
-#define TOR 5
-#define TPIPE 6
-#define TLP 7
-#define TRP 8
-#define TENDCASE 9
-#define TENDBQUOTE 10
-#define TREDIR 11
-#define TWORD 12
-#define TASSIGN 13
+#define TREDIR 2
+#define TWORD 3
+#define TASSIGN 4
+#define TSEMI 5
+#define TBACKGND 6
+#define TAND 7
+#define TOR 8
+#define TPIPE 9
+#define TLP 10
+#define TRP 11
+#define TENDCASE 12
+#define TENDBQUOTE 13
 #define TNOT 14
 #define TCASE 15
 #define TDO 16
@@ -206,7 +206,7 @@
 #define is_alpha(c)     (((c) < CTLESC || (c) > CTLENDARI) && isalpha((unsigned char) (c)))
 #define is_name(c)      (((c) < CTLESC || (c) > CTLENDARI) && ((c) == '_' || isalpha((unsigned char) (c))))
 #define is_in_name(c)   (((c) < CTLESC || (c) > CTLENDARI) && ((c) == '_' || isalnum((unsigned char) (c))))
-#define is_special(c)   ((is_type+SYNBASE)[c] & (ISSPECL|ISDIGIT))
+#define is_special(c)   (is_digit(c)||c=='!'||c=='#'||c=='$'||c=='*'||c=='-'||c=='?'||c=='@')
 #define digit_val(c)    ((c) - '0')
 
 
@@ -973,140 +973,39 @@
       CWORD
 };
 
-/* character classification table */
-static const char is_type[257] = {
-      0,       0,       0,       0,
-      0,       0,       0,       0,
-      0,       0,       0,       0,
-      0,       0,       0,       0,
-      0,       0,       0,       0,
-      0,       0,       0,       0,
-      0,       0,       0,       0,
-      0,       0,       0,       0,
-      0,       0,       0,       0,
-      0,       0,       0,       0,
-      0,       0,       0,       0,
-      0,       0,       0,       0,
-      0,       0,       0,       0,
-      0,       0,       0,       0,
-      0,       0,       0,       0,
-      0,       0,       0,       0,
-      0,       0,       0,       0,
-      0,       0,       0,       0,
-      0,       0,       0,       0,
-      0,       0,       0,       0,
-      0,       0,       0,       0,
-      0,       0,       0,       0,
-      0,       0,       0,       0,
-      0,       0,       0,       0,
-      0,       0,       0,       0,
-      0,       0,       0,       0,
-      0,       0,       0,       0,
-      0,       0,       0,       0,
-      0,       0,       0,       0,
-      0,       0,       0,       0,
-      0,       0,       0,       0,
-      0,       0,       0,       0,
-      0,       0,       0,       0,
-      0,       0,       0,       0,
-      0,       0,       0,       0,
-      0,       0,       0,       0,
-      0,       0,       0,       0,
-      0,       0,       0,       0,
-      0,       0,       0,       0,
-      0,       0,       0,       0,
-      0,       0,       0,       ISSPECL,
-      0,       ISSPECL, ISSPECL, 0,
-      0,       0,       0,       0,
-      ISSPECL, 0,       0,       ISSPECL,
-      0,       0,       ISDIGIT, ISDIGIT,
-      ISDIGIT, ISDIGIT, ISDIGIT, ISDIGIT,
-      ISDIGIT, ISDIGIT, ISDIGIT, ISDIGIT,
-      0,       0,       0,       0,
-      0,       ISSPECL, ISSPECL, ISUPPER,
-      ISUPPER, ISUPPER, ISUPPER, ISUPPER,
-      ISUPPER, ISUPPER, ISUPPER, ISUPPER,
-      ISUPPER, ISUPPER, ISUPPER, ISUPPER,
-      ISUPPER, ISUPPER, ISUPPER, ISUPPER,
-      ISUPPER, ISUPPER, ISUPPER, ISUPPER,
-      ISUPPER, ISUPPER, ISUPPER, ISUPPER,
-      ISUPPER, 0,       0,       0,
-      0,       ISUNDER, 0,       ISLOWER,
-      ISLOWER, ISLOWER, ISLOWER, ISLOWER,
-      ISLOWER, ISLOWER, ISLOWER, ISLOWER,
-      ISLOWER, ISLOWER, ISLOWER, ISLOWER,
-      ISLOWER, ISLOWER, ISLOWER, ISLOWER,
-      ISLOWER, ISLOWER, ISLOWER, ISLOWER,
-      ISLOWER, ISLOWER, ISLOWER, ISLOWER,
-      ISLOWER, 0,       0,       0,
-      0
-};
-
-/* Array indicating which tokens mark the end of a list */
-static const char tokendlist[] = {
-	1,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	0,
-	1,
-	1,
-	1,
-	0,
-	0,
-	0,
-	0,
-	0,
-	1,
-	1,
-	1,
-	1,
-	1,
-	1,
-	0,
-	0,
-	0,
-	1,
-	0,
-	0,
-	0,
-	1,
-};
-
-static const char *const tokname[] = {
-	"end of file",
-	"newline",
-	"\";\"",
-	"\"&\"",
-	"\"&&\"",
-	"\"||\"",
-	"\"|\"",
-	"\"(\"",
-	"\")\"",
-	"\";;\"",
-	"\"`\"",
-	"redirection",
-	"word",
-	"assignment",
-	"\"!\"",
-	"\"case\"",
-	"\"do\"",
-	"\"done\"",
-	"\"elif\"",
-	"\"else\"",
-	"\"esac\"",
-	"\"fi\"",
-	"\"for\"",
-	"\"if\"",
-	"\"in\"",
-	"\"then\"",
-	"\"until\"",
-	"\"while\"",
-	"\"{\"",
-	"\"}\"",
+/* the first char of each entry indicates whether the token marks the end of a list */
+static const char *const tokname_array[] = {
+	"\1end of file",
+	"\0newline",
+	"\0redirection",
+	"\0word",
+	"\0assignment",
+	"\0;",
+	"\0&",
+	"\0&&",
+	"\0||",
+	"\0|",
+	"\0(",
+	"\1)",
+	"\1;;",
+	"\1`",
+	/* next token word see in parsekwd[] */
+	"",
+	"",
+	"\1",
+	"\1",
+	"\1",
+	"\1",
+	"\1",
+	"\1",
+	"",
+	"",
+	"",
+	"\1",
+	"",
+	"",
+	"",
+	"\1",
 };
 
 #define KWDOFFSET 14
@@ -1130,6 +1029,17 @@
 	"}"
 };
 
+static const char *tokname(int tok)
+{
+	static char buf[16];
+
+	if(tok>=TSEMI)
+		buf[0] = '"';
+	sprintf(buf+(tok>=TSEMI), "%s%c",
+		(tok<KWDOFFSET ? tokname_array[tok]+1 : parsekwd[tok-KWDOFFSET]),
+		(tok>=TSEMI ? '"' : 0));
+	return buf;
+}
 
 static int plinno = 1;          /* input line number */
 
@@ -1744,7 +1654,7 @@
 };
 #define NUMBUILTINS  (sizeof (builtincmds) / sizeof (struct builtincmd) )
 
-static const struct builtincmd *DOTCMD = &builtincmds[0];
+#define DOTCMD &builtincmds[0]
 static struct builtincmd *BLTINCMD;
 static struct builtincmd *EXECCMD;
 static struct builtincmd *EVALCMD;
@@ -9346,7 +9256,7 @@
 static union node *simplecmd (void);
 static void parsefname (void);
 static void parseheredoc (void);
-static int peektoken (void);
+static char peektoken (void);
 static int readtoken (void);
 static int xxreadtoken (void);
 static int readtoken1 (int, char const *, char *, int);
@@ -9391,7 +9301,7 @@
 	int tok;
 
 	checkkwd = 2;
-	if (nlflag == 0 && tokendlist[peektoken()])
+	if (nlflag == 0 && peektoken())
 		return NULL;
 	n1 = NULL;
 	for (;;) {
@@ -9434,7 +9344,7 @@
 				tokpushback++;
 			}
 			checkkwd = 2;
-			if (tokendlist[peektoken()])
+			if (peektoken())
 				return n1;
 			break;
 		case TEOF:
@@ -9572,7 +9482,7 @@
 		n1->type = (lasttoken == TWHILE)? NWHILE : NUNTIL;
 		n1->nbinary.ch1 = list(0);
 		if ((got=readtoken()) != TDO) {
-TRACE(("expecting DO got %s %s\n", tokname[got], got == TWORD ? wordtext : ""));
+TRACE(("expecting DO got %s %s\n", tokname(got), got == TWORD ? wordtext : ""));
 			synexpect(TDO);
 		}
 		n1->nbinary.ch2 = list(0);
@@ -9901,13 +9811,13 @@
 	}
 }
 
-static int
+static char
 peektoken() {
 	int t;
 
 	t = readtoken();
 	tokpushback++;
-	return (t);
+	return tokname_array[t][0];
 }
 
 static int
@@ -9955,7 +9865,7 @@
 
 			if ((pp = findkwd(wordtext))) {
 				lasttoken = t = pp - parsekwd + KWDOFFSET;
-				TRACE(("keyword %s recognized\n", tokname[t]));
+				TRACE(("keyword %s recognized\n", tokname(t)));
 				goto out;
 			}
 		}
@@ -9983,9 +9893,9 @@
 out:
 #ifdef DEBUG
 	if (!alreadyseen)
-	    TRACE(("token %s %s\n", tokname[t], t == TWORD || t == TASSIGN ? wordtext : ""));
+	    TRACE(("token %s %s\n", tokname(t), t == TWORD || t == TASSIGN ? wordtext : ""));
 	else
-	    TRACE(("reread token %s %s\n", tokname[t], t == TWORD || t == TASSIGN ? wordtext : ""));
+	    TRACE(("reread token %s %s\n", tokname(t), t == TWORD || t == TASSIGN ? wordtext : ""));
 #endif
 	return (t);
 }
@@ -10767,13 +10677,11 @@
 	int token;
 {
 	char msg[64];
+	int l;
 
-	if (token >= 0) {
-		snprintf(msg, 64, "%s unexpected (expecting %s)",
-			tokname[lasttoken], tokname[token]);
-	} else {
-		snprintf(msg, 64, "%s unexpected", tokname[lasttoken]);
-	}
+	l = sprintf(msg, "%s unexpected", tokname(lasttoken));
+	if (token >= 0)
+		sprintf(msg+l, " (expecting %s)", tokname(token));
 	synerror(msg);
 	/* NOTREACHED */
 }