[PATCH] Ash support for replace and substitution

James Simmons jsimmons at infradead.org
Mon Mar 24 09:17:50 PDT 2008


diff -urwN busybox-1.10.0.orig/shell/ash.c busybox-1.10.0/shell/ash.c
--- busybox-1.10.0.orig/shell/ash.c	2008-03-24 11:58:50.000000000 -0400
+++ busybox-1.10.0/shell/ash.c	2008-03-24 12:28:26.000000000 -0400
@@ -476,6 +476,9 @@
 #define VSTRIMLEFT      0x8             /* ${var#pattern} */
 #define VSTRIMLEFTMAX   0x9             /* ${var##pattern} */
 #define VSLENGTH        0xa             /* ${#var} */
+#define VSSUBSTR	0xc		/* ${var:position:length} */
+#define VSREPLACE	0xd		/* ${var/pattern/replacement} */
+#define VSREPLACEALL	0xe		/* ${var//pattern/replacement} */
 
 static const char dolatstr[] ALIGN1 = {
 	CTLVAR, VSNORMAL|VSQUOTE, '@', '=', '\0'
@@ -4185,7 +4188,7 @@
 	int quoted = 0;
 	static const char vstype[VSTYPE + 1][4] = {
 		"", "}", "-", "+", "?", "=",
-		"%", "%%", "#", "##"
+		"%", "%%", "#", "##", ":", "/", "//"
 	};
 
 	nextc = makestrspace((strlen(s) + 1) * 8, cmdnextc);
@@ -5681,23 +5684,37 @@
 scanleft(char *startp, char *rmesc, char *rmescend ATTRIBUTE_UNUSED, char *str, int quotes,
 	int zero)
 {
-	char *loc;
-	char *loc2;
+	char *loc, *loc2, *full;
 	char c;
 
 	loc = startp;
 	loc2 = rmesc;
 	do {
-		int match;
+		int match = strlen(str);
 		const char *s = loc2;
+
 		c = *loc2;
 		if (zero) {
 			*loc2 = '\0';
 			s = rmesc;
 		}
-		match = pmatch(str, s);
+
+		// chop off end if it's '*'
+		full = rindex(str, '*');
+		if (full && full != str)
+			match--;
+
+		// If str starts with '*' replace with s.
+		if ((*str == '*') && strlen(s) >= match) {
+			full = xstrdup(s);
+			strncpy(full+strlen(s)-match+1, str+1, match-1);
+		} else
+			full = xstrndup(str, match);
+		match = strncmp(s, full, strlen(full));
+		free(full);
+
 		*loc2 = c;
-		if (match)
+		if (!match)
 			return loc;
 		if (quotes && *loc == CTLESC)
 			loc++;
@@ -5760,15 +5777,89 @@
 	ash_msg_and_raise_error("%.*s: %s%s", end - var - 1, var, msg, tail);
 }
 
+/*
+ * Split "pattern/replacement" in place at the first '/', decoding backslash
+ * escapes (\n \r \t \v \f \b \a, octal) in both parts; with inquotes zero a
+ * backslash must be doubled. Returns the replacement, or NULL without a '/'.
+ */
+static char *
+parse_sub_pattern(char *arg, int inquotes)
+{
+	char *idx, *repl = NULL;
+	unsigned char c;
+
+	for (idx = arg; *arg; arg++) {
+		if (*arg == '/') {
+			/* Only the first '/' seen is our separator */
+			if (!repl) {
+				*idx++ = '\0';
+				repl = idx;
+			} else
+				*idx++ = *arg;
+		} else if (*arg != '\\') {
+			*idx++ = *arg;
+		} else {
+			if (inquotes)
+				arg++;
+			else {
+				if (*(arg + 1) != '\\')
+					goto single_backslash;
+				arg += 2;
+			}
+			switch (*arg) {
+			case 'n':	c = '\n'; break;
+			case 'r':	c = '\r'; break;
+			case 't':	c = '\t'; break;
+			case 'v':	c = '\v'; break;
+			case 'f':	c = '\f'; break;
+			case 'b':	c = '\b'; break;
+			case 'a':	c = '\a'; break;
+			case '\\':
+				if (*(arg + 1) != '\\' && !inquotes)
+					goto single_backslash;
+				arg++;
+				/* FALLTHROUGH */
+			case '\0':
+				/* Trailing backslash: emit one, back up so the loop exits */
+				c = '\\';
+				if (!*arg)
+					arg--;
+				break;
+			default:
+				c = *arg;
+				if (isdigit(c)) {
+					/* Octal escape: fold in one digit per iteration */
+					int i;
+					c = 0;
+					for (i = 0; *arg && i < 3; arg++, i++) {
+						if (*arg >= '8' || *arg < '0')
+							ash_msg_and_raise_error("Invalid octal char in pattern");
+						c = (c << 3) + (*arg - '0');
+					}
+					/* back off one (so outer loop can do it) */
+					arg--;
+				}
+			}
+			*idx++ = c;
+		}
+	}
+	*idx = *arg;
+	return repl;
+
+single_backslash:
+	ash_msg_and_raise_error("single backslash unexpected");
+	/* NOTREACHED */
+}
+
 static const char *
 subevalvar(char *p, char *str, int strloc, int subtype,
 		int startloc, int varflags, int quotes, struct strlist *var_str_list)
 {
-	char *startp;
-	char *loc;
+	char *startp, *repl = NULL;
+	char *loc, null = '\0';
 	int saveherefd = herefd;
 	struct nodelist *saveargbackq = argbackq;
-	int amount;
+	int amount, pos, len, orig_len, workloc, resetloc;
 	char *rmesc, *rmescend;
 	int zero;
 	char *(*scan)(char *, char *, char *, char *, int , int);
@@ -5788,16 +5879,70 @@
 		STADJUST(amount, expdest);
 		return startp;
 
+	case VSSUBSTR:
+		loc = str = stackblock() + strloc;
+		pos = atoi(loc);
+		len = str - startp - 1;
+
+		/* *loc != '\0', guaranteed by parser */
+		if (quotes) {
+			char *ptr;
+
+			/* We must adjust the length by the number of escapes we find. */
+			for(ptr = startp; ptr < (str - 1); ptr++) {
+				if(*ptr == CTLESC) {
+					len--;
+					ptr++;
+				}
+			}
+		}
+		orig_len = len;
+
+		if (*loc++ == ':') {
+			len = atoi(loc);
+		} else {
+			len = orig_len;
+			while (*loc && *loc != ':')
+				loc++;
+			if (*loc++ == ':')
+				len = atoi(loc);
+		}
+		if (pos >= orig_len) {
+			pos = 0;
+			len = 0;
+		}
+		if (len > (orig_len - pos))
+			len = orig_len - pos;
+
+		for (str = startp; pos; str++, pos--) {
+			if (quotes && *str == CTLESC)
+				str++;
+		}
+		for (loc = startp; len; len--) {
+			if (quotes && *str == CTLESC)
+				*loc++ = *str++;
+			*loc++ = *str++;
+		}
+		*loc = '\0';
+		amount = loc - expdest;
+		STADJUST(amount, expdest);
+		return loc;
+
 	case VSQUESTION:
 		varunset(p, str, startp, varflags);
 		/* NOTREACHED */
 	}
+	resetloc = expdest - (char *)stackblock();
 
-	subtype -= VSTRIMRIGHT;
-#if DEBUG
-	if (subtype < 0 || subtype > 3)
-		abort();
-#endif
+	/* We'll come back here if we grow the stack while handling
+	 * a VSREPLACE or VSREPLACEALL, since our pointers into the
+	 * stack will need rebasing, and we'll need to remove our work
+	 * areas each time
+	 */
+restart:
+	amount = expdest - ((char *)stackblock() + resetloc);
+	STADJUST(-amount, expdest);
+	startp = stackblock() + startloc;
 
 	rmesc = startp;
 	rmescend = stackblock() + strloc;
@@ -5811,7 +5956,91 @@
 	rmescend--;
 	str = stackblock() + strloc;
 	preglob(str, varflags & VSQUOTE, 0);
+	workloc = expdest - (char *)stackblock();
 
+	if (subtype == VSREPLACE || subtype == VSREPLACEALL) {
+		char *idx, *end, *restart_detect;
+
+		if(!repl) {
+			repl = parse_sub_pattern(str, varflags & VSQUOTE);
+			if (!repl)
+				repl = &null;
+		}
+
+		/* If there's no pattern to match, return the expansion unmolested */
+		if (*str == '\0')
+			return 0;
+
+		len = 0;
+		idx = startp;
+		end = str - 1;
+		while (idx < end) {
+			loc = scanright(idx, rmesc, rmescend, str, quotes, 1);
+			if (!loc) {
+				/* No match, advance */
+				restart_detect = stackblock();
+				STPUTC(*idx, expdest);
+				if (quotes && *idx == CTLESC) {
+					idx++;
+					len++;
+					STPUTC(*idx, expdest);
+				}
+				if (stackblock() != restart_detect)
+					goto restart;
+				idx++;
+				len++;
+				rmesc++;
+				continue;
+			}
+
+			if (subtype == VSREPLACEALL) {
+				while (idx < loc) {
+					if (quotes && *idx == CTLESC)
+						idx++;
+					idx++;
+					rmesc++;
+				}
+			} else
+				idx = loc;
+
+			for(loc = repl; *loc; loc++) {
+				restart_detect = stackblock();
+				STPUTC(*loc, expdest);
+				if (stackblock() != restart_detect)
+					goto restart;
+				len++;
+			}
+
+			if (subtype == VSREPLACE) {
+				while (*idx) {
+					restart_detect = stackblock();
+					STPUTC(*idx, expdest);
+					if (stackblock() != restart_detect)
+						goto restart;
+					len++;
+					idx++;
+				}
+				break;
+			}
+		}
+
+		/* We've put the replaced text into a buffer at workloc, now
+		 * move it to the right place and adjust the stack.
+		 */
+		startp = stackblock() + startloc;
+		STPUTC('\0', expdest);
+		memmove(startp, stackblock() + workloc, len);
+		startp[len++] = '\0';
+		amount = expdest - ((char *)stackblock() + startloc + len - 1);
+		STADJUST(-amount, expdest);
+		return startp;
+	}
+
+	subtype -= VSTRIMRIGHT;
+#if DEBUG
+	if (subtype < 0 || subtype > 7)
+		abort();
+#endif
 	/* zero = subtype == VSTRIMLEFT || subtype == VSTRIMLEFTMAX */
 	zero = subtype >> 1;
 	/* VSTRIMLEFT/VSTRIMRIGHTMAX -> scanleft */
@@ -6063,6 +6292,9 @@
 	case VSTRIMLEFTMAX:
 	case VSTRIMRIGHT:
 	case VSTRIMRIGHTMAX:
+	case VSSUBSTR:
+	case VSREPLACE:
+	case VSREPLACEALL:
 		break;
 	default:
 		abort();
@@ -10459,8 +10691,13 @@
 		if (subtype == 0) {
 			switch (c) {
 			case ':':
-				flags = VSNUL;
 				c = pgetc();
+				if (c == ':' || c == '$' || isdigit(c)) {
+					pungetc();
+					subtype = VSSUBSTR;
+					break;
+				} else
+					flags = VSNUL;
 				/*FALLTHROUGH*/
 			default:
 				p = strchr(types, c);
@@ -10481,6 +10718,14 @@
 						pungetc();
 					break;
 				}
+			case '/':
+				subtype = VSREPLACE;
+				c = pgetc();
+				if (c == '/')
+					subtype++;
+				else
+					pungetc();
+				break;
 			}
 		} else {
 			pungetc();


More information about the busybox mailing list