summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Elliott Hughes <enh@google.com> 2012-08-14 15:32:42 -0700
committer Elliott Hughes <enh@google.com> 2012-08-15 15:39:11 -0700
commit cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8 (patch)
tree 614352f0342e88315e758fbf63e35172578f260d
parent 53493a9b26e9fb665de590abf3502af94eaf3f44 (diff)
download bionic-cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8.zip
bionic-cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8.tar.gz
bionic-cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8.tar.bz2
Switch to the current NetBSD regex implementation.
Change-Id: If32b28dd85d6a7ab8957ab81d19fa4c0de9499d5
-rw-r--r-- libc/Android.mk 10
-rw-r--r-- libc/NOTICE 36
-rw-r--r-- libc/regex/regerror.c 130
-rw-r--r-- libc/regex/regfree.c 71
-rw-r--r-- libc/unistd/creat.c 34
-rw-r--r-- libc/upstream-netbsd/libc/compat-43/creat.c (renamed from libc/regex/utils.h) 45
-rw-r--r-- libc/upstream-netbsd/libc/regex/cclass.h (renamed from libc/regex/cclass.h) 70
-rw-r--r-- libc/upstream-netbsd/libc/regex/cname.h (renamed from libc/regex/cname.h) 44
-rw-r--r-- libc/upstream-netbsd/libc/regex/engine.c (renamed from libc/regex/engine.c) 409
-rw-r--r-- libc/upstream-netbsd/libc/regex/regcomp.c (renamed from libc/regex/regcomp.c) 923
-rw-r--r-- libc/upstream-netbsd/libc/regex/regerror.c 223
-rw-r--r-- libc/upstream-netbsd/libc/regex/regex2.h (renamed from libc/regex/regex2.h) 124
-rw-r--r-- libc/upstream-netbsd/libc/regex/regexec.c (renamed from libc/regex/regexec.c) 126
-rw-r--r-- libc/upstream-netbsd/libc/regex/regfree.c 129
-rw-r--r-- libc/upstream-netbsd/libc/regex/utils.h 91
-rw-r--r-- libc/upstream-netbsd/netbsd-compat.h 3
-rw-r--r-- tests/Android.mk 43
-rw-r--r-- tests/regex_test.cpp 38
18 files changed, 1832 insertions, 717 deletions
diff --git a/libc/Android.mk b/libc/Android.mk
index 7b1aa99..51cef8a 100644
--- a/libc/Android.mk
+++ b/libc/Android.mk
@@ -9,7 +9,6 @@ libc_common_src_files := \
unistd/abort.c \
unistd/alarm.c \
unistd/brk.c \
- unistd/creat.c \
unistd/daemon.c \
unistd/eventfd.c \
unistd/exec.c \
@@ -328,13 +327,14 @@ libc_common_src_files := \
netbsd/nameser/ns_netint.c \
netbsd/nameser/ns_print.c \
netbsd/nameser/ns_samedomain.c \
- regex/regcomp.c \
- regex/regerror.c \
- regex/regexec.c \
- regex/regfree.c \
libc_upstream_netbsd_src_files := \
+ upstream-netbsd/libc/compat-43/creat.c \
upstream-netbsd/libc/gen/nice.c \
+ upstream-netbsd/libc/regex/regcomp.c \
+ upstream-netbsd/libc/regex/regerror.c \
+ upstream-netbsd/libc/regex/regexec.c \
+ upstream-netbsd/libc/regex/regfree.c \
upstream-netbsd/libc/stdlib/tdelete.c \
upstream-netbsd/libc/stdlib/tfind.c \
upstream-netbsd/libc/stdlib/tsearch.c \
diff --git a/libc/NOTICE b/libc/NOTICE
index 14a826a..8bf4978 100644
--- a/libc/NOTICE
+++ b/libc/NOTICE
@@ -2211,7 +2211,6 @@ SUCH DAMAGE.
-------------------------------------------------------------------
-Copyright (c) 1992, 1993, 1994 Henry Spencer.
Copyright (c) 1992, 1993, 1994
The Regents of the University of California. All rights reserved.
@@ -2244,6 +2243,41 @@ SUCH DAMAGE.
-------------------------------------------------------------------
+Copyright (c) 1992, 1993, 1994 Henry Spencer.
+
+This code is derived from software contributed to Berkeley by
+Henry Spencer.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+3. All advertising materials mentioning features or use of this software
+ must display the following acknowledgement:
+ This product includes software developed by the University of
+ California, Berkeley and its contributors.
+4. Neither the name of the University nor the names of its contributors
+ may be used to endorse or promote products derived from this software
+ without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGE.
+
+-------------------------------------------------------------------
+
Copyright (c) 1993
The Regents of the University of California. All rights reserved.
diff --git a/libc/regex/regerror.c b/libc/regex/regerror.c
deleted file mode 100644
index 838ec8f..0000000
--- a/libc/regex/regerror.c
+++ /dev/null
@@ -1,130 +0,0 @@
-/* $OpenBSD: regerror.c,v 1.13 2005/08/05 13:03:00 espie Exp $ */
-/*-
- * Copyright (c) 1992, 1993, 1994 Henry Spencer.
- * Copyright (c) 1992, 1993, 1994
- * The Regents of the University of California. All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * Henry Spencer.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * @(#)regerror.c 8.4 (Berkeley) 3/20/94
- */
-
-#include <sys/types.h>
-#include <stdio.h>
-#include <string.h>
-#include <ctype.h>
-#include <limits.h>
-#include <stdlib.h>
-#include <regex.h>
-
-#include "utils.h"
-
-static char *regatoi(const regex_t *, char *, int);
-
-static const struct rerr {
- int code;
- char *name;
- char *explain;
-} rerrs[] = {
- { REG_NOMATCH, "REG_NOMATCH", "regexec() failed to match" },
- { REG_BADPAT, "REG_BADPAT", "invalid regular expression" },
- { REG_ECOLLATE, "REG_ECOLLATE", "invalid collating element" },
- { REG_ECTYPE, "REG_ECTYPE", "invalid character class" },
- { REG_EESCAPE, "REG_EESCAPE", "trailing backslash (\\)" },
- { REG_ESUBREG, "REG_ESUBREG", "invalid backreference number" },
- { REG_EBRACK, "REG_EBRACK", "brackets ([ ]) not balanced" },
- { REG_EPAREN, "REG_EPAREN", "parentheses not balanced" },
- { REG_EBRACE, "REG_EBRACE", "braces not balanced" },
- { REG_BADBR, "REG_BADBR", "invalid repetition count(s)" },
- { REG_ERANGE, "REG_ERANGE", "invalid character range" },
- { REG_ESPACE, "REG_ESPACE", "out of memory" },
- { REG_BADRPT, "REG_BADRPT", "repetition-operator operand invalid" },
- { REG_EMPTY, "REG_EMPTY", "empty (sub)expression" },
- { REG_ASSERT, "REG_ASSERT", "\"can't happen\" -- you found a bug" },
- { REG_INVARG, "REG_INVARG", "invalid argument to regex routine" },
- { 0, "", "*** unknown regexp error code ***" }
-};
-
-/*
- - regerror - the interface to error numbers
- = extern size_t regerror(int, const regex_t *, char *, size_t);
- */
-/* ARGSUSED */
-size_t
-regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size)
-{
- const struct rerr *r;
- size_t len;
- int target = errcode &~ REG_ITOA;
- char *s;
- char convbuf[50];
-
- if (errcode == REG_ATOI)
- s = regatoi(preg, convbuf, sizeof convbuf);
- else {
- for (r = rerrs; r->code != 0; r++)
- if (r->code == target)
- break;
-
- if (errcode&REG_ITOA) {
- if (r->code != 0) {
- assert(strlen(r->name) < sizeof(convbuf));
- (void) strlcpy(convbuf, r->name, sizeof convbuf);
- } else
- (void)snprintf(convbuf, sizeof convbuf,
- "REG_0x%x", target);
- s = convbuf;
- } else
- s = r->explain;
- }
-
- len = strlen(s) + 1;
- if (errbuf_size > 0) {
- strlcpy(errbuf, s, errbuf_size);
- }
-
- return(len);
-}
-
-/*
- - regatoi - internal routine to implement REG_ATOI
- */
-static char *
-regatoi(const regex_t *preg, char *localbuf, int localbufsize)
-{
- const struct rerr *r;
-
- for (r = rerrs; r->code != 0; r++)
- if (strcmp(r->name, preg->re_endp) == 0)
- break;
- if (r->code == 0)
- return("0");
-
- (void)snprintf(localbuf, localbufsize, "%d", r->code);
- return(localbuf);
-}
diff --git a/libc/regex/regfree.c b/libc/regex/regfree.c
deleted file mode 100644
index a57eba3..0000000
--- a/libc/regex/regfree.c
+++ /dev/null
@@ -1,71 +0,0 @@
-/* $OpenBSD: regfree.c,v 1.7 2005/08/05 13:03:00 espie Exp $ */
-/*-
- * Copyright (c) 1992, 1993, 1994 Henry Spencer.
- * Copyright (c) 1992, 1993, 1994
- * The Regents of the University of California. All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * Henry Spencer.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * @(#)regfree.c 8.3 (Berkeley) 3/20/94
- */
-
-#include <sys/types.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <regex.h>
-
-#include "utils.h"
-#include "regex2.h"
-
-/*
- - regfree - free everything
- */
-void
-regfree(regex_t *preg)
-{
- struct re_guts *g;
-
- if (preg->re_magic != MAGIC1) /* oops */
- return; /* nice to complain, but hard */
-
- g = preg->re_g;
- if (g == NULL || g->magic != MAGIC2) /* oops again */
- return;
- preg->re_magic = 0; /* mark it invalid */
- g->magic = 0; /* mark it invalid */
-
- if (g->strip != NULL)
- free((char *)g->strip);
- if (g->sets != NULL)
- free((char *)g->sets);
- if (g->setbits != NULL)
- free((char *)g->setbits);
- if (g->must != NULL)
- free(g->must);
- free((char *)g);
-}
diff --git a/libc/unistd/creat.c b/libc/unistd/creat.c
deleted file mode 100644
index 1b14465..0000000
--- a/libc/unistd/creat.c
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Copyright (C) 2008 The Android Open Source Project
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
- * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
- * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
- * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
- * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
- * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-#include <unistd.h>
-#include <fcntl.h>
-
-int creat(const char* pathname, mode_t mode)
-{
- return open(pathname, O_WRONLY|O_TRUNC|O_CREAT, mode);
-}
diff --git a/libc/regex/utils.h b/libc/upstream-netbsd/libc/compat-43/creat.c
index 3e184fc..9560bea 100644
--- a/libc/regex/utils.h
+++ b/libc/upstream-netbsd/libc/compat-43/creat.c
@@ -1,13 +1,9 @@
-/* $OpenBSD: utils.h,v 1.4 2003/06/02 20:18:36 millert Exp $ */
+/* $NetBSD: creat.c,v 1.10 2003/08/07 16:42:39 agc Exp $ */
-/*-
- * Copyright (c) 1992, 1993, 1994 Henry Spencer.
- * Copyright (c) 1992, 1993, 1994
+/*
+ * Copyright (c) 1989, 1993
* The Regents of the University of California. All rights reserved.
*
- * This code is derived from software contributed to Berkeley by
- * Henry Spencer.
- *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -31,25 +27,26 @@
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
- *
- * @(#)utils.h 8.3 (Berkeley) 3/20/94
*/
-/* utility definitions */
-#define DUPMAX 255
-#define INFINITY (DUPMAX + 1)
-#define NC (CHAR_MAX - CHAR_MIN + 1)
-typedef unsigned char uch;
-
-/* switch off assertions (if not already off) if no REDEBUG */
-#ifndef REDEBUG
-#ifndef NDEBUG
-#define NDEBUG /* no assertions please */
-#endif
+#include <sys/cdefs.h>
+#if defined(LIBC_SCCS) && !defined(lint)
+#if 0
+static char sccsid[] = "@(#)creat.c 8.1 (Berkeley) 6/2/93";
+#else
+__RCSID("$NetBSD: creat.c,v 1.10 2003/08/07 16:42:39 agc Exp $");
#endif
+#endif /* LIBC_SCCS and not lint */
+
#include <assert.h>
+#include <errno.h>
+#include <fcntl.h>
-/* for old systems with bcopy() but no memmove() */
-#ifdef USEBCOPY
-#define memmove(d, s, c) bcopy(s, d, c)
-#endif
+int
+creat(const char *path, mode_t mode)
+{
+
+ _DIAGASSERT(path != NULL);
+
+ return(open(path, O_WRONLY|O_CREAT|O_TRUNC, mode));
+}
diff --git a/libc/regex/cclass.h b/libc/upstream-netbsd/libc/regex/cclass.h
index d105491..3ab2ccb 100644
--- a/libc/regex/cclass.h
+++ b/libc/upstream-netbsd/libc/regex/cclass.h
@@ -1,7 +1,6 @@
-/* $OpenBSD: cclass.h,v 1.5 2003/06/02 20:18:36 millert Exp $ */
+/* $NetBSD: cclass.h,v 1.7 2003/08/07 16:43:19 agc Exp $ */
/*-
- * Copyright (c) 1992, 1993, 1994 Henry Spencer.
* Copyright (c) 1992, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
@@ -35,34 +34,71 @@
* @(#)cclass.h 8.3 (Berkeley) 3/20/94
*/
+/*-
+ * Copyright (c) 1992, 1993, 1994 Henry Spencer.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Henry Spencer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)cclass.h 8.3 (Berkeley) 3/20/94
+ */
+
/* character-class table */
static const struct cclass {
- char *name;
- char *chars;
- char *multis;
+ const char *name;
+ const char *chars;
+ const char *multis;
} cclasses[] = {
{ "alnum", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
-0123456789", ""} ,
+0123456789", "" },
{ "alpha", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz",
- ""} ,
- { "blank", " \t", ""} ,
+ "" },
+ { "blank", " \t", "" },
{ "cntrl", "\007\b\t\n\v\f\r\1\2\3\4\5\6\16\17\20\21\22\23\24\
-\25\26\27\30\31\32\33\34\35\36\37\177", ""} ,
- { "digit", "0123456789", ""} ,
+\25\26\27\30\31\32\33\34\35\36\37\177", "" },
+ { "digit", "0123456789", "" },
{ "graph", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
- ""} ,
+ "" },
{ "lower", "abcdefghijklmnopqrstuvwxyz",
- ""} ,
+ "" },
{ "print", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~ ",
- ""} ,
+ "" },
{ "punct", "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
- ""} ,
- { "space", "\t\n\v\f\r ", ""} ,
+ "" },
+ { "space", "\t\n\v\f\r ", "" },
{ "upper", "ABCDEFGHIJKLMNOPQRSTUVWXYZ",
- ""} ,
+ "" },
{ "xdigit", "0123456789ABCDEFabcdef",
- ""} ,
+ "" },
{ NULL, 0, "" }
};
diff --git a/libc/regex/cname.h b/libc/upstream-netbsd/libc/regex/cname.h
index b674b68..4b9ef39 100644
--- a/libc/regex/cname.h
+++ b/libc/upstream-netbsd/libc/regex/cname.h
@@ -1,7 +1,6 @@
-/* $OpenBSD: cname.h,v 1.5 2003/06/02 20:18:36 millert Exp $ */
+/* $NetBSD: cname.h,v 1.7 2003/08/07 16:43:19 agc Exp $ */
/*-
- * Copyright (c) 1992, 1993, 1994 Henry Spencer.
* Copyright (c) 1992, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
@@ -35,9 +34,46 @@
* @(#)cname.h 8.3 (Berkeley) 3/20/94
*/
+/*-
+ * Copyright (c) 1992, 1993, 1994 Henry Spencer.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Henry Spencer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)cname.h 8.3 (Berkeley) 3/20/94
+ */
+
/* character-name table */
static const struct cname {
- char *name;
+ const char *name;
char code;
} cnames[] = {
{ "NUL", '\0' },
@@ -135,5 +171,5 @@ static const struct cname {
{ "right-curly-bracket", '}' },
{ "tilde", '~' },
{ "DEL", '\177' },
- { NULL, 0 }
+ { NULL, 0 },
};
diff --git a/libc/regex/engine.c b/libc/upstream-netbsd/libc/regex/engine.c
index eae6ff2..2a800d4 100644
--- a/libc/regex/engine.c
+++ b/libc/upstream-netbsd/libc/regex/engine.c
@@ -1,7 +1,6 @@
-/* $OpenBSD: engine.c,v 1.15 2005/08/05 13:03:00 espie Exp $ */
+/* $NetBSD: engine.c,v 1.24 2012/03/13 21:13:42 christos Exp $ */
/*-
- * Copyright (c) 1992, 1993, 1994 Henry Spencer.
* Copyright (c) 1992, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
@@ -35,6 +34,43 @@
* @(#)engine.c 8.5 (Berkeley) 3/20/94
*/
+/*-
+ * Copyright (c) 1992, 1993, 1994 Henry Spencer.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Henry Spencer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)engine.c 8.5 (Berkeley) 3/20/94
+ */
+
/*
* The matching engine and friends. This file is #included by regexec.c
* after suitable #defines of a variety of macros used herein, so that
@@ -72,11 +108,11 @@ struct match {
struct re_guts *g;
int eflags;
regmatch_t *pmatch; /* [nsub+1] (0 element unused) */
- char *offp; /* offsets work from here */
- char *beginp; /* start of string -- virtual NUL precedes */
- char *endp; /* end of string -- virtual NUL here */
- char *coldp; /* can be no match starting before here */
- char **lastpos; /* [nplus+1] */
+ const char *offp; /* offsets work from here */
+ const char *beginp; /* start of string -- virtual NUL precedes */
+ const char *endp; /* end of string -- virtual NUL here */
+ const char *coldp; /* can be no match starting before here */
+ const char **lastpos; /* [nplus+1] */
STATEVARS;
states st; /* current states */
states fresh; /* states for a fresh start */
@@ -84,13 +120,18 @@ struct match {
states empty; /* empty set of states */
};
-static int matcher(struct re_guts *, char *, size_t, regmatch_t[], int);
-static char *dissect(struct match *, char *, char *, sopno, sopno);
-static char *backref(struct match *, char *, char *, sopno, sopno, sopno, int);
-static char *fast(struct match *, char *, char *, sopno, sopno);
-static char *slow(struct match *, char *, char *, sopno, sopno);
-static states step(struct re_guts *, sopno, sopno, states, int, states);
-#define MAX_RECURSION 100
+/* ========= begin header generated by ./mkh ========= */
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* === engine.c === */
+static int matcher(struct re_guts *g, const char *string, size_t nmatch, regmatch_t pmatch[], int eflags);
+static const char *dissect(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst);
+static const char *backref(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst, sopno lev);
+static const char *fast(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst);
+static const char *slow(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst);
+static states step(struct re_guts *g, sopno start, sopno stop, states bef, int ch, states aft);
#define BOL (OUT+1)
#define EOL (BOL+1)
#define BOLEOL (BOL+2)
@@ -101,19 +142,24 @@ static states step(struct re_guts *, sopno, sopno, states, int, states);
#define NONCHAR(c) ((c) > CHAR_MAX)
#define NNONCHAR (CODEMAX-CHAR_MAX)
#ifdef REDEBUG
-static void print(struct match *, char *, states, int, FILE *);
+static void print(struct match *m, char *caption, states st, int ch, FILE *d);
#endif
#ifdef REDEBUG
-static void at(struct match *, char *, char *, char *, sopno, sopno);
+static void at(struct match *m, char *title, char *start, char *stop, sopno startst, sopno stopst);
#endif
#ifdef REDEBUG
-static char *pchar(int);
+static char *pchar(int ch);
+#endif
+
+#ifdef __cplusplus
+}
#endif
+/* ========= end header generated by ./mkh ========= */
#ifdef REDEBUG
#define SP(t, s, c) print(m, t, s, c, stdout)
#define AT(t, p1, p2, s1, s2) at(m, t, p1, p2, s1, s2)
-#define NOTE(str) { if (m->eflags&REG_TRACE) (void)printf("=%s\n", (str)); }
+#define NOTE(str) { if (m->eflags&REG_TRACE) printf("=%s\n", (str)); }
static int nope = 0;
#else
#define SP(t, s, c) /* nothing */
@@ -123,27 +169,39 @@ static int nope = 0;
/*
- matcher - the actual matching engine
+ == static int matcher(struct re_guts *g, char *string, \
+ == size_t nmatch, regmatch_t pmatch[], int eflags);
*/
static int /* 0 success, REG_NOMATCH failure */
-matcher(struct re_guts *g, char *string, size_t nmatch, regmatch_t pmatch[],
+matcher(
+ struct re_guts *g,
+ const char *string,
+ size_t nmatch,
+ regmatch_t pmatch[],
int eflags)
{
- char *endp;
- int i;
+ const char *endp;
+ size_t i;
struct match mv;
struct match *m = &mv;
- char *dp;
+ const char *dp;
const sopno gf = g->firststate+1; /* +1 for OEND */
const sopno gl = g->laststate;
- char *start;
- char *stop;
+ const char *start;
+ const char *stop;
+ int error = 0;
+
+ _DIAGASSERT(g != NULL);
+ _DIAGASSERT(string != NULL);
+ /* pmatch checked below */
/* simplify the situation where possible */
if (g->cflags&REG_NOSUB)
nmatch = 0;
if (eflags&REG_STARTEND) {
- start = string + pmatch[0].rm_so;
- stop = string + pmatch[0].rm_eo;
+ _DIAGASSERT(pmatch != NULL);
+ start = string + (size_t)pmatch[0].rm_so;
+ stop = string + (size_t)pmatch[0].rm_eo;
} else {
start = string;
stop = start + strlen(start);
@@ -154,8 +212,8 @@ matcher(struct re_guts *g, char *string, size_t nmatch, regmatch_t pmatch[],
/* prescreening; this does wonders for this rather slow code */
if (g->must != NULL) {
for (dp = start; dp < stop; dp++)
- if (*dp == g->must[0] && stop - dp >= g->mlen &&
- memcmp(dp, g->must, (size_t)g->mlen) == 0)
+ if (*dp == g->must[0] && (size_t)(stop - dp) >= g->mlen &&
+ memcmp(dp, g->must, g->mlen) == 0)
break;
if (dp == stop) /* we didn't find g->must */
return(REG_NOMATCH);
@@ -180,10 +238,8 @@ matcher(struct re_guts *g, char *string, size_t nmatch, regmatch_t pmatch[],
for (;;) {
endp = fast(m, start, stop, gf, gl);
if (endp == NULL) { /* a miss */
- free(m->pmatch);
- free(m->lastpos);
- STATETEARDOWN(m);
- return(REG_NOMATCH);
+ error = REG_NOMATCH;
+ goto done;
}
if (nmatch == 0 && !g->backrefs)
break; /* no further info needed */
@@ -206,25 +262,24 @@ matcher(struct re_guts *g, char *string, size_t nmatch, regmatch_t pmatch[],
m->pmatch = (regmatch_t *)malloc((m->g->nsub + 1) *
sizeof(regmatch_t));
if (m->pmatch == NULL) {
- STATETEARDOWN(m);
- return(REG_ESPACE);
+ error = REG_ESPACE;
+ goto done;
}
- for (i = 1; i <= (int)m->g->nsub; i++)
- m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1;
+ for (i = 1; i <= m->g->nsub; i++)
+ m->pmatch[i].rm_so = m->pmatch[i].rm_eo = (regoff_t)-1;
if (!g->backrefs && !(m->eflags&REG_BACKR)) {
NOTE("dissecting");
dp = dissect(m, m->coldp, endp, gf, gl);
} else {
if (g->nplus > 0 && m->lastpos == NULL)
- m->lastpos = (char **)malloc((g->nplus+1) *
- sizeof(char *));
+ m->lastpos = malloc((g->nplus+1) *
+ sizeof(const char *));
if (g->nplus > 0 && m->lastpos == NULL) {
- free(m->pmatch);
- STATETEARDOWN(m);
- return(REG_ESPACE);
+ error = REG_ESPACE;
+ goto done;
}
NOTE("backref dissect");
- dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0);
+ dp = backref(m, m->coldp, endp, gf, gl, (sopno)0);
}
if (dp != NULL)
break;
@@ -242,12 +297,12 @@ matcher(struct re_guts *g, char *string, size_t nmatch, regmatch_t pmatch[],
/* try it on a shorter possibility */
#ifndef NDEBUG
for (i = 1; i <= m->g->nsub; i++) {
- assert(m->pmatch[i].rm_so == -1);
- assert(m->pmatch[i].rm_eo == -1);
+ assert(m->pmatch[i].rm_so == (regoff_t)-1);
+ assert(m->pmatch[i].rm_eo == (regoff_t)-1);
}
#endif
NOTE("backoff dissect");
- dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0);
+ dp = backref(m, m->coldp, endp, gf, gl, (sopno)0);
}
assert(dp == NULL || dp == endp);
if (dp != NULL) /* found a shorter one */
@@ -255,54 +310,72 @@ matcher(struct re_guts *g, char *string, size_t nmatch, regmatch_t pmatch[],
/* despite initial appearances, there is no match here */
NOTE("false alarm");
- if (m->coldp == stop)
- break;
start = m->coldp + 1; /* recycle starting later */
+ assert(start <= stop);
}
/* fill in the details if requested */
if (nmatch > 0) {
+ _DIAGASSERT(pmatch != NULL);
pmatch[0].rm_so = m->coldp - m->offp;
pmatch[0].rm_eo = endp - m->offp;
}
if (nmatch > 1) {
assert(m->pmatch != NULL);
- for (i = 1; i < (ssize_t)nmatch; i++)
- if (i <= (int)m->g->nsub)
+ for (i = 1; i < nmatch; i++)
+ if (i <= m->g->nsub)
pmatch[i] = m->pmatch[i];
else {
- pmatch[i].rm_so = -1;
- pmatch[i].rm_eo = -1;
+ pmatch[i].rm_so = (regoff_t)-1;
+ pmatch[i].rm_eo = (regoff_t)-1;
}
}
- if (m->pmatch != NULL)
- free((char *)m->pmatch);
- if (m->lastpos != NULL)
- free((char *)m->lastpos);
+done:
+ if (m->pmatch != NULL) {
+ free(m->pmatch);
+ m->pmatch = NULL;
+ }
+ if (m->lastpos != NULL) {
+ free(m->lastpos);
+ m->lastpos = NULL;
+ }
STATETEARDOWN(m);
- return(0);
+ return error;
}
/*
- dissect - figure out what matched what, no back references
+ == static const char *dissect(struct match *m, const char *start, \
+ == const char *stop, sopno startst, sopno stopst);
*/
-static char * /* == stop (success) always */
-dissect(struct match *m, char *start, char *stop, sopno startst, sopno stopst)
+static const char * /* == stop (success) always */
+dissect(
+ struct match *m,
+ const char *start,
+ const char *stop,
+ sopno startst,
+ sopno stopst)
{
int i;
sopno ss; /* start sop of current subRE */
sopno es; /* end sop of current subRE */
- char *sp; /* start of string matched by it */
- char *stp; /* string matched by it cannot pass here */
- char *rest; /* start of rest of string */
- char *tail; /* string unmatched by rest of RE */
+ const char *sp; /* start of string matched by it */
+ const char *stp; /* string matched by it cannot pass here */
+ const char *rest; /* start of rest of string */
+ const char *tail; /* string unmatched by rest of RE */
sopno ssub; /* start sop of subsubRE */
sopno esub; /* end sop of subsubRE */
- char *ssp; /* start of string matched by subsubRE */
- char *sep; /* end of string matched by subsubRE */
- char *oldssp; /* previous ssp */
- char *dp;
+ const char *ssp; /* start of string matched by subsubRE */
+ const char *sep; /* end of string matched by subsubRE */
+ const char *oldssp; /* previous ssp */
+#ifndef NDEBUG
+ const char *dp;
+#endif
+
+ _DIAGASSERT(m != NULL);
+ _DIAGASSERT(start != NULL);
+ _DIAGASSERT(stop != NULL);
AT("diss", start, stop, startst, stopst);
sp = start;
@@ -361,7 +434,12 @@ dissect(struct match *m, char *start, char *stop, sopno startst, sopno stopst)
esub = es - 1;
/* did innards match? */
if (slow(m, sp, rest, ssub, esub) != NULL) {
- dp = dissect(m, sp, rest, ssub, esub);
+#ifdef NDEBUG
+ (void)
+#else
+ dp =
+#endif
+ dissect(m, sp, rest, ssub, esub);
assert(dp == rest);
} else /* no */
assert(sp == rest);
@@ -399,7 +477,12 @@ dissect(struct match *m, char *start, char *stop, sopno startst, sopno stopst)
}
assert(sep == rest); /* must exhaust substring */
assert(slow(m, ssp, sep, ssub, esub) == rest);
- dp = dissect(m, ssp, sep, ssub, esub);
+#ifdef NDEBUG
+ (void)
+#else
+ dp =
+#endif
+ dissect(m, ssp, sep, ssub, esub);
assert(dp == sep);
sp = rest;
break;
@@ -434,7 +517,12 @@ dissect(struct match *m, char *start, char *stop, sopno startst, sopno stopst)
else
assert(OP(m->g->strip[esub]) == O_CH);
}
- dp = dissect(m, sp, rest, ssub, esub);
+#ifdef NDEBUG
+ (void)
+#else
+ dp =
+#endif
+ dissect(m, sp, rest, ssub, esub);
assert(dp == rest);
sp = rest;
break;
@@ -467,24 +555,35 @@ dissect(struct match *m, char *start, char *stop, sopno startst, sopno stopst)
/*
- backref - figure out what matched what, figuring in back references
+ == static const char *backref(struct match *m, const char *start, \
+ == const char *stop, sopno startst, sopno stopst, sopno lev);
*/
-static char * /* == stop (success) or NULL (failure) */
-backref(struct match *m, char *start, char *stop, sopno startst, sopno stopst,
- sopno lev, int rec) /* PLUS nesting level */
+static const char * /* == stop (success) or NULL (failure) */
+backref(
+ struct match *m,
+ const char *start,
+ const char *stop,
+ sopno startst,
+ sopno stopst,
+ sopno lev) /* PLUS nesting level */
{
int i;
sopno ss; /* start sop of current subRE */
- char *sp; /* start of string matched by it */
+ const char *sp; /* start of string matched by it */
sopno ssub; /* start sop of subsubRE */
sopno esub; /* end sop of subsubRE */
- char *ssp; /* start of string matched by subsubRE */
- char *dp;
+ const char *ssp; /* start of string matched by subsubRE */
+ const char *dp;
size_t len;
int hard;
sop s;
regoff_t offsave;
cset *cs;
+ _DIAGASSERT(m != NULL);
+ _DIAGASSERT(start != NULL);
+ _DIAGASSERT(stop != NULL);
+
AT("back", start, stop, startst, stopst);
sp = start;
@@ -572,51 +671,50 @@ backref(struct match *m, char *start, char *stop, sopno startst, sopno stopst,
case OBACK_: /* the vilest depths */
i = OPND(s);
assert(0 < i && i <= m->g->nsub);
- if (m->pmatch[i].rm_eo == -1)
+ if (m->pmatch[i].rm_eo == (regoff_t)-1)
return(NULL);
- assert(m->pmatch[i].rm_so != -1);
- len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so;
- if (len == 0 && rec++ > MAX_RECURSION)
+ assert(m->pmatch[i].rm_so != (regoff_t)-1);
+ len = (size_t)(m->pmatch[i].rm_eo - m->pmatch[i].rm_so);
+ if (len == 0)
return(NULL);
assert(stop - m->beginp >= len);
if (sp > stop - len)
return(NULL); /* not enough left to match */
- ssp = m->offp + m->pmatch[i].rm_so;
+ ssp = m->offp + (size_t)m->pmatch[i].rm_so;
if (memcmp(sp, ssp, len) != 0)
return(NULL);
while (m->g->strip[ss] != SOP(O_BACK, i))
ss++;
- return(backref(m, sp+len, stop, ss+1, stopst, lev, rec));
- break;
+ return(backref(m, sp+len, stop, ss+1, stopst, lev));
+
case OQUEST_: /* to null or not */
- dp = backref(m, sp, stop, ss+1, stopst, lev, rec);
+ dp = backref(m, sp, stop, ss+1, stopst, lev);
if (dp != NULL)
return(dp); /* not */
- return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev, rec));
- break;
+ return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev));
+
case OPLUS_:
assert(m->lastpos != NULL);
assert(lev+1 <= m->g->nplus);
m->lastpos[lev+1] = sp;
- return(backref(m, sp, stop, ss+1, stopst, lev+1, rec));
- break;
+ return(backref(m, sp, stop, ss+1, stopst, lev+1));
+
case O_PLUS:
if (sp == m->lastpos[lev]) /* last pass matched null */
- return(backref(m, sp, stop, ss+1, stopst, lev-1, rec));
+ return(backref(m, sp, stop, ss+1, stopst, lev-1));
/* try another pass */
m->lastpos[lev] = sp;
- dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev, rec);
+ dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev);
if (dp == NULL)
- return(backref(m, sp, stop, ss+1, stopst, lev-1, rec));
- else
- return(dp);
- break;
+ dp = backref(m, sp, stop, ss+1, stopst, lev-1);
+ return(dp);
+
case OCH_: /* find the right one, if any */
ssub = ss + 1;
esub = ss + OPND(s) - 1;
assert(OP(m->g->strip[esub]) == OOR1);
for (;;) { /* find first matching branch */
- dp = backref(m, sp, stop, ssub, esub, lev, rec);
+ dp = backref(m, sp, stop, ssub, esub, lev);
if (dp != NULL)
return(dp);
/* that one missed, try next one */
@@ -631,29 +729,29 @@ backref(struct match *m, char *start, char *stop, sopno startst, sopno stopst,
else
assert(OP(m->g->strip[esub]) == O_CH);
}
- break;
+
case OLPAREN: /* must undo assignment if rest fails */
i = OPND(s);
assert(0 < i && i <= m->g->nsub);
offsave = m->pmatch[i].rm_so;
m->pmatch[i].rm_so = sp - m->offp;
- dp = backref(m, sp, stop, ss+1, stopst, lev, rec);
+ dp = backref(m, sp, stop, ss+1, stopst, lev);
if (dp != NULL)
return(dp);
m->pmatch[i].rm_so = offsave;
return(NULL);
- break;
+
case ORPAREN: /* must undo assignment if rest fails */
i = OPND(s);
assert(0 < i && i <= m->g->nsub);
offsave = m->pmatch[i].rm_eo;
m->pmatch[i].rm_eo = sp - m->offp;
- dp = backref(m, sp, stop, ss+1, stopst, lev, rec);
+ dp = backref(m, sp, stop, ss+1, stopst, lev);
if (dp != NULL)
return(dp);
m->pmatch[i].rm_eo = offsave;
return(NULL);
- break;
+
default: /* uh oh */
assert(nope);
break;
@@ -662,24 +760,35 @@ backref(struct match *m, char *start, char *stop, sopno startst, sopno stopst,
/* "can't happen" */
assert(nope);
/* NOTREACHED */
- return 0;
+ return NULL;
}
/*
- fast - step through the string at top speed
+ == static const char *fast(struct match *m, const char *start, \
+ == const char *stop, sopno startst, sopno stopst);
*/
-static char * /* where tentative match ended, or NULL */
-fast(struct match *m, char *start, char *stop, sopno startst, sopno stopst)
+static const char * /* where tentative match ended, or NULL */
+fast(
+ struct match *m,
+ const char *start,
+ const char *stop,
+ sopno startst,
+ sopno stopst)
{
states st = m->st;
states fresh = m->fresh;
states tmp = m->tmp;
- char *p = start;
+ const char *p = start;
int c = (start == m->beginp) ? OUT : *(start-1);
int lastc; /* previous c */
int flagch;
- int i;
- char *coldp; /* last p after which no match was underway */
+ size_t i;
+ const char *coldp; /* last p after which no match was underway */
+
+ _DIAGASSERT(m != NULL);
+ _DIAGASSERT(start != NULL);
+ _DIAGASSERT(stop != NULL);
CLEAR(st);
SET1(st, startst);
@@ -751,19 +860,30 @@ fast(struct match *m, char *start, char *stop, sopno startst, sopno stopst)
/*
- slow - step through the string more deliberately
+ == static const char *slow(struct match *m, const char *start, \
+ == const char *stop, sopno startst, sopno stopst);
*/
-static char * /* where it ended */
-slow(struct match *m, char *start, char *stop, sopno startst, sopno stopst)
+static const char * /* where it ended */
+slow(
+ struct match *m,
+ const char *start,
+ const char *stop,
+ sopno startst,
+ sopno stopst)
{
states st = m->st;
states empty = m->empty;
states tmp = m->tmp;
- char *p = start;
+ const char *p = start;
int c = (start == m->beginp) ? OUT : *(start-1);
int lastc; /* previous c */
int flagch;
- int i;
- char *matchp; /* last p at which a match ended */
+ size_t i;
+ const char *matchp; /* last p at which a match ended */
+
+ _DIAGASSERT(m != NULL);
+ _DIAGASSERT(start != NULL);
+ _DIAGASSERT(stop != NULL);
AT("slow", start, stop, startst, stopst);
CLEAR(st);
@@ -831,9 +951,21 @@ slow(struct match *m, char *start, char *stop, sopno startst, sopno stopst)
/*
- step - map set of states reachable before char to set reachable after
+ == static states step(struct re_guts *g, sopno start, sopno stop, \
+ == states bef, int ch, states aft);
+ == #define BOL (OUT+1)
+ == #define EOL (BOL+1)
+ == #define BOLEOL (BOL+2)
+ == #define NOTHING (BOL+3)
+ == #define BOW (BOL+4)
+ == #define EOW (BOL+5)
+ == #define CODEMAX (BOL+5) // highest code used
+ == #define NONCHAR(c) ((c) > CHAR_MAX)
+ == #define NNONCHAR (CODEMAX-CHAR_MAX)
*/
static states
-step(struct re_guts *g,
+step(
+ struct re_guts *g,
sopno start, /* start state within strip */
sopno stop, /* state after stop state within strip */
states bef, /* states reachable before */
@@ -847,6 +979,8 @@ step(struct re_guts *g,
sopno look;
int i;
+ _DIAGASSERT(g != NULL);
+
for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) {
s = g->strip[pc];
switch (OP(s)) {
@@ -948,47 +1082,79 @@ step(struct re_guts *g,
#ifdef REDEBUG
/*
- print - print a set of states
+ == #ifdef REDEBUG
+ == static void print(struct match *m, char *caption, states st, \
+ == int ch, FILE *d);
+ == #endif
*/
static void
-print(struct match *m, char *caption, states st, int ch, FILE *d)
+print(
+ struct match *m,
+ char *caption,
+ states st,
+ int ch,
+ FILE *d)
{
struct re_guts *g = m->g;
int i;
int first = 1;
+ _DIAGASSERT(m != NULL);
+ _DIAGASSERT(caption != NULL);
+
if (!(m->eflags&REG_TRACE))
return;
- (void)fprintf(d, "%s", caption);
+ _DIAGASSERT(d != NULL);
+
+ fprintf(d, "%s", caption);
if (ch != '\0')
- (void)fprintf(d, " %s", pchar(ch));
+ fprintf(d, " %s", pchar(ch));
for (i = 0; i < g->nstates; i++)
if (ISSET(st, i)) {
- (void)fprintf(d, "%s%d", (first) ? "\t" : ", ", i);
+ fprintf(d, "%s%d", (first) ? "\t" : ", ", i);
first = 0;
}
- (void)fprintf(d, "\n");
+ fprintf(d, "\n");
}
/*
- at - print current situation
+ == #ifdef REDEBUG
+ == static void at(struct match *m, char *title, char *start, char *stop, \
+ == sopno startst, sopno stopst);
+ == #endif
*/
static void
-at(struct match *m, char *title, char *start, char *stop, sopno startst,
+at(
+ struct match *m,
+ char *title,
+ char *start,
+ char *stop,
+ sopno startst,
sopno stopst)
{
+
+ _DIAGASSERT(m != NULL);
+ _DIAGASSERT(title != NULL);
+ _DIAGASSERT(start != NULL);
+ _DIAGASSERT(stop != NULL);
+
if (!(m->eflags&REG_TRACE))
return;
- (void)printf("%s %s-", title, pchar(*start));
- (void)printf("%s ", pchar(*stop));
- (void)printf("%ld-%ld\n", (long)startst, (long)stopst);
+ printf("%s %s-", title, pchar(*start));
+ printf("%s ", pchar(*stop));
+ printf("%ld-%ld\n", (long)startst, (long)stopst);
}
#ifndef PCHARDONE
#define PCHARDONE /* never again */
/*
- pchar - make a character printable
+ == #ifdef REDEBUG
+ == static char *pchar(int ch);
+ == #endif
*
* Is this identical to regchar() over in debug.c? Well, yes. But a
* duplicate here avoids having a debugging-capable regexec.o tied to
@@ -996,7 +1162,8 @@ at(struct match *m, char *title, char *start, char *stop, sopno startst,
* the non-debug compilation anyway, so it doesn't matter much.
*/
static char * /* -> representation */
-pchar(int ch)
+pchar(
+ int ch)
{
static char pbuf[10];
diff --git a/libc/regex/regcomp.c b/libc/upstream-netbsd/libc/regex/regcomp.c
index 19f4790..2644a22 100644
--- a/libc/regex/regcomp.c
+++ b/libc/upstream-netbsd/libc/regex/regcomp.c
@@ -1,6 +1,6 @@
-/* $OpenBSD: regcomp.c,v 1.19 2008/02/23 08:13:07 otto Exp $ */
+/* $NetBSD: regcomp.c,v 1.33 2012/03/13 21:13:43 christos Exp $ */
+
/*-
- * Copyright (c) 1992, 1993, 1994 Henry Spencer.
* Copyright (c) 1992, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
@@ -34,14 +34,67 @@
* @(#)regcomp.c 8.5 (Berkeley) 3/20/94
*/
+/*-
+ * Copyright (c) 1992, 1993, 1994 Henry Spencer.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Henry Spencer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)regcomp.c 8.5 (Berkeley) 3/20/94
+ */
+
+#include <sys/cdefs.h>
+#if defined(LIBC_SCCS) && !defined(lint)
+#if 0
+static char sccsid[] = "@(#)regcomp.c 8.5 (Berkeley) 3/20/94";
+#else
+__RCSID("$NetBSD: regcomp.c,v 1.33 2012/03/13 21:13:43 christos Exp $");
+#endif
+#endif /* LIBC_SCCS and not lint */
+
+#include "namespace.h"
#include <sys/types.h>
-#include <stdio.h>
-#include <string.h>
+
+#include <assert.h>
#include <ctype.h>
#include <limits.h>
+#include <stdio.h>
#include <stdlib.h>
+#include <string.h>
#include <regex.h>
+#ifdef __weak_alias
+__weak_alias(regcomp,_regcomp)
+#endif
+
#include "utils.h"
#include "regex2.h"
@@ -53,56 +106,72 @@
* other clumsinesses
*/
struct parse {
- char *next; /* next character in RE */
- char *end; /* end of string (-> NUL normally) */
+ const char *next; /* next character in RE */
+ const char *end; /* end of string (-> NUL normally) */
int error; /* has an error been seen? */
sop *strip; /* malloced strip */
sopno ssize; /* malloced strip size (allocated) */
sopno slen; /* malloced strip length (used) */
- int ncsalloc; /* number of csets allocated */
+ size_t ncsalloc; /* number of csets allocated */
struct re_guts *g;
# define NPAREN 10 /* we need to remember () 1-9 for back refs */
sopno pbegin[NPAREN]; /* -> ( ([0] unused) */
sopno pend[NPAREN]; /* -> ) ([0] unused) */
};
-static void p_ere(struct parse *, int);
-static void p_ere_exp(struct parse *);
-static void p_str(struct parse *);
-static void p_bre(struct parse *, int, int);
-static int p_simp_re(struct parse *, int);
-static int p_count(struct parse *);
-static void p_bracket(struct parse *);
-static void p_b_term(struct parse *, cset *);
-static void p_b_cclass(struct parse *, cset *);
-static void p_b_eclass(struct parse *, cset *);
-static char p_b_symbol(struct parse *);
-static char p_b_coll_elem(struct parse *, int);
-static char othercase(int);
-static void bothcases(struct parse *, int);
-static void ordinary(struct parse *, int);
-static void nonnewline(struct parse *);
-static void repeat(struct parse *, sopno, int, int);
-static int seterr(struct parse *, int);
-static cset *allocset(struct parse *);
-static void freeset(struct parse *, cset *);
-static int freezeset(struct parse *, cset *);
-static int firstch(struct parse *, cset *);
-static int nch(struct parse *, cset *);
-static void mcadd(struct parse *, cset *, char *);
-static void mcinvert(struct parse *, cset *);
-static void mccase(struct parse *, cset *);
-static int isinsets(struct re_guts *, int);
-static int samesets(struct re_guts *, int, int);
-static void categorize(struct parse *, struct re_guts *);
-static sopno dupl(struct parse *, sopno, sopno);
-static void doemit(struct parse *, sop, size_t);
-static void doinsert(struct parse *, sop, size_t, sopno);
-static void dofwd(struct parse *, sopno, sop);
-static void enlarge(struct parse *, sopno);
-static void stripsnug(struct parse *, struct re_guts *);
-static void findmust(struct parse *, struct re_guts *);
-static sopno pluscount(struct parse *, struct re_guts *);
+/* ========= begin header generated by ./mkh ========= */
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* === regcomp.c === */
+static void p_ere(struct parse *p, int stop, size_t reclimit);
+static void p_ere_exp(struct parse *p, size_t reclimit);
+static void p_str(struct parse *p);
+static void p_bre(struct parse *p, int end1, int end2, size_t reclimit);
+static int p_simp_re(struct parse *p, int starordinary, size_t reclimit);
+static int p_count(struct parse *p);
+static void p_bracket(struct parse *p);
+static void p_b_term(struct parse *p, cset *cs);
+static void p_b_cclass(struct parse *p, cset *cs);
+static void p_b_eclass(struct parse *p, cset *cs);
+static char p_b_symbol(struct parse *p);
+static char p_b_coll_elem(struct parse *p, int endc);
+static int othercase(int ch);
+static void bothcases(struct parse *p, int ch);
+static void ordinary(struct parse *p, int ch);
+static void nonnewline(struct parse *p);
+static void repeat(struct parse *p, sopno start, int from, int to, size_t reclimit);
+static int seterr(struct parse *p, int e);
+static cset *allocset(struct parse *p);
+static void freeset(struct parse *p, cset *cs);
+static sopno freezeset(struct parse *p, cset *cs);
+static int firstch(struct parse *p, cset *cs);
+static int nch(struct parse *p, cset *cs);
+static void mcadd(struct parse *p, cset *cs, const char *cp);
+#if 0
+static void mcsub(cset *cs, char *cp);
+static int mcin(cset *cs, char *cp);
+static char *mcfind(cset *cs, char *cp);
+#endif
+static void mcinvert(struct parse *p, cset *cs);
+static void mccase(struct parse *p, cset *cs);
+static int isinsets(struct re_guts *g, int c);
+static int samesets(struct re_guts *g, int c1, int c2);
+static void categorize(struct parse *p, struct re_guts *g);
+static sopno dupl(struct parse *p, sopno start, sopno finish);
+static void doemit(struct parse *p, sop op, sopno opnd);
+static void doinsert(struct parse *p, sop op, sopno opnd, sopno pos);
+static void dofwd(struct parse *p, sopno pos, sopno value);
+static int enlarge(struct parse *p, sopno size);
+static void stripsnug(struct parse *p, struct re_guts *g);
+static void findmust(struct parse *p, struct re_guts *g);
+static sopno pluscount(struct parse *p, struct re_guts *g);
+
+#ifdef __cplusplus
+}
+#endif
+/* ========= end header generated by ./mkh ========= */
static char nuls[10]; /* place to point scanner in event of error */
@@ -123,11 +192,11 @@ static char nuls[10]; /* place to point scanner in event of error */
#define NEXTn(n) (p->next += (n))
#define GETNEXT() (*p->next++)
#define SETERROR(e) seterr(p, (e))
-#define REQUIRE(co, e) ((co) || SETERROR(e))
+#define REQUIRE(co, e) (void) ((co) || SETERROR(e))
#define MUSTSEE(c, e) (REQUIRE(MORE() && PEEK() == (c), e))
-#define MUSTEAT(c, e) (REQUIRE(MORE() && GETNEXT() == (c), e))
+#define MUSTEAT(c, e) (void) (REQUIRE(MORE() && GETNEXT() == (c), e))
#define MUSTNOTSEE(c, e) (REQUIRE(!MORE() || PEEK() != (c), e))
-#define EMIT(op, sopnd) doemit(p, (sop)(op), (size_t)(sopnd))
+#define EMIT(op, sopnd) doemit(p, (sop)(op), sopnd)
#define INSERT(op, pos) doinsert(p, (sop)(op), HERE()-(pos)+1, pos)
#define AHEAD(pos) dofwd(p, pos, HERE()-(pos))
#define ASTERN(sop, pos) EMIT(sop, HERE()-pos)
@@ -142,11 +211,30 @@ static int never = 0; /* for use in asserts; shuts lint up */
#define never 0 /* some <assert.h>s have bugs too */
#endif
+#define MEMLIMIT 0x8000000
+#define MEMSIZE(p) \
+ ((p)->ncsalloc / CHAR_BIT * (p)->g->csetsize + \
+ (p)->ncsalloc * sizeof(cset) + \
+ (p)->ssize * sizeof(sop))
+#define RECLIMIT 256
+
/*
- regcomp - interface for parser and compilation
+ = extern int regcomp(regex_t *, const char *, int);
+ = #define REG_BASIC 0000
+ = #define REG_EXTENDED 0001
+ = #define REG_ICASE 0002
+ = #define REG_NOSUB 0004
+ = #define REG_NEWLINE 0010
+ = #define REG_NOSPEC 0020
+ = #define REG_PEND 0040
+ = #define REG_DUMP 0200
*/
int /* 0 success, otherwise REG_something */
-regcomp(regex_t *preg, const char *pattern, int cflags)
+regcomp(
+ regex_t *preg,
+ const char *pattern,
+ int cflags)
{
struct parse pa;
struct re_guts *g;
@@ -159,6 +247,9 @@ regcomp(regex_t *preg, const char *pattern, int cflags)
# define GOODFLAGS(f) ((f)&~REG_DUMP)
#endif
+ _DIAGASSERT(preg != NULL);
+ _DIAGASSERT(pattern != NULL);
+
cflags = GOODFLAGS(cflags);
if ((cflags&REG_EXTENDED) && (cflags&REG_NOSPEC))
return(REG_INVARG);
@@ -168,7 +259,7 @@ regcomp(regex_t *preg, const char *pattern, int cflags)
return(REG_INVARG);
len = preg->re_endp - pattern;
} else
- len = strlen((char *)pattern);
+ len = strlen(pattern);
/* do the mallocs early so failure handling is easy */
g = (struct re_guts *)malloc(sizeof(struct re_guts) +
@@ -176,16 +267,16 @@ regcomp(regex_t *preg, const char *pattern, int cflags)
if (g == NULL)
return(REG_ESPACE);
p->ssize = len/(size_t)2*(size_t)3 + (size_t)1; /* ugh */
- p->strip = (sop *)calloc(p->ssize, sizeof(sop));
+ p->strip = malloc(p->ssize * sizeof(sop));
p->slen = 0;
if (p->strip == NULL) {
- free((char *)g);
+ free(g);
return(REG_ESPACE);
}
/* set things up */
p->g = g;
- p->next = (char *)pattern; /* convenience; we do not modify it */
+ p->next = pattern;
p->end = p->next + len;
p->error = 0;
p->ncsalloc = 0;
@@ -213,11 +304,11 @@ regcomp(regex_t *preg, const char *pattern, int cflags)
EMIT(OEND, 0);
g->firststate = THERE();
if (cflags&REG_EXTENDED)
- p_ere(p, OUT);
+ p_ere(p, OUT, 0);
else if (cflags&REG_NOSPEC)
p_str(p);
else
- p_bre(p, OUT, OUT);
+ p_bre(p, OUT, OUT, 0);
EMIT(OEND, 0);
g->laststate = THERE();
@@ -244,21 +335,32 @@ regcomp(regex_t *preg, const char *pattern, int cflags)
/*
- p_ere - ERE parser top level, concatenation and alternation
+ == static void p_ere(struct parse *p, int stop, size_t reclimit);
*/
static void
-p_ere(struct parse *p, int stop) /* character this ERE should end at */
+p_ere(
+ struct parse *p,
+ int stop, /* character this ERE should end at */
+ size_t reclimit)
{
char c;
- sopno prevback = 0;
- sopno prevfwd = 0;
+ sopno prevback = 0; /* pacify gcc */
+ sopno prevfwd = 0; /* pacify gcc */
sopno conc;
int first = 1; /* is this the first alternative? */
+ _DIAGASSERT(p != NULL);
+
+ if (reclimit++ > RECLIMIT || p->error == REG_ESPACE) {
+ p->error = REG_ESPACE;
+ return;
+ }
+
for (;;) {
/* do a bunch of concatenated expressions */
conc = HERE();
while (MORE() && (c = PEEK()) != '|' && c != stop)
- p_ere_exp(p);
+ p_ere_exp(p, reclimit);
REQUIRE(HERE() != conc, REG_EMPTY); /* require nonempty */
if (!EAT('|'))
@@ -287,9 +389,12 @@ p_ere(struct parse *p, int stop) /* character this ERE should end at */
/*
- p_ere_exp - parse one subERE, an atom possibly followed by a repetition op
+ == static void p_ere_exp(struct parse *p, size_t reclimit);
*/
static void
-p_ere_exp(struct parse *p)
+p_ere_exp(
+ struct parse *p,
+ size_t reclimit)
{
char c;
sopno pos;
@@ -298,6 +403,8 @@ p_ere_exp(struct parse *p)
sopno subno;
int wascaret = 0;
+ _DIAGASSERT(p != NULL);
+
assert(MORE()); /* caller should have ensured this */
c = GETNEXT();
@@ -311,7 +418,7 @@ p_ere_exp(struct parse *p)
p->pbegin[subno] = HERE();
EMIT(OLPAREN, subno);
if (!SEE(')'))
- p_ere(p, ')');
+ p_ere(p, ')', reclimit);
if (subno < NPAREN) {
p->pend[subno] = HERE();
assert(p->pend[subno] != 0);
@@ -365,7 +472,7 @@ p_ere_exp(struct parse *p)
ordinary(p, c);
break;
case '{': /* okay as ordinary except if digit follows */
- REQUIRE(!MORE() || !isdigit((uch)PEEK()), REG_BADRPT);
+ REQUIRE(!MORE() || !isdigit((unsigned char)PEEK()), REG_BADRPT);
/* FALLTHROUGH */
default:
ordinary(p, c);
@@ -377,7 +484,7 @@ p_ere_exp(struct parse *p)
c = PEEK();
/* we call { a repetition if followed by a digit */
if (!( c == '*' || c == '+' || c == '?' ||
- (c == '{' && MORE2() && isdigit((uch)PEEK2())) ))
+ (c == '{' && MORE2() && isdigit((unsigned char)PEEK2())) ))
return; /* no repetition, we're done */
NEXT();
@@ -406,14 +513,14 @@ p_ere_exp(struct parse *p)
case '{':
count = p_count(p);
if (EAT(',')) {
- if (isdigit((uch)PEEK())) {
+ if (isdigit((unsigned char)PEEK())) {
count2 = p_count(p);
REQUIRE(count <= count2, REG_BADBR);
} else /* single number with comma */
count2 = INFINITY;
} else /* just a single number */
count2 = count;
- repeat(p, pos, count, count2);
+ repeat(p, pos, count, count2, 0);
if (!EAT('}')) { /* error heuristics */
while (MORE() && PEEK() != '}')
NEXT();
@@ -427,17 +534,22 @@ p_ere_exp(struct parse *p)
return;
c = PEEK();
if (!( c == '*' || c == '+' || c == '?' ||
- (c == '{' && MORE2() && isdigit((uch)PEEK2())) ) )
+ (c == '{' && MORE2() && isdigit((unsigned char)PEEK2())) ) )
return;
SETERROR(REG_BADRPT);
}
/*
- p_str - string (no metacharacters) "parser"
+ == static void p_str(struct parse *p);
*/
static void
-p_str(struct parse *p)
+p_str(
+ struct parse *p)
{
+
+ _DIAGASSERT(p != NULL);
+
REQUIRE(MORE(), REG_EMPTY);
while (MORE())
ordinary(p, GETNEXT());
@@ -445,6 +557,8 @@ p_str(struct parse *p)
/*
- p_bre - BRE parser top level, anchoring and concatenation
+ == static void p_bre(struct parse *p, int end1, \
+ == int end2, size_t reclimit);
* Giving end1 as OUT essentially eliminates the end1/end2 check.
*
* This implementation is a bit of a kludge, in that a trailing $ is first
@@ -454,21 +568,32 @@ p_str(struct parse *p)
* The amount of lookahead needed to avoid this kludge is excessive.
*/
static void
-p_bre(struct parse *p,
+p_bre(
+ struct parse *p,
int end1, /* first terminating character */
- int end2) /* second terminating character */
+ int end2, /* second terminating character */
+ size_t reclimit)
{
- sopno start = HERE();
+ sopno start;
int first = 1; /* first subexpression? */
int wasdollar = 0;
+ _DIAGASSERT(p != NULL);
+
+ if (reclimit++ > RECLIMIT || p->error == REG_ESPACE) {
+ p->error = REG_ESPACE;
+ return;
+ }
+
+ start = HERE();
+
if (EAT('^')) {
EMIT(OBOL, 0);
p->g->iflags |= USEBOL;
p->g->nbol++;
}
while (MORE() && !SEETWO(end1, end2)) {
- wasdollar = p_simp_re(p, first);
+ wasdollar = p_simp_re(p, first, reclimit);
first = 0;
}
if (wasdollar) { /* oops, that was a trailing anchor */
@@ -483,26 +608,30 @@ p_bre(struct parse *p,
/*
- p_simp_re - parse a simple RE, an atom possibly followed by a repetition
+ == static int p_simp_re(struct parse *p, int starordinary, size_t reclimit);
*/
static int /* was the simple RE an unbackslashed $? */
-p_simp_re(struct parse *p,
- int starordinary) /* is a leading * an ordinary character? */
+p_simp_re(
+ struct parse *p,
+ int starordinary, /* is a leading * an ordinary character? */
+ size_t reclimit)
{
int c;
int count;
int count2;
- sopno pos;
- int i;
+ sopno pos, i;
sopno subno;
# define BACKSL (1<<CHAR_BIT)
+ _DIAGASSERT(p != NULL);
+
pos = HERE(); /* repetition op, if any, covers from here */
assert(MORE()); /* caller should have ensured this */
c = GETNEXT();
if (c == '\\') {
REQUIRE(MORE(), REG_EESCAPE);
- c = BACKSL | GETNEXT();
+ c = BACKSL | (unsigned char)GETNEXT();
}
switch (c) {
case '.':
@@ -525,7 +654,7 @@ p_simp_re(struct parse *p,
EMIT(OLPAREN, subno);
/* the MORE here is an error heuristic */
if (MORE() && !SEETWO('\\', ')'))
- p_bre(p, '\\', ')');
+ p_bre(p, '\\', ')', reclimit);
if (subno < NPAREN) {
p->pend[subno] = HERE();
assert(p->pend[subno] != 0);
@@ -564,7 +693,7 @@ p_simp_re(struct parse *p,
REQUIRE(starordinary, REG_BADRPT);
/* FALLTHROUGH */
default:
- ordinary(p, (char)c);
+ ordinary(p, c &~ BACKSL);
break;
}
@@ -577,21 +706,21 @@ p_simp_re(struct parse *p,
} else if (EATTWO('\\', '{')) {
count = p_count(p);
if (EAT(',')) {
- if (MORE() && isdigit((uch)PEEK())) {
+ if (MORE() && isdigit((unsigned char)PEEK())) {
count2 = p_count(p);
REQUIRE(count <= count2, REG_BADBR);
} else /* single number with comma */
count2 = INFINITY;
} else /* just a single number */
count2 = count;
- repeat(p, pos, count, count2);
+ repeat(p, pos, count, count2, 0);
if (!EATTWO('\\', '}')) { /* error heuristics */
while (MORE() && !SEETWO('\\', '}'))
NEXT();
REQUIRE(MORE(), REG_EBRACE);
SETERROR(REG_BADBR);
}
- } else if (c == '$') /* $ (but not \$) ends it */
+ } else if (c == (unsigned char)'$') /* $ (but not \$) ends it */
return(1);
return(0);
@@ -599,14 +728,18 @@ p_simp_re(struct parse *p,
/*
- p_count - parse a repetition count
+ == static int p_count(struct parse *p);
*/
static int /* the value */
-p_count(struct parse *p)
+p_count(
+ struct parse *p)
{
int count = 0;
int ndigits = 0;
- while (MORE() && isdigit((uch)PEEK()) && count <= DUPMAX) {
+ _DIAGASSERT(p != NULL);
+
+ while (MORE() && isdigit((unsigned char)PEEK()) && count <= DUPMAX) {
count = count*10 + (GETNEXT() - '0');
ndigits++;
}
@@ -617,33 +750,37 @@ p_count(struct parse *p)
/*
- p_bracket - parse a bracketed character list
+ == static void p_bracket(struct parse *p);
*
* Note a significant property of this code: if the allocset() did SETERROR,
* no set operations are done.
*/
static void
-p_bracket(struct parse *p)
+p_bracket(
+ struct parse *p)
{
cset *cs;
int invert = 0;
+ _DIAGASSERT(p != NULL);
+
+ cs = allocset(p);
+ if (cs == NULL)
+ return;
/* Dept of Truly Sickening Special-Case Kludges */
- if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]", 6) == 0) {
+ if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]",
+ (size_t)6) == 0) {
EMIT(OBOW, 0);
NEXTn(6);
return;
}
- if (p->next + 5 < p->end && strncmp(p->next, "[:>:]]", 6) == 0) {
+ if (p->next + 5 < p->end && strncmp(p->next, "[:>:]]",
+ (size_t)6) == 0) {
EMIT(OEOW, 0);
NEXTn(6);
return;
}
- if ((cs = allocset(p)) == NULL) {
- /* allocset did set error status in p */
- return;
- }
-
if (EAT('^'))
invert++; /* make note to invert set at end */
if (EAT(']'))
@@ -656,18 +793,16 @@ p_bracket(struct parse *p)
CHadd(cs, '-');
MUSTEAT(']', REG_EBRACK);
- if (p->error != 0) { /* don't mess things up further */
- freeset(p, cs);
+ if (p->error != 0) /* don't mess things up further */
return;
- }
if (p->g->cflags&REG_ICASE) {
- int i;
+ ssize_t i;
int ci;
for (i = p->g->csetsize - 1; i >= 0; i--)
if (CHIN(cs, i) && isalpha(i)) {
- ci = othercase(i);
+ ci = othercase((int)i);
if (ci != i)
CHadd(cs, ci);
}
@@ -675,13 +810,13 @@ p_bracket(struct parse *p)
mccase(p, cs);
}
if (invert) {
- int i;
+ ssize_t i;
for (i = p->g->csetsize - 1; i >= 0; i--)
if (CHIN(cs, i))
- CHsub(cs, i);
+ CHsub(cs, (int)i);
else
- CHadd(cs, i);
+ CHadd(cs, (int)i);
if (p->g->cflags&REG_NEWLINE)
CHsub(cs, '\n');
if (cs->multis != NULL)
@@ -699,23 +834,30 @@ p_bracket(struct parse *p)
/*
- p_b_term - parse one term of a bracketed character list
+ == static void p_b_term(struct parse *p, cset *cs);
*/
static void
-p_b_term(struct parse *p, cset *cs)
+p_b_term(
+ struct parse *p,
+ cset *cs)
{
char c;
char start, finish;
int i;
+ _DIAGASSERT(p != NULL);
+ _DIAGASSERT(cs != NULL);
+
/* classify what we've got */
switch ((MORE()) ? PEEK() : '\0') {
case '[':
c = (MORE2()) ? PEEK2() : '\0';
break;
+
case '-':
SETERROR(REG_ERANGE);
return; /* NOTE RETURN */
- break;
+
default:
c = '\0';
break;
@@ -762,17 +904,25 @@ p_b_term(struct parse *p, cset *cs)
/*
- p_b_cclass - parse a character-class name and deal with it
+ == static void p_b_cclass(struct parse *p, cset *cs);
*/
static void
-p_b_cclass(struct parse *p, cset *cs)
+p_b_cclass(
+ struct parse *p,
+ cset *cs)
{
- char *sp = p->next;
+ const char *sp;
const struct cclass *cp;
size_t len;
- char *u;
+ const char *u;
char c;
- while (MORE() && isalpha(PEEK()))
+ _DIAGASSERT(p != NULL);
+ _DIAGASSERT(cs != NULL);
+
+ sp = p->next;
+
+ while (MORE() && isalpha((unsigned char)PEEK()))
NEXT();
len = p->next - sp;
for (cp = cclasses; cp->name != NULL; cp++)
@@ -793,26 +943,36 @@ p_b_cclass(struct parse *p, cset *cs)
/*
- p_b_eclass - parse an equivalence-class name and deal with it
+ == static void p_b_eclass(struct parse *p, cset *cs);
*
* This implementation is incomplete. xxx
*/
static void
-p_b_eclass(struct parse *p, cset *cs)
+p_b_eclass(
+ struct parse *p,
+ cset *cs)
{
char c;
+ _DIAGASSERT(p != NULL);
+ _DIAGASSERT(cs != NULL);
+
c = p_b_coll_elem(p, '=');
CHadd(cs, c);
}
/*
- p_b_symbol - parse a character or [..]ed multicharacter collating symbol
+ == static char p_b_symbol(struct parse *p);
*/
static char /* value of symbol */
-p_b_symbol(struct parse *p)
+p_b_symbol(
+ struct parse *p)
{
char value;
+ _DIAGASSERT(p != NULL);
+
REQUIRE(MORE(), REG_EBRACK);
if (!EATTWO('[', '.'))
return(GETNEXT());
@@ -825,14 +985,20 @@ p_b_symbol(struct parse *p)
/*
- p_b_coll_elem - parse a collating-element name and look it up
+ == static char p_b_coll_elem(struct parse *p, int endc);
*/
static char /* value of collating element */
-p_b_coll_elem(struct parse *p,
+p_b_coll_elem(
+ struct parse *p,
int endc) /* name ended by endc,']' */
{
- char *sp = p->next;
+ const char *sp;
const struct cname *cp;
- int len;
+ size_t len;
+
+ _DIAGASSERT(p != NULL);
+
+ sp = p->next;
while (MORE() && !SEETWO(endc, ']'))
NEXT();
@@ -852,33 +1018,41 @@ p_b_coll_elem(struct parse *p,
/*
- othercase - return the case counterpart of an alphabetic
+ == static int othercase(int ch);
*/
-static char /* if no counterpart, return ch */
-othercase(int ch)
+static int /* if no counterpart, return ch */
+othercase(
+ int ch)
{
- ch = (uch)ch;
assert(isalpha(ch));
if (isupper(ch))
- return ((uch)tolower(ch));
+ return(tolower(ch));
else if (islower(ch))
- return ((uch)toupper(ch));
+ return(toupper(ch));
else /* peculiar, but could happen */
return(ch);
}
/*
- bothcases - emit a dualcase version of a two-case character
+ == static void bothcases(struct parse *p, int ch);
*
* Boy, is this implementation ever a kludge...
*/
static void
-bothcases(struct parse *p, int ch)
+bothcases(
+ struct parse *p,
+ int ch)
{
- char *oldnext = p->next;
- char *oldend = p->end;
+ const char *oldnext;
+ const char *oldend;
char bracket[3];
- ch = (uch)ch;
+ _DIAGASSERT(p != NULL);
+
+ oldnext = p->next;
+ oldend = p->end;
+
assert(othercase(ch) != ch); /* p_bracket() would recurse */
p->next = bracket;
p->end = bracket+2;
@@ -893,33 +1067,50 @@ bothcases(struct parse *p, int ch)
/*
- ordinary - emit an ordinary character
+ == static void ordinary(struct parse *p, int ch);
*/
static void
-ordinary(struct parse *p, int ch)
+ordinary(
+ struct parse *p,
+ int ch)
{
- cat_t *cap = p->g->categories;
+ cat_t *cap;
+
+ _DIAGASSERT(p != NULL);
- if ((p->g->cflags&REG_ICASE) && isalpha((uch)ch) && othercase(ch) != ch)
- bothcases(p, ch);
+ cap = p->g->categories;
+ if ((p->g->cflags&REG_ICASE) && isalpha((unsigned char) ch)
+ && othercase((unsigned char) ch) != (unsigned char) ch)
+ bothcases(p, (unsigned char) ch);
else {
- EMIT(OCHAR, (uch)ch);
- if (cap[ch] == 0)
- cap[ch] = p->g->ncategories++;
+ EMIT(OCHAR, (sopno)(unsigned char)ch);
+ if (cap[ch] == 0) {
+ _DIAGASSERT(__type_fit(unsigned char,
+ p->g->ncategories + 1));
+ cap[ch] = (unsigned char)p->g->ncategories++;
+ }
}
}
/*
- nonnewline - emit REG_NEWLINE version of OANY
+ == static void nonnewline(struct parse *p);
*
* Boy, is this implementation ever a kludge...
*/
static void
-nonnewline(struct parse *p)
+nonnewline(
+ struct parse *p)
{
- char *oldnext = p->next;
- char *oldend = p->end;
+ const char *oldnext;
+ const char *oldend;
char bracket[4];
+ _DIAGASSERT(p != NULL);
+
+ oldnext = p->next;
+ oldend = p->end;
+
p->next = bracket;
p->end = bracket+3;
bracket[0] = '^';
@@ -934,23 +1125,33 @@ nonnewline(struct parse *p)
/*
- repeat - generate code for a bounded repetition, recursively if needed
+ == static void repeat(struct parse *p, sopno start, int from, int to,
+ == size_t reclimit);
*/
static void
-repeat(struct parse *p,
+repeat(
+ struct parse *p,
sopno start, /* operand from here to end of strip */
int from, /* repeated from this number */
- int to) /* to this number of times (maybe INFINITY) */
+ int to, /* to this number of times (maybe INFINITY) */
+ size_t reclimit)
{
- sopno finish = HERE();
+ sopno finish;
# define N 2
# define INF 3
# define REP(f, t) ((f)*8 + (t))
# define MAP(n) (((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N)
sopno copy;
- if (p->error != 0) /* head off possible runaway recursion */
+ _DIAGASSERT(p != NULL);
+
+ if (reclimit++ > RECLIMIT)
+ p->error = REG_ESPACE;
+ if (p->error)
return;
+ finish = HERE();
+
assert(from <= to);
switch (REP(MAP(from), MAP(to))) {
@@ -962,7 +1163,7 @@ repeat(struct parse *p,
case REP(0, INF): /* as x{1,}? */
/* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
INSERT(OCH_, start); /* offset is wrong... */
- repeat(p, start+1, 1, to);
+ repeat(p, start+1, 1, to, reclimit);
ASTERN(OOR1, start);
AHEAD(start); /* ... fix it */
EMIT(OOR2, 0);
@@ -982,7 +1183,7 @@ repeat(struct parse *p,
ASTERN(O_CH, THERETHERE());
copy = dupl(p, start+1, finish+1);
assert(copy == finish+4);
- repeat(p, copy, 1, to-1);
+ repeat(p, copy, 1, to-1, reclimit);
break;
case REP(1, INF): /* as x+ */
INSERT(OPLUS_, start);
@@ -990,11 +1191,11 @@ repeat(struct parse *p,
break;
case REP(N, N): /* as xx{m-1,n-1} */
copy = dupl(p, start, finish);
- repeat(p, copy, from-1, to-1);
+ repeat(p, copy, from-1, to-1, reclimit);
break;
case REP(N, INF): /* as xx{n-1,INF} */
copy = dupl(p, start, finish);
- repeat(p, copy, from-1, to);
+ repeat(p, copy, from-1, to, reclimit);
break;
default: /* "can't happen" */
SETERROR(REG_ASSERT); /* just in case */
@@ -1004,10 +1205,16 @@ repeat(struct parse *p,
/*
- seterr - set an error condition
+ == static int seterr(struct parse *p, int e);
*/
static int /* useless but makes type checking happy */
-seterr(struct parse *p, int e)
+seterr(
+ struct parse *p,
+ int e)
{
+
+ _DIAGASSERT(p != NULL);
+
if (p->error == 0) /* keep earliest error condition */
p->error = e;
p->next = nuls; /* try to bring things to a halt */
@@ -1017,81 +1224,92 @@ seterr(struct parse *p, int e)
/*
- allocset - allocate a set of characters for []
+ == static cset *allocset(struct parse *p);
*/
static cset *
-allocset(struct parse *p)
+allocset(
+ struct parse *p)
{
- int no = p->g->ncsets++;
+ size_t no;
size_t nc;
size_t nbytes;
cset *cs;
- size_t css = (size_t)p->g->csetsize;
- int i;
+ size_t css;
+ size_t i;
- if (no >= p->ncsalloc) { /* need another column of space */
- void *ptr;
+ _DIAGASSERT(p != NULL);
+ no = p->g->ncsets++;
+ css = (size_t)p->g->csetsize;
+ if (no >= p->ncsalloc) { /* need another column of space */
p->ncsalloc += CHAR_BIT;
nc = p->ncsalloc;
assert(nc % CHAR_BIT == 0);
nbytes = nc / CHAR_BIT * css;
-
- ptr = (cset *)realloc((char *)p->g->sets, nc * sizeof(cset));
- if (ptr == NULL)
- goto nomem;
- p->g->sets = ptr;
-
- ptr = (uch *)realloc((char *)p->g->setbits, nbytes);
- if (ptr == NULL)
- goto nomem;
- p->g->setbits = ptr;
-
- for (i = 0; i < no; i++)
- p->g->sets[i].ptr = p->g->setbits + css*(i/CHAR_BIT);
-
- (void) memset((char *)p->g->setbits + (nbytes - css), 0, css);
+ if (MEMSIZE(p) > MEMLIMIT)
+ goto oomem;
+ if (p->g->sets == NULL)
+ p->g->sets = malloc(nc * sizeof(cset));
+ else
+ p->g->sets = realloc(p->g->sets, nc * sizeof(cset));
+ if (p->g->setbits == NULL)
+ p->g->setbits = malloc(nbytes);
+ else {
+ p->g->setbits = realloc(p->g->setbits, nbytes);
+ /* xxx this isn't right if setbits is now NULL */
+ for (i = 0; i < no; i++)
+ p->g->sets[i].ptr = p->g->setbits + css*(i/CHAR_BIT);
+ }
+ if (p->g->sets != NULL && p->g->setbits != NULL)
+ (void) memset((char *)p->g->setbits + (nbytes - css),
+ 0, css);
+ else {
+oomem:
+ no = 0;
+ SETERROR(REG_ESPACE);
+ /* caller's responsibility not to do set ops */
+ return NULL;
+ }
}
- /* XXX should not happen */
- if (p->g->sets == NULL || p->g->setbits == NULL)
- goto nomem;
cs = &p->g->sets[no];
cs->ptr = p->g->setbits + css*((no)/CHAR_BIT);
- cs->mask = 1 << ((no) % CHAR_BIT);
+ cs->mask = 1 << (unsigned int)((no) % CHAR_BIT);
cs->hash = 0;
cs->smultis = 0;
cs->multis = NULL;
return(cs);
-nomem:
- free(p->g->sets);
- p->g->sets = NULL;
- free(p->g->setbits);
- p->g->setbits = NULL;
-
- SETERROR(REG_ESPACE);
- /* caller's responsibility not to do set ops */
- return(NULL);
}
/*
- freeset - free a now-unused set
+ == static void freeset(struct parse *p, cset *cs);
*/
static void
-freeset(struct parse *p, cset *cs)
+freeset(
+ struct parse *p,
+ cset *cs)
{
- int i;
- cset *top = &p->g->sets[p->g->ncsets];
- size_t css = (size_t)p->g->csetsize;
+ size_t i;
+ cset *top;
+ size_t css;
+
+ _DIAGASSERT(p != NULL);
+ _DIAGASSERT(cs != NULL);
- for (i = 0; i < (ssize_t)css; i++)
- CHsub(cs, i);
+ top = &p->g->sets[p->g->ncsets];
+ css = (size_t)p->g->csetsize;
+
+ for (i = 0; i < css; i++)
+ CHsub(cs, (int)i);
if (cs == top-1) /* recover only the easy case */
p->g->ncsets--;
}
/*
- freezeset - final processing on a set of characters
+ == static sopno freezeset(struct parse *p, cset *cs);
*
* The main task here is merging identical sets. This is usually a waste
* of time (although the hash code minimizes the overhead), but can win
@@ -1099,23 +1317,32 @@ freeset(struct parse *p, cset *cs)
* is done using addition rather than xor -- all ASCII [aA] sets xor to
* the same value!
*/
-static int /* set number */
-freezeset(struct parse *p, cset *cs)
+static sopno /* set number */
+freezeset(
+ struct parse *p,
+ cset *cs)
{
- uch h = cs->hash;
- int i;
- cset *top = &p->g->sets[p->g->ncsets];
+ uch h;
+ size_t i;
+ cset *top;
cset *cs2;
- size_t css = (size_t)p->g->csetsize;
+ size_t css;
+
+ _DIAGASSERT(p != NULL);
+ _DIAGASSERT(cs != NULL);
+
+ h = cs->hash;
+ top = &p->g->sets[p->g->ncsets];
+ css = (size_t)p->g->csetsize;
/* look for an earlier one which is the same */
for (cs2 = &p->g->sets[0]; cs2 < top; cs2++)
if (cs2->hash == h && cs2 != cs) {
/* maybe */
- for (i = 0; i < (ssize_t)css; i++)
+ for (i = 0; i < css; i++)
if (!!CHIN(cs2, i) != !!CHIN(cs, i))
break; /* no */
- if (i == (ssize_t)css)
+ if (i == css)
break; /* yes */
}
@@ -1124,19 +1351,27 @@ freezeset(struct parse *p, cset *cs)
cs = cs2;
}
- return((int)(cs - p->g->sets));
+ return (sopno)(cs - p->g->sets);
}
/*
- firstch - return first character in a set (which must have at least one)
+ == static int firstch(struct parse *p, cset *cs);
*/
static int /* character; there is no "none" value */
-firstch(struct parse *p, cset *cs)
+firstch(
+ struct parse *p,
+ cset *cs)
{
- int i;
- size_t css = (size_t)p->g->csetsize;
+ size_t i;
+ size_t css;
+
+ _DIAGASSERT(p != NULL);
+ _DIAGASSERT(cs != NULL);
- for (i = 0; i < (ssize_t)css; i++)
+ css = (size_t)p->g->csetsize;
+
+ for (i = 0; i < css; i++)
if (CHIN(cs, i))
return((char)i);
assert(never);
@@ -1145,15 +1380,23 @@ firstch(struct parse *p, cset *cs)
/*
- nch - number of characters in a set
+ == static int nch(struct parse *p, cset *cs);
*/
static int
-nch(struct parse *p, cset *cs)
+nch(
+ struct parse *p,
+ cset *cs)
{
- int i;
- size_t css = (size_t)p->g->csetsize;
+ size_t i;
+ size_t css;
int n = 0;
- for (i = 0; i < (ssize_t)css; i++)
+ _DIAGASSERT(p != NULL);
+ _DIAGASSERT(cs != NULL);
+
+ css = (size_t)p->g->csetsize;
+
+ for (i = 0; i < css; i++)
if (CHIN(cs, i))
n++;
return(n);
@@ -1161,63 +1404,170 @@ nch(struct parse *p, cset *cs)
/*
- mcadd - add a collating element to a cset
+ == static void mcadd(struct parse *p, cset *cs, \
+ == const char *cp);
*/
static void
-mcadd( struct parse *p, cset *cs, char *cp)
+mcadd(
+ struct parse *p,
+ cset *cs,
+ const char *cp)
{
- size_t oldend = cs->smultis;
- void *np;
+ size_t oldend;
+
+ _DIAGASSERT(p != NULL);
+ _DIAGASSERT(cs != NULL);
+ _DIAGASSERT(cp != NULL);
+
+ oldend = cs->smultis;
cs->smultis += strlen(cp) + 1;
- np = realloc(cs->multis, cs->smultis);
- if (np == NULL) {
- if (cs->multis)
- free(cs->multis);
- cs->multis = NULL;
+ if (cs->multis == NULL)
+ cs->multis = malloc(cs->smultis);
+ else
+ cs->multis = realloc(cs->multis, cs->smultis);
+ if (cs->multis == NULL) {
SETERROR(REG_ESPACE);
return;
}
- cs->multis = np;
- strlcpy(cs->multis + oldend - 1, cp, cs->smultis - oldend + 1);
+ (void) strcpy(cs->multis + oldend - 1, cp);
+ cs->multis[cs->smultis - 1] = '\0';
}
+#if 0
+/*
+ - mcsub - subtract a collating element from a cset
+ == static void mcsub(cset *cs, char *cp);
+ */
+static void
+mcsub(
+ cset *cs,
+ char *cp)
+{
+ char *fp;
+ size_t len;
+
+ _DIAGASSERT(cs != NULL);
+ _DIAGASSERT(cp != NULL);
+
+ fp = mcfind(cs, cp);
+ len = strlen(fp);
+
+ assert(fp != NULL);
+ (void) memmove(fp, fp + len + 1,
+ cs->smultis - (fp + len + 1 - cs->multis));
+ cs->smultis -= len;
+
+ if (cs->smultis == 0) {
+ free(cs->multis);
+ cs->multis = NULL;
+ return;
+ }
+
+ cs->multis = realloc(cs->multis, cs->smultis);
+ assert(cs->multis != NULL);
+}
+
+/*
+ - mcin - is a collating element in a cset?
+ == static int mcin(cset *cs, char *cp);
+ */
+static int
+mcin(
+ cset *cs,
+ char *cp)
+{
+
+ _DIAGASSERT(cs != NULL);
+ _DIAGASSERT(cp != NULL);
+
+ return(mcfind(cs, cp) != NULL);
+}
+
+/*
+ - mcfind - find a collating element in a cset
+ == static char *mcfind(cset *cs, char *cp);
+ */
+static char *
+mcfind(
+ cset *cs,
+ char *cp)
+{
+ char *p;
+
+ _DIAGASSERT(cs != NULL);
+ _DIAGASSERT(cp != NULL);
+
+ if (cs->multis == NULL)
+ return(NULL);
+ for (p = cs->multis; *p != '\0'; p += strlen(p) + 1)
+ if (strcmp(cp, p) == 0)
+ return(p);
+ return(NULL);
+}
+#endif
+
/*
- mcinvert - invert the list of collating elements in a cset
+ == static void mcinvert(struct parse *p, cset *cs);
*
* This would have to know the set of possibilities. Implementation
* is deferred.
*/
/* ARGSUSED */
static void
-mcinvert(struct parse *p, cset *cs)
+mcinvert(
+ struct parse *p,
+ cset *cs)
{
+
+ _DIAGASSERT(p != NULL);
+ _DIAGASSERT(cs != NULL);
+
assert(cs->multis == NULL); /* xxx */
}
/*
- mccase - add case counterparts of the list of collating elements in a cset
+ == static void mccase(struct parse *p, cset *cs);
*
* This would have to know the set of possibilities. Implementation
* is deferred.
*/
/* ARGSUSED */
static void
-mccase(struct parse *p, cset *cs)
+mccase(
+ struct parse *p,
+ cset *cs)
{
+
+ _DIAGASSERT(p != NULL);
+ _DIAGASSERT(cs != NULL);
+
assert(cs->multis == NULL); /* xxx */
}
/*
- isinsets - is this character in any sets?
+ == static int isinsets(struct re_guts *g, int c);
*/
static int /* predicate */
-isinsets(struct re_guts *g, int c)
+isinsets(
+ struct re_guts *g,
+ int c)
{
uch *col;
- int i;
- int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
- unsigned uc = (uch)c;
+ size_t i;
+ size_t ncols;
+ unsigned uc = (unsigned char)c;
+
+ _DIAGASSERT(g != NULL);
+
+ if (g->setbits == NULL)
+ return 0;
+
+ ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize)
if (col[uc] != 0)
@@ -1227,15 +1577,23 @@ isinsets(struct re_guts *g, int c)
/*
- samesets - are these two characters in exactly the same sets?
+ == static int samesets(struct re_guts *g, int c1, int c2);
*/
static int /* predicate */
-samesets(struct re_guts *g, int c1, int c2)
+samesets(
+ struct re_guts *g,
+ int c1,
+ int c2)
{
uch *col;
- int i;
- int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
- unsigned uc1 = (uch)c1;
- unsigned uc2 = (uch)c2;
+ size_t i;
+ size_t ncols;
+ unsigned uc1 = (unsigned char)c1;
+ unsigned uc2 = (unsigned char)c2;
+
+ _DIAGASSERT(g != NULL);
+
+ ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize)
if (col[uc1] != col[uc2])
@@ -1245,21 +1603,31 @@ samesets(struct re_guts *g, int c1, int c2)
/*
- categorize - sort out character categories
+ == static void categorize(struct parse *p, struct re_guts *g);
*/
static void
-categorize(struct parse *p, struct re_guts *g)
+categorize(
+ struct parse *p,
+ struct re_guts *g)
{
- cat_t *cats = g->categories;
+ cat_t *cats;
int c;
int c2;
cat_t cat;
+ _DIAGASSERT(p != NULL);
+ _DIAGASSERT(g != NULL);
+
+ cats = g->categories;
+
/* avoid making error situations worse */
if (p->error != 0)
return;
for (c = CHAR_MIN; c <= CHAR_MAX; c++)
if (cats[c] == 0 && isinsets(g, c)) {
+ _DIAGASSERT(__type_fit(unsigned char,
+ g->ncategories + 1));
cat = g->ncategories++;
cats[c] = cat;
for (c2 = c+1; c2 <= CHAR_MAX; c2++)
@@ -1270,36 +1638,48 @@ categorize(struct parse *p, struct re_guts *g)
/*
- dupl - emit a duplicate of a bunch of sops
+ == static sopno dupl(struct parse *p, sopno start, sopno finish);
*/
static sopno /* start of duplicate */
-dupl(struct parse *p,
- sopno start, /* from here */
- sopno finish) /* to this less one */
+dupl(
+ struct parse *p,
+ sopno start, /* from here */
+ sopno finish) /* to this less one */
{
- sopno ret = HERE();
+ sopno ret;
sopno len = finish - start;
+ _DIAGASSERT(p != NULL);
+
+ ret = HERE();
+
assert(finish >= start);
if (len == 0)
return(ret);
- enlarge(p, p->ssize + len); /* this many unexpected additions */
- assert(p->ssize >= p->slen + len);
- (void) memcpy((char *)(p->strip + p->slen),
- (char *)(p->strip + start), (size_t)len*sizeof(sop));
+ if (!enlarge(p, p->ssize + len))/* this many unexpected additions */
+ return ret;
+ (void)memcpy(p->strip + p->slen, p->strip + start,
+ (size_t)len * sizeof(sop));
p->slen += len;
return(ret);
}
/*
- doemit - emit a strip operator
+ == static void doemit(struct parse *p, sop op, sopno opnd);
*
* It might seem better to implement this as a macro with a function as
* hard-case backup, but it's just too big and messy unless there are
* some changes to the data structures. Maybe later.
*/
static void
-doemit(struct parse *p, sop op, size_t opnd)
+doemit(
+ struct parse *p,
+ sop op,
+ sopno opnd)
{
+ _DIAGASSERT(p != NULL);
+
/* avoid making error situations worse */
if (p->error != 0)
return;
@@ -1309,23 +1689,30 @@ doemit(struct parse *p, sop op, size_t opnd)
/* deal with undersized strip */
if (p->slen >= p->ssize)
- enlarge(p, (p->ssize+1) / 2 * 3); /* +50% */
- assert(p->slen < p->ssize);
+ if (!enlarge(p, (p->ssize+1) / 2 * 3)) /* +50% */
+ return;
/* finally, it's all reduced to the easy case */
- p->strip[p->slen++] = SOP(op, opnd);
+ p->strip[p->slen++] = (sop)SOP(op, opnd);
}
/*
- doinsert - insert a sop into the strip
+ == static void doinsert(struct parse *p, sop op, sopno opnd, sopno pos);
*/
static void
-doinsert(struct parse *p, sop op, size_t opnd, sopno pos)
+doinsert(
+ struct parse *p,
+ sop op,
+ sopno opnd,
+ sopno pos)
{
sopno sn;
sop s;
int i;
+ _DIAGASSERT(p != NULL);
+
/* avoid making error situations worse */
if (p->error != 0)
return;
@@ -1346,53 +1733,78 @@ doinsert(struct parse *p, sop op, size_t opnd, sopno pos)
}
}
- memmove((char *)&p->strip[pos+1], (char *)&p->strip[pos],
- (HERE()-pos-1)*sizeof(sop));
+ memmove(&p->strip[pos+1], &p->strip[pos], (HERE()-pos-1)*sizeof(sop));
p->strip[pos] = s;
}
/*
- dofwd - complete a forward reference
+ == static void dofwd(struct parse *p, sopno pos, sopno value);
*/
static void
-dofwd(struct parse *p, sopno pos, sop value)
+dofwd(
+ struct parse *p,
+ sopno pos,
+ sopno value)
{
+
+ _DIAGASSERT(p != NULL);
+
/* avoid making error situations worse */
if (p->error != 0)
return;
assert(value < 1<<OPSHIFT);
- p->strip[pos] = OP(p->strip[pos]) | value;
+ p->strip[pos] = (sop)(OP(p->strip[pos]) | value);
}
/*
- enlarge - enlarge the strip
+ == static int enlarge(struct parse *p, sopno size);
*/
-static void
-enlarge(struct parse *p, sopno size)
+static int
+enlarge(
+ struct parse *p,
+ sopno size)
{
sop *sp;
+ sopno osize;
+
+ _DIAGASSERT(p != NULL);
if (p->ssize >= size)
- return;
+ return 1;
- sp = (sop *)realloc(p->strip, size*sizeof(sop));
+ osize = p->ssize;
+ p->ssize = size;
+ if (MEMSIZE(p) > MEMLIMIT)
+ goto oomem;
+ sp = realloc(p->strip, p->ssize * sizeof(sop));
if (sp == NULL) {
+oomem:
+ p->ssize = osize;
SETERROR(REG_ESPACE);
- return;
+ return 0;
}
p->strip = sp;
- p->ssize = size;
+ return 1;
}
/*
- stripsnug - compact the strip
+ == static void stripsnug(struct parse *p, struct re_guts *g);
*/
static void
-stripsnug(struct parse *p, struct re_guts *g)
+stripsnug(
+ struct parse *p,
+ struct re_guts *g)
{
+
+ _DIAGASSERT(p != NULL);
+ _DIAGASSERT(g != NULL);
+
g->nstates = p->slen;
- g->strip = (sop *)realloc((char *)p->strip, p->slen * sizeof(sop));
+ g->strip = realloc(p->strip, p->slen * sizeof(sop));
if (g->strip == NULL) {
SETERROR(REG_ESPACE);
g->strip = p->strip;
@@ -1401,6 +1813,7 @@ stripsnug(struct parse *p, struct re_guts *g)
/*
- findmust - fill in must and mlen with longest mandatory literal string
+ == static void findmust(struct parse *p, struct re_guts *g);
*
* This algorithm could do fancy things like analyzing the operands of |
* for common subsequences. Someday. This code is simple and finds most
@@ -1409,16 +1822,21 @@ stripsnug(struct parse *p, struct re_guts *g)
* Note that must and mlen got initialized during setup.
*/
static void
-findmust(struct parse *p, struct re_guts *g)
+findmust(
+ struct parse *p,
+ struct re_guts *g)
{
sop *scan;
- sop *start = NULL; /* start initialized in the default case, after that */
- sop *newstart; /* newstart was initialized in the OCHAR case */
+ sop *start = NULL;
+ sop *newstart = NULL;
sopno newlen;
sop s;
char *cp;
sopno i;
+ _DIAGASSERT(p != NULL);
+ _DIAGASSERT(g != NULL);
+
/* avoid making error situations worse */
if (p->error != 0)
return;
@@ -1451,7 +1869,7 @@ findmust(struct parse *p, struct re_guts *g)
return;
}
} while (OP(s) != O_QUEST && OP(s) != O_CH);
- /* fallthrough */
+ /* FALLTHROUGH */
default: /* things that break a sequence */
if (newlen > g->mlen) { /* ends one */
start = newstart;
@@ -1462,7 +1880,10 @@ findmust(struct parse *p, struct re_guts *g)
}
} while (OP(s) != OEND);
- if (g->mlen == 0) /* there isn't one */
+ if (start == NULL)
+ g->mlen = 0;
+
+ if (g->mlen == 0) /* there isn't one */
return;
/* turn it into a character string */
@@ -1485,15 +1906,21 @@ findmust(struct parse *p, struct re_guts *g)
/*
- pluscount - count + nesting
+ == static sopno pluscount(struct parse *p, struct re_guts *g);
*/
static sopno /* nesting depth */
-pluscount(struct parse *p, struct re_guts *g)
+pluscount(
+ struct parse *p,
+ struct re_guts *g)
{
sop *scan;
sop s;
sopno plusnest = 0;
sopno maxnest = 0;
+ _DIAGASSERT(p != NULL);
+ _DIAGASSERT(g != NULL);
+
if (p->error != 0)
return(0); /* there may not be an OEND */
diff --git a/libc/upstream-netbsd/libc/regex/regerror.c b/libc/upstream-netbsd/libc/regex/regerror.c
new file mode 100644
index 0000000..e00d7c0
--- /dev/null
+++ b/libc/upstream-netbsd/libc/regex/regerror.c
@@ -0,0 +1,223 @@
+/* $NetBSD: regerror.c,v 1.23 2007/02/09 23:44:18 junyoung Exp $ */
+
+/*-
+ * Copyright (c) 1992, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Henry Spencer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)regerror.c 8.4 (Berkeley) 3/20/94
+ */
+
+/*-
+ * Copyright (c) 1992, 1993, 1994 Henry Spencer.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Henry Spencer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)regerror.c 8.4 (Berkeley) 3/20/94
+ */
+
+#include <sys/cdefs.h>
+#if defined(LIBC_SCCS) && !defined(lint)
+#if 0
+static char sccsid[] = "@(#)regerror.c 8.4 (Berkeley) 3/20/94";
+#else
+__RCSID("$NetBSD: regerror.c,v 1.23 2007/02/09 23:44:18 junyoung Exp $");
+#endif
+#endif /* LIBC_SCCS and not lint */
+
+#include "namespace.h"
+#include <sys/types.h>
+
+#include <assert.h>
+#include <ctype.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <regex.h>
+
+#ifdef __weak_alias
+__weak_alias(regerror,_regerror)
+#endif
+
+#include "utils.h"
+
+/* ========= begin header generated by ./mkh ========= */
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* === regerror.c === */
+static const char *regatoi(const regex_t *preg, char *localbuf, size_t buflen);
+
+#ifdef __cplusplus
+}
+#endif
+/* ========= end header generated by ./mkh ========= */
+/*
+ = #define REG_NOMATCH 1
+ = #define REG_BADPAT 2
+ = #define REG_ECOLLATE 3
+ = #define REG_ECTYPE 4
+ = #define REG_EESCAPE 5
+ = #define REG_ESUBREG 6
+ = #define REG_EBRACK 7
+ = #define REG_EPAREN 8
+ = #define REG_EBRACE 9
+ = #define REG_BADBR 10
+ = #define REG_ERANGE 11
+ = #define REG_ESPACE 12
+ = #define REG_BADRPT 13
+ = #define REG_EMPTY 14
+ = #define REG_ASSERT 15
+ = #define REG_INVARG 16
+ = #define REG_ATOI 255 // convert name to number (!)
+ = #define REG_ITOA 0400 // convert number to name (!)
+ */
+static const struct rerr {
+ int code;
+ const char *name;
+ const char *explain;
+} rerrs[] = {
+ { REG_NOMATCH, "REG_NOMATCH", "regexec() failed to match" },
+ { REG_BADPAT, "REG_BADPAT", "invalid regular expression" },
+ { REG_ECOLLATE, "REG_ECOLLATE", "invalid collating element" },
+ { REG_ECTYPE, "REG_ECTYPE", "invalid character class" },
+ { REG_EESCAPE, "REG_EESCAPE", "trailing backslash (\\)" },
+ { REG_ESUBREG, "REG_ESUBREG", "invalid backreference number" },
+ { REG_EBRACK, "REG_EBRACK", "brackets ([ ]) not balanced" },
+ { REG_EPAREN, "REG_EPAREN", "parentheses not balanced" },
+ { REG_EBRACE, "REG_EBRACE", "braces not balanced" },
+ { REG_BADBR, "REG_BADBR", "invalid repetition count(s)" },
+ { REG_ERANGE, "REG_ERANGE", "invalid character range" },
+ { REG_ESPACE, "REG_ESPACE", "out of memory" },
+ { REG_BADRPT, "REG_BADRPT", "repetition-operator operand invalid" },
+ { REG_EMPTY, "REG_EMPTY", "empty (sub)expression" },
+ { REG_ASSERT, "REG_ASSERT", "\"can't happen\" -- you found a bug" },
+ { REG_INVARG, "REG_INVARG", "invalid argument to regex routine" },
+ { 0, "", "*** unknown regexp error code ***" }
+};
+
+/*
+ * regerror - the interface to error numbers
+ * extern size_t regerror(int, const regex_t *, char *, size_t);
+ *
+ * Turns a regex error code into text and copies it into errbuf.
+ *
+ * Three modes, selected by errcode:
+ *  - errcode == REG_ATOI: the text is whatever regatoi() produces from
+ *    preg (a decimal error number, or "0").
+ *  - errcode has the REG_ITOA bit set: the text is the symbolic name of
+ *    the remaining code (e.g. "REG_BADPAT"), or "REG_0x<hex>" when the
+ *    code is not present in rerrs[].
+ *  - otherwise: the text is the explanation string from rerrs[]; a code
+ *    that is not in the table gets the explanation of the terminating
+ *    entry ("*** unknown regexp error code ***").
+ *
+ * errbuf is written only when errbuf_size > 0, and the copy is bounded
+ * by errbuf_size.
+ *
+ * Returns strlen(text) + 1, computed before the bounded copy, so the
+ * value does not depend on errbuf_size.
+ */
+/* ARGSUSED */
+size_t
+regerror(
+ int errcode,
+ const regex_t *preg,
+ char *errbuf,
+ size_t errbuf_size)
+{
+ const struct rerr *r;
+ size_t len;
+ int target = errcode &~ REG_ITOA; /* errcode with the REG_ITOA bit cleared */
+ const char *s;
+ char convbuf[50]; /* scratch space for generated (non-table) text */
+
+ _DIAGASSERT(errcode != REG_ATOI || preg != NULL);
+ _DIAGASSERT(errbuf != NULL);
+
+ if (errcode == REG_ATOI)
+ s = regatoi(preg, convbuf, sizeof convbuf);
+ else {
+ for (r = rerrs; r->code != 0; r++) /* stops on the code-0 terminator if no match */
+ if (r->code == target)
+ break;
+
+ if (errcode & REG_ITOA) {
+ if (r->code != 0) { /* found: report the symbolic name */
+ (void)strlcpy(convbuf, r->name, sizeof convbuf);
+ } else /* not found: synthesize a name from the hex value */
+ (void)snprintf(convbuf, sizeof convbuf,
+ "REG_0x%x", target);
+ s = convbuf;
+ } else
+ s = r->explain; /* terminator's explain doubles as the "unknown" text */
+ }
+
+ len = strlen(s) + 1; /* +1 for the terminating NUL */
+ if (errbuf_size > 0)
+ (void)strlcpy(errbuf, s, errbuf_size);
+
+ return(len);
+}
+
+/*
+ * regatoi - internal routine to implement REG_ATOI
+ * static const char *regatoi(const regex_t *preg, char *localbuf,
+ * size_t buflen);
+ *
+ * Searches rerrs[] for an entry whose name equals the string that
+ * preg->re_endp points to.  On a match the entry's numeric code is
+ * formatted in decimal into localbuf (at most buflen bytes) and localbuf
+ * is returned; when no name matches, the string literal "0" is returned
+ * and localbuf is left untouched.
+ *
+ * NOTE(review): preg->re_endp is handed to strcmp() without a NULL
+ * check here, so it has to point at a NUL-terminated string; confirm
+ * that callers guarantee this.
+ */
+static const char *
+regatoi(
+ const regex_t *preg,
+ char *localbuf,
+ size_t buflen)
+{
+ const struct rerr *r;
+
+ for (r = rerrs; r->code != 0; r++) /* the code-0 entry terminates the table */
+ if (strcmp(r->name, preg->re_endp) == 0)
+ break;
+ if (r->code == 0) /* ran off the end: name not recognized */
+ return "0";
+
+ (void)snprintf(localbuf, buflen, "%d", r->code);
+ return localbuf;
+}
diff --git a/libc/regex/regex2.h b/libc/upstream-netbsd/libc/regex/regex2.h
index 15e15bc..7c877ee 100644
--- a/libc/regex/regex2.h
+++ b/libc/upstream-netbsd/libc/regex/regex2.h
@@ -1,7 +1,6 @@
-/* $OpenBSD: regex2.h,v 1.7 2004/11/30 17:04:23 otto Exp $ */
+/* $NetBSD: regex2.h,v 1.13 2011/10/09 18:23:00 christos Exp $ */
/*-
- * Copyright (c) 1992, 1993, 1994 Henry Spencer.
* Copyright (c) 1992, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
@@ -35,6 +34,57 @@
* @(#)regex2.h 8.4 (Berkeley) 3/20/94
*/
+/*-
+ * Copyright (c) 1992, 1993, 1994 Henry Spencer.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Henry Spencer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)regex2.h 8.4 (Berkeley) 3/20/94
+ */
+
+/*
+ * First, the stuff that ends up in the outside-world include file
+ = typedef off_t regoff_t;
+ = typedef struct {
+ = int re_magic;
+ = size_t re_nsub; // number of parenthesized subexpressions
+ = const char *re_endp; // end pointer for REG_PEND
+ = struct re_guts *re_g; // none of your business :-)
+ = } regex_t;
+ = typedef struct {
+ = regoff_t rm_so; // start of match
+ = regoff_t rm_eo; // end of match
+ = } regmatch_t;
+ */
/*
* internals of regex_t
*/
@@ -59,36 +109,38 @@
* In state representations, an operator's bit is on to signify a state
* immediately *preceding* "execution" of that operator.
*/
-typedef unsigned long sop; /* strip operator */
-typedef long sopno;
-#define OPRMASK 0xf8000000LU
-#define OPDMASK 0x07ffffffLU
+typedef u_int32_t sop; /* strip operator */
+typedef size_t sopno;
+#define OPRMASK ((u_int32_t)0xf8000000UL)
+#define OPDMASK ((u_int32_t)0x07ffffffUL)
#define OPSHIFT ((unsigned)27)
#define OP(n) ((n)&OPRMASK)
-#define OPND(n) ((n)&OPDMASK)
+#define OPND(n) ((int)((n)&OPDMASK))
#define SOP(op, opnd) ((op)|(opnd))
-/* operators meaning operand */
-/* (back, fwd are offsets) */
-#define OEND (1LU<<OPSHIFT) /* endmarker - */
-#define OCHAR (2LU<<OPSHIFT) /* character unsigned char */
-#define OBOL (3LU<<OPSHIFT) /* left anchor - */
-#define OEOL (4LU<<OPSHIFT) /* right anchor - */
-#define OANY (5LU<<OPSHIFT) /* . - */
-#define OANYOF (6LU<<OPSHIFT) /* [...] set number */
-#define OBACK_ (7LU<<OPSHIFT) /* begin \d paren number */
-#define O_BACK (8LU<<OPSHIFT) /* end \d paren number */
-#define OPLUS_ (9LU<<OPSHIFT) /* + prefix fwd to suffix */
-#define O_PLUS (10LU<<OPSHIFT) /* + suffix back to prefix */
-#define OQUEST_ (11LU<<OPSHIFT) /* ? prefix fwd to suffix */
-#define O_QUEST (12LU<<OPSHIFT) /* ? suffix back to prefix */
-#define OLPAREN (13LU<<OPSHIFT) /* ( fwd to ) */
-#define ORPAREN (14LU<<OPSHIFT) /* ) back to ( */
-#define OCH_ (15LU<<OPSHIFT) /* begin choice fwd to OOR2 */
-#define OOR1 (16LU<<OPSHIFT) /* | pt. 1 back to OOR1 or OCH_ */
-#define OOR2 (17LU<<OPSHIFT) /* | pt. 2 fwd to OOR2 or O_CH */
-#define O_CH (18LU<<OPSHIFT) /* end choice back to OOR1 */
-#define OBOW (19LU<<OPSHIFT) /* begin word - */
-#define OEOW (20LU<<OPSHIFT) /* end word - */
+
+#define OPC(n) (((u_int32_t)(n))<<OPSHIFT)
+/* operators meaning operand */
+/* (back, fwd are offsets) */
+#define OEND OPC(1) /* endmarker - */
+#define OCHAR OPC(2) /* character unsigned char */
+#define OBOL OPC(3) /* left anchor - */
+#define OEOL OPC(4) /* right anchor - */
+#define OANY OPC(5) /* . - */
+#define OANYOF OPC(6) /* [...] set number */
+#define OBACK_ OPC(7) /* begin \d paren number */
+#define O_BACK OPC(8) /* end \d paren number */
+#define OPLUS_ OPC(9) /* + prefix fwd to suffix */
+#define O_PLUS OPC(10) /* + suffix back to prefix */
+#define OQUEST_ OPC(11) /* ? prefix fwd to suffix */
+#define O_QUEST OPC(12) /* ? suffix back to prefix */
+#define OLPAREN OPC(13) /* ( fwd to ) */
+#define ORPAREN OPC(14) /* ) back to ( */
+#define OCH_ OPC(15) /* begin choice fwd to OOR2 */
+#define OOR1 OPC(16) /* | pt. 1 back to OOR1 or OCH_ */
+#define OOR2 OPC(17) /* | pt. 2 fwd to OOR2 or O_CH */
+#define O_CH OPC(18) /* end choice back to OOR1 */
+#define OBOW OPC(19) /* begin word - */
+#define OEOW OPC(20) /* end word - */
/*
* Structure for [] character-set representation. Character sets are
@@ -127,8 +179,8 @@ struct re_guts {
int magic;
# define MAGIC2 ((('R'^0200)<<8)|'E')
sop *strip; /* malloced area for strip */
- int csetsize; /* number of bits in a cset vector */
- int ncsets; /* number of csets in use */
+ size_t csetsize; /* number of bits in a cset vector */
+ size_t ncsets; /* number of csets in use */
cset *sets; /* -> cset [ncsets] */
uch *setbits; /* -> uch[csetsize][ncsets/CHAR_BIT] */
int cflags; /* copy of regcomp() cflags argument */
@@ -139,12 +191,12 @@ struct re_guts {
# define USEBOL 01 /* used ^ */
# define USEEOL 02 /* used $ */
# define BAD 04 /* something wrong */
- int nbol; /* number of ^ used */
- int neol; /* number of $ used */
- int ncategories; /* how many character categories */
+ size_t nbol; /* number of ^ used */
+ size_t neol; /* number of $ used */
+ size_t ncategories; /* how many character categories */
cat_t *categories; /* ->catspace[-CHAR_MIN] */
char *must; /* match must contain this string */
- int mlen; /* length of must */
+ size_t mlen; /* length of must */
size_t nsub; /* copy of re_nsub */
int backrefs; /* does it use back references? */
sopno nplus; /* how deep does it nest +s? */
@@ -154,4 +206,4 @@ struct re_guts {
/* misc utilities */
#define OUT (CHAR_MAX+1) /* a non-character value */
-#define ISWORD(c) (isalnum(c) || (c) == '_')
+#define ISWORD(c) (isalnum((unsigned char)(c)) || (c) == '_') /* cast covers the whole argument; c is evaluated twice */
diff --git a/libc/regex/regexec.c b/libc/upstream-netbsd/libc/regex/regexec.c
index 6feed3b..f16e0b6 100644
--- a/libc/regex/regexec.c
+++ b/libc/upstream-netbsd/libc/regex/regexec.c
@@ -1,6 +1,6 @@
-/* $OpenBSD: regexec.c,v 1.11 2005/08/05 13:03:00 espie Exp $ */
+/* $NetBSD: regexec.c,v 1.22 2012/03/13 21:13:43 christos Exp $ */
+
/*-
- * Copyright (c) 1992, 1993, 1994 Henry Spencer.
* Copyright (c) 1992, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
@@ -34,6 +34,52 @@
* @(#)regexec.c 8.3 (Berkeley) 3/20/94
*/
+/*-
+ * Copyright (c) 1992, 1993, 1994 Henry Spencer.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Henry Spencer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)regexec.c 8.3 (Berkeley) 3/20/94
+ */
+
+#include <sys/cdefs.h>
+#if defined(LIBC_SCCS) && !defined(lint)
+#if 0
+static char sccsid[] = "@(#)regexec.c 8.3 (Berkeley) 3/20/94";
+#else
+__RCSID("$NetBSD: regexec.c,v 1.22 2012/03/13 21:13:43 christos Exp $");
+#endif
+#endif /* LIBC_SCCS and not lint */
+
/*
* the outer shell of regexec()
*
@@ -41,39 +87,46 @@
* macros that code uses. This lets the same code operate on two different
* representations for state sets.
*/
+#include "namespace.h"
#include <sys/types.h>
+
+#include <assert.h>
+#include <ctype.h>
+#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
-#include <limits.h>
-#include <ctype.h>
#include <regex.h>
+#ifdef __weak_alias
+__weak_alias(regexec,_regexec)
+#endif
+
#include "utils.h"
#include "regex2.h"
/* macros for manipulating states, small version */
-#define states long
-#define states1 states /* for later use in regexec() decision */
+#define states unsigned long
+#define states1 unsigned long /* for later use in regexec() decision */
#define CLEAR(v) ((v) = 0)
#define SET0(v, n) ((v) &= ~((unsigned long)1 << (n)))
#define SET1(v, n) ((v) |= (unsigned long)1 << (n))
#define ISSET(v, n) (((v) & ((unsigned long)1 << (n))) != 0)
#define ASSIGN(d, s) ((d) = (s))
#define EQ(a, b) ((a) == (b))
-#define STATEVARS long dummy /* dummy version */
+#define STATEVARS int dummy /* dummy version */
#define STATESETUP(m, n) /* nothing */
#define STATETEARDOWN(m) /* nothing */
#define SETUP(v) ((v) = 0)
-#define onestate long
+#define onestate unsigned long
#define INIT(o, n) ((o) = (unsigned long)1 << (n))
-#define INC(o) ((o) <<= 1)
+#define INC(o) ((o) <<= 1)
#define ISSTATEIN(v, o) (((v) & (o)) != 0)
/* some abbreviations; note that some of these know variable names! */
/* do "if I'm here, I can also be there" etc without branches */
#define FWD(dst, src, n) ((dst) |= ((unsigned long)(src)&(here)) << (n))
#define BACK(dst, src, n) ((dst) |= ((unsigned long)(src)&(here)) >> (n))
-#define ISSETBACK(v, n) (((v) & ((unsigned long)here >> (n))) != 0)
+#define ISSETBACK(v, n) (((v) & ((unsigned long)here >> (n))) != 0)
/* function names */
#define SNAMES /* engine.c looks after details */
@@ -102,20 +155,23 @@
/* macros for manipulating states, large version */
#define states char *
-#define CLEAR(v) memset(v, 0, m->g->nstates)
+#define CLEAR(v) memset(v, 0, (size_t)m->g->nstates)
#define SET0(v, n) ((v)[n] = 0)
#define SET1(v, n) ((v)[n] = 1)
#define ISSET(v, n) ((v)[n])
-#define ASSIGN(d, s) memcpy(d, s, m->g->nstates)
-#define EQ(a, b) (memcmp(a, b, m->g->nstates) == 0)
-#define STATEVARS long vn; char *space
-#define STATESETUP(m, nv) { (m)->space = malloc((nv)*(m)->g->nstates); \
- if ((m)->space == NULL) return(REG_ESPACE); \
- (m)->vn = 0; }
-#define STATETEARDOWN(m) { free((m)->space); }
-#define SETUP(v) ((v) = &m->space[m->vn++ * m->g->nstates])
-#define onestate long
-#define INIT(o, n) ((o) = (n))
+#define ASSIGN(d, s) memcpy(d, s, (size_t)m->g->nstates)
+#define EQ(a, b) (memcmp(a, b, (size_t)m->g->nstates) == 0)
+#define STATEVARS int vn; char *space
+#define STATESETUP(m, nv) \
+ if (((m)->space = malloc((size_t)((nv)*(m)->g->nstates))) == NULL) \
+ return(REG_ESPACE); \
+ else \
+ (m)->vn = 0
+
+#define STATETEARDOWN(m) { free((m)->space); (m)->space = NULL; } /* free the space buffer and clear the stale pointer */
+#define SETUP(v) ((v) = &m->space[(size_t)(m->vn++ * m->g->nstates)])
+#define onestate int
+#define INIT(o, n) ((o) = (int)(n))
#define INC(o) ((o)++)
#define ISSTATEIN(v, o) ((v)[o])
/* some abbreviations; note that some of these know variable names! */
@@ -130,22 +186,38 @@
/*
- regexec - interface for matching
+ = extern int regexec(const regex_t *, const char *, size_t, \
+ = regmatch_t [], int);
+ = #define REG_NOTBOL 00001
+ = #define REG_NOTEOL 00002
+ = #define REG_STARTEND 00004
+ = #define REG_TRACE 00400 // tracing of execution
+ = #define REG_LARGE 01000 // force large representation
+ = #define REG_BACKR 02000 // force use of backref code
*
* We put this here so we can exploit knowledge of the state representation
* when choosing which matcher to call. Also, by this point the matchers
* have been prototyped.
*/
int /* 0 success, REG_NOMATCH failure */
-regexec(const regex_t *preg, const char *string, size_t nmatch,
- regmatch_t pmatch[], int eflags)
+regexec(
+ const regex_t *preg,
+ const char *string,
+ size_t nmatch,
+ regmatch_t pmatch[],
+ int eflags)
{
struct re_guts *g = preg->re_g;
+ char *s;
#ifdef REDEBUG
# define GOODFLAGS(f) (f)
#else
# define GOODFLAGS(f) ((f)&(REG_NOTBOL|REG_NOTEOL|REG_STARTEND))
#endif
+ _DIAGASSERT(preg != NULL);
+ _DIAGASSERT(string != NULL);
+
if (preg->re_magic != MAGIC1 || g->magic != MAGIC2)
return(REG_BADPAT);
assert(!(g->iflags&BAD));
@@ -153,8 +225,10 @@ regexec(const regex_t *preg, const char *string, size_t nmatch,
return(REG_BADPAT);
eflags = GOODFLAGS(eflags);
- if (g->nstates <= (int)(CHAR_BIT*sizeof(states1)) && !(eflags&REG_LARGE))
- return(smatcher(g, (char *)string, nmatch, pmatch, eflags));
+ s = __UNCONST(string);
+
+ if (g->nstates <= (sopno)(CHAR_BIT*sizeof(states1)) && !(eflags&REG_LARGE))
+ return(smatcher(g, s, nmatch, pmatch, eflags));
else
- return(lmatcher(g, (char *)string, nmatch, pmatch, eflags));
+ return(lmatcher(g, s, nmatch, pmatch, eflags));
}
diff --git a/libc/upstream-netbsd/libc/regex/regfree.c b/libc/upstream-netbsd/libc/regex/regfree.c
new file mode 100644
index 0000000..ce011ea
--- /dev/null
+++ b/libc/upstream-netbsd/libc/regex/regfree.c
@@ -0,0 +1,129 @@
+/* $NetBSD: regfree.c,v 1.15 2007/02/09 23:44:18 junyoung Exp $ */
+
+/*-
+ * Copyright (c) 1992, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Henry Spencer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)regfree.c 8.3 (Berkeley) 3/20/94
+ */
+
+/*-
+ * Copyright (c) 1992, 1993, 1994 Henry Spencer.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Henry Spencer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)regfree.c 8.3 (Berkeley) 3/20/94
+ */
+
+#include <sys/cdefs.h>
+#if defined(LIBC_SCCS) && !defined(lint)
+#if 0
+static char sccsid[] = "@(#)regfree.c 8.3 (Berkeley) 3/20/94";
+#else
+__RCSID("$NetBSD: regfree.c,v 1.15 2007/02/09 23:44:18 junyoung Exp $");
+#endif
+#endif /* LIBC_SCCS and not lint */
+
+#include "namespace.h"
+#include <sys/types.h>
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <regex.h>
+
+#ifdef __weak_alias
+__weak_alias(regfree,_regfree)
+#endif
+
+#include "utils.h"
+#include "regex2.h"
+
+/*
+ - regfree - free everything
+ = extern void regfree(regex_t *);
+ *
+ * Frees the strip, sets, setbits and must buffers of the compiled program
+ * attached to preg, then the re_guts structure itself.  Both magic numbers
+ * are verified first and zeroed before anything is released.  If either
+ * check fails the function returns without freeing anything (a bad
+ * re_magic additionally trips _DIAGASSERT where that macro is active).
+ */
+void
+regfree(
+	regex_t *preg)
+{
+	struct re_guts *guts;
+
+	_DIAGASSERT(preg != NULL);
+
+	_DIAGASSERT(preg->re_magic == MAGIC1);
+	/* Not a compiled regex_t: complaining would be nice, but is hard. */
+	if (preg->re_magic != MAGIC1)
+		return;
+
+	guts = preg->re_g;
+	if (guts == NULL || guts->magic != MAGIC2)
+		return;
+
+	/* Invalidate both halves before tearing anything down. */
+	preg->re_magic = 0;
+	guts->magic = 0;
+
+	/* free(NULL) is a no-op, so the members need no individual guards. */
+	free(guts->strip);
+	free(guts->sets);
+	free(guts->setbits);
+	free(guts->must);
+	free(guts);
+}
diff --git a/libc/upstream-netbsd/libc/regex/utils.h b/libc/upstream-netbsd/libc/regex/utils.h
new file mode 100644
index 0000000..762caee
--- /dev/null
+++ b/libc/upstream-netbsd/libc/regex/utils.h
@@ -0,0 +1,91 @@
+/* $NetBSD: utils.h,v 1.6 2003/08/07 16:43:21 agc Exp $ */
+
+/*-
+ * Copyright (c) 1992, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Henry Spencer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)utils.h 8.3 (Berkeley) 3/20/94
+ */
+
+/*-
+ * Copyright (c) 1992, 1993, 1994 Henry Spencer.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Henry Spencer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)utils.h 8.3 (Berkeley) 3/20/94
+ */
+
+/* utility definitions */
+#define DUPMAX _POSIX2_RE_DUP_MAX /* xxx is this right? */
+#define INFINITY (DUPMAX + 1) /* one more than DUPMAX; presumably the "no upper bound" repeat marker -- confirm in regcomp.c */
+#define NC (CHAR_MAX - CHAR_MIN + 1) /* number of distinct values a char can hold */
+typedef unsigned char uch; /* shorthand used throughout the regex sources */
+
+/* switch off assertions (if not already off) if no REDEBUG */
+#ifndef REDEBUG
+#ifndef NDEBUG
+#define NDEBUG /* no assertions please */
+#endif /* !NDEBUG */
+#endif /* !REDEBUG */
+#include <assert.h> /* included after the NDEBUG decision so assert() honours it */
+
+/* for old systems with bcopy() but no memmove() */
+#ifdef USEBCOPY
+#define memmove(d, s, c) bcopy(s, d, c) /* bcopy takes source first, so d and s swap places */
+#endif /* USEBCOPY */
diff --git a/libc/upstream-netbsd/netbsd-compat.h b/libc/upstream-netbsd/netbsd-compat.h
index a52052a..3833c1d 100644
--- a/libc/upstream-netbsd/netbsd-compat.h
+++ b/libc/upstream-netbsd/netbsd-compat.h
@@ -21,4 +21,7 @@
#include <assert.h>
#define _DIAGASSERT(e) ((e) ? (void) 0 : __assert2(__FILE__, __LINE__, __func__, #e))
+// TODO: update our <sys/cdefs.h> to support this properly.
+#define __type_fit(t, a) (0 == 0)
+
#endif
diff --git a/tests/Android.mk b/tests/Android.mk
new file mode 100644
index 0000000..2721138
--- /dev/null
+++ b/tests/Android.mk
@@ -0,0 +1,43 @@
+#
+# Copyright (C) 2012 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# Copyright The Android Open Source Project
+
+LOCAL_PATH := $(call my-dir)
+
+test_module = bionic-unit-tests
+test_tags = eng tests
+
+test_src_files = \
+ regex_test.cpp \
+
+# Build $(test_module) for the device (linked against bionic). Run with:
+# adb shell /data/nativetest/bionic-unit-tests/bionic-unit-tests
+include $(CLEAR_VARS)
+LOCAL_MODULE := $(test_module)
+LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/Android.mk
+LOCAL_MODULE_TAGS := $(test_tags)
+LOCAL_SRC_FILES := $(test_src_files)
+include $(BUILD_NATIVE_TEST)
+
+# Build the same sources for the host as $(test_module)-glibc.
+# That binary links against glibc, not bionic, so it says nothing about
+# bionic's implementation; it exists so glibc can act as a reference
+# implementation for checking the tests themselves.
+include $(CLEAR_VARS)
+LOCAL_MODULE := $(test_module)-glibc
+LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/Android.mk
+LOCAL_MODULE_TAGS := $(test_tags)
+LOCAL_SRC_FILES := $(test_src_files)
+include $(BUILD_HOST_NATIVE_TEST)
diff --git a/tests/regex_test.cpp b/tests/regex_test.cpp
new file mode 100644
index 0000000..659d1db
--- /dev/null
+++ b/tests/regex_test.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include <sys/types.h>
+#include <regex.h>
+
+TEST(regex, smoke) {
+ // A quick test of all the regex functions: regcomp, regexec, regerror and regfree.
+ regex_t re;
+ ASSERT_EQ(0, regcomp(&re, "ab*c", 0));
+ ASSERT_EQ(0, regexec(&re, "abbbc", 0, NULL, 0)); // 'a', some 'b's, 'c': must match
+ ASSERT_EQ(REG_NOMATCH, regexec(&re, "foo", 0, NULL, 0)); // contains no 'a' or 'c': must not match
+
+ char buf[80];
+ regerror(REG_NOMATCH, &re, buf, sizeof(buf)); // fills buf with the text for REG_NOMATCH
+#if __BIONIC__
+ ASSERT_STREQ("regexec() failed to match", buf); // wording expected when built against bionic
+#else
+ ASSERT_STREQ("No match", buf); // wording expected from the non-bionic (host) build
+#endif
+
+ regfree(&re);
+}