diff options
author | Elliott Hughes <enh@google.com> | 2012-08-14 15:32:42 -0700 |
---|---|---|
committer | Elliott Hughes <enh@google.com> | 2012-08-15 15:39:11 -0700 |
commit | cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8 (patch) | |
tree | 614352f0342e88315e758fbf63e35172578f260d | |
parent | 53493a9b26e9fb665de590abf3502af94eaf3f44 (diff) | |
download | bionic-cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8.zip bionic-cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8.tar.gz bionic-cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8.tar.bz2 |
Switch to the current NetBSD regex implementation.
Change-Id: If32b28dd85d6a7ab8957ab81d19fa4c0de9499d5
18 files changed, 1832 insertions, 717 deletions
diff --git a/libc/Android.mk b/libc/Android.mk index 7b1aa99..51cef8a 100644 --- a/libc/Android.mk +++ b/libc/Android.mk @@ -9,7 +9,6 @@ libc_common_src_files := \ unistd/abort.c \ unistd/alarm.c \ unistd/brk.c \ - unistd/creat.c \ unistd/daemon.c \ unistd/eventfd.c \ unistd/exec.c \ @@ -328,13 +327,14 @@ libc_common_src_files := \ netbsd/nameser/ns_netint.c \ netbsd/nameser/ns_print.c \ netbsd/nameser/ns_samedomain.c \ - regex/regcomp.c \ - regex/regerror.c \ - regex/regexec.c \ - regex/regfree.c \ libc_upstream_netbsd_src_files := \ + upstream-netbsd/libc/compat-43/creat.c \ upstream-netbsd/libc/gen/nice.c \ + upstream-netbsd/libc/regex/regcomp.c \ + upstream-netbsd/libc/regex/regerror.c \ + upstream-netbsd/libc/regex/regexec.c \ + upstream-netbsd/libc/regex/regfree.c \ upstream-netbsd/libc/stdlib/tdelete.c \ upstream-netbsd/libc/stdlib/tfind.c \ upstream-netbsd/libc/stdlib/tsearch.c \ diff --git a/libc/NOTICE b/libc/NOTICE index 14a826a..8bf4978 100644 --- a/libc/NOTICE +++ b/libc/NOTICE @@ -2211,7 +2211,6 @@ SUCH DAMAGE. ------------------------------------------------------------------- -Copyright (c) 1992, 1993, 1994 Henry Spencer. Copyright (c) 1992, 1993, 1994 The Regents of the University of California. All rights reserved. @@ -2244,6 +2243,41 @@ SUCH DAMAGE. ------------------------------------------------------------------- +Copyright (c) 1992, 1993, 1994 Henry Spencer. + +This code is derived from software contributed to Berkeley by +Henry Spencer. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. +3. 
All advertising materials mentioning features or use of this software + must display the following acknowledgement: + This product includes software developed by the University of + California, Berkeley and its contributors. +4. Neither the name of the University nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +SUCH DAMAGE. + +------------------------------------------------------------------- + Copyright (c) 1993 The Regents of the University of California. All rights reserved. diff --git a/libc/regex/regerror.c b/libc/regex/regerror.c deleted file mode 100644 index 838ec8f..0000000 --- a/libc/regex/regerror.c +++ /dev/null @@ -1,130 +0,0 @@ -/* $OpenBSD: regerror.c,v 1.13 2005/08/05 13:03:00 espie Exp $ */ -/*- - * Copyright (c) 1992, 1993, 1994 Henry Spencer. - * Copyright (c) 1992, 1993, 1994 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * Henry Spencer. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- * - * @(#)regerror.c 8.4 (Berkeley) 3/20/94 - */ - -#include <sys/types.h> -#include <stdio.h> -#include <string.h> -#include <ctype.h> -#include <limits.h> -#include <stdlib.h> -#include <regex.h> - -#include "utils.h" - -static char *regatoi(const regex_t *, char *, int); - -static const struct rerr { - int code; - char *name; - char *explain; -} rerrs[] = { - { REG_NOMATCH, "REG_NOMATCH", "regexec() failed to match" }, - { REG_BADPAT, "REG_BADPAT", "invalid regular expression" }, - { REG_ECOLLATE, "REG_ECOLLATE", "invalid collating element" }, - { REG_ECTYPE, "REG_ECTYPE", "invalid character class" }, - { REG_EESCAPE, "REG_EESCAPE", "trailing backslash (\\)" }, - { REG_ESUBREG, "REG_ESUBREG", "invalid backreference number" }, - { REG_EBRACK, "REG_EBRACK", "brackets ([ ]) not balanced" }, - { REG_EPAREN, "REG_EPAREN", "parentheses not balanced" }, - { REG_EBRACE, "REG_EBRACE", "braces not balanced" }, - { REG_BADBR, "REG_BADBR", "invalid repetition count(s)" }, - { REG_ERANGE, "REG_ERANGE", "invalid character range" }, - { REG_ESPACE, "REG_ESPACE", "out of memory" }, - { REG_BADRPT, "REG_BADRPT", "repetition-operator operand invalid" }, - { REG_EMPTY, "REG_EMPTY", "empty (sub)expression" }, - { REG_ASSERT, "REG_ASSERT", "\"can't happen\" -- you found a bug" }, - { REG_INVARG, "REG_INVARG", "invalid argument to regex routine" }, - { 0, "", "*** unknown regexp error code ***" } -}; - -/* - - regerror - the interface to error numbers - = extern size_t regerror(int, const regex_t *, char *, size_t); - */ -/* ARGSUSED */ -size_t -regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size) -{ - const struct rerr *r; - size_t len; - int target = errcode &~ REG_ITOA; - char *s; - char convbuf[50]; - - if (errcode == REG_ATOI) - s = regatoi(preg, convbuf, sizeof convbuf); - else { - for (r = rerrs; r->code != 0; r++) - if (r->code == target) - break; - - if (errcode&REG_ITOA) { - if (r->code != 0) { - assert(strlen(r->name) < sizeof(convbuf)); - (void) 
strlcpy(convbuf, r->name, sizeof convbuf); - } else - (void)snprintf(convbuf, sizeof convbuf, - "REG_0x%x", target); - s = convbuf; - } else - s = r->explain; - } - - len = strlen(s) + 1; - if (errbuf_size > 0) { - strlcpy(errbuf, s, errbuf_size); - } - - return(len); -} - -/* - - regatoi - internal routine to implement REG_ATOI - */ -static char * -regatoi(const regex_t *preg, char *localbuf, int localbufsize) -{ - const struct rerr *r; - - for (r = rerrs; r->code != 0; r++) - if (strcmp(r->name, preg->re_endp) == 0) - break; - if (r->code == 0) - return("0"); - - (void)snprintf(localbuf, localbufsize, "%d", r->code); - return(localbuf); -} diff --git a/libc/regex/regfree.c b/libc/regex/regfree.c deleted file mode 100644 index a57eba3..0000000 --- a/libc/regex/regfree.c +++ /dev/null @@ -1,71 +0,0 @@ -/* $OpenBSD: regfree.c,v 1.7 2005/08/05 13:03:00 espie Exp $ */ -/*- - * Copyright (c) 1992, 1993, 1994 Henry Spencer. - * Copyright (c) 1992, 1993, 1994 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * Henry Spencer. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)regfree.c 8.3 (Berkeley) 3/20/94 - */ - -#include <sys/types.h> -#include <stdio.h> -#include <stdlib.h> -#include <regex.h> - -#include "utils.h" -#include "regex2.h" - -/* - - regfree - free everything - */ -void -regfree(regex_t *preg) -{ - struct re_guts *g; - - if (preg->re_magic != MAGIC1) /* oops */ - return; /* nice to complain, but hard */ - - g = preg->re_g; - if (g == NULL || g->magic != MAGIC2) /* oops again */ - return; - preg->re_magic = 0; /* mark it invalid */ - g->magic = 0; /* mark it invalid */ - - if (g->strip != NULL) - free((char *)g->strip); - if (g->sets != NULL) - free((char *)g->sets); - if (g->setbits != NULL) - free((char *)g->setbits); - if (g->must != NULL) - free(g->must); - free((char *)g); -} diff --git a/libc/unistd/creat.c b/libc/unistd/creat.c deleted file mode 100644 index 1b14465..0000000 --- a/libc/unistd/creat.c +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Copyright (C) 2008 The Android Open Source Project - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS - * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ -#include <unistd.h> -#include <fcntl.h> - -int creat(const char* pathname, mode_t mode) -{ - return open(pathname, O_WRONLY|O_TRUNC|O_CREAT, mode); -} diff --git a/libc/regex/utils.h b/libc/upstream-netbsd/libc/compat-43/creat.c index 3e184fc..9560bea 100644 --- a/libc/regex/utils.h +++ b/libc/upstream-netbsd/libc/compat-43/creat.c @@ -1,13 +1,9 @@ -/* $OpenBSD: utils.h,v 1.4 2003/06/02 20:18:36 millert Exp $ */ +/* $NetBSD: creat.c,v 1.10 2003/08/07 16:42:39 agc Exp $ */ -/*- - * Copyright (c) 1992, 1993, 1994 Henry Spencer. - * Copyright (c) 1992, 1993, 1994 +/* + * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. 
* - * This code is derived from software contributed to Berkeley by - * Henry Spencer. - * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: @@ -31,25 +27,26 @@ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. - * - * @(#)utils.h 8.3 (Berkeley) 3/20/94 */ -/* utility definitions */ -#define DUPMAX 255 -#define INFINITY (DUPMAX + 1) -#define NC (CHAR_MAX - CHAR_MIN + 1) -typedef unsigned char uch; - -/* switch off assertions (if not already off) if no REDEBUG */ -#ifndef REDEBUG -#ifndef NDEBUG -#define NDEBUG /* no assertions please */ -#endif +#include <sys/cdefs.h> +#if defined(LIBC_SCCS) && !defined(lint) +#if 0 +static char sccsid[] = "@(#)creat.c 8.1 (Berkeley) 6/2/93"; +#else +__RCSID("$NetBSD: creat.c,v 1.10 2003/08/07 16:42:39 agc Exp $"); #endif +#endif /* LIBC_SCCS and not lint */ + #include <assert.h> +#include <errno.h> +#include <fcntl.h> -/* for old systems with bcopy() but no memmove() */ -#ifdef USEBCOPY -#define memmove(d, s, c) bcopy(s, d, c) -#endif +int +creat(const char *path, mode_t mode) +{ + + _DIAGASSERT(path != NULL); + + return(open(path, O_WRONLY|O_CREAT|O_TRUNC, mode)); +} diff --git a/libc/regex/cclass.h b/libc/upstream-netbsd/libc/regex/cclass.h index d105491..3ab2ccb 100644 --- a/libc/regex/cclass.h +++ b/libc/upstream-netbsd/libc/regex/cclass.h @@ -1,7 +1,6 @@ -/* $OpenBSD: cclass.h,v 1.5 2003/06/02 20:18:36 millert Exp $ */ +/* $NetBSD: cclass.h,v 1.7 2003/08/07 16:43:19 agc Exp $ */ /*- - * Copyright (c) 1992, 1993, 1994 Henry Spencer. * Copyright (c) 1992, 1993, 1994 * The Regents of the University of California. All rights reserved. * @@ -35,34 +34,71 @@ * @(#)cclass.h 8.3 (Berkeley) 3/20/94 */ +/*- + * Copyright (c) 1992, 1993, 1994 Henry Spencer. 
+ * + * This code is derived from software contributed to Berkeley by + * Henry Spencer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)cclass.h 8.3 (Berkeley) 3/20/94 + */ + /* character-class table */ static const struct cclass { - char *name; - char *chars; - char *multis; + const char *name; + const char *chars; + const char *multis; } cclasses[] = { { "alnum", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\ -0123456789", ""} , +0123456789", "" }, { "alpha", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz", - ""} , - { "blank", " \t", ""} , + "" }, + { "blank", " \t", "" }, { "cntrl", "\007\b\t\n\v\f\r\1\2\3\4\5\6\16\17\20\21\22\23\24\ -\25\26\27\30\31\32\33\34\35\36\37\177", ""} , - { "digit", "0123456789", ""} , +\25\26\27\30\31\32\33\34\35\36\37\177", "" }, + { "digit", "0123456789", "" }, { "graph", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\ 0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", - ""} , + "" }, { "lower", "abcdefghijklmnopqrstuvwxyz", - ""} , + "" }, { "print", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\ 0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~ ", - ""} , + "" }, { "punct", "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", - ""} , - { "space", "\t\n\v\f\r ", ""} , + "" }, + { "space", "\t\n\v\f\r ", "" }, { "upper", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", - ""} , + "" }, { "xdigit", "0123456789ABCDEFabcdef", - ""} , + "" }, { NULL, 0, "" } }; diff --git a/libc/regex/cname.h b/libc/upstream-netbsd/libc/regex/cname.h index b674b68..4b9ef39 100644 --- a/libc/regex/cname.h +++ b/libc/upstream-netbsd/libc/regex/cname.h @@ -1,7 +1,6 @@ -/* $OpenBSD: cname.h,v 1.5 2003/06/02 20:18:36 millert Exp $ */ +/* $NetBSD: cname.h,v 1.7 2003/08/07 16:43:19 agc Exp $ */ /*- - * Copyright (c) 1992, 1993, 1994 Henry Spencer. * Copyright (c) 1992, 1993, 1994 * The Regents of the University of California. All rights reserved. * @@ -35,9 +34,46 @@ * @(#)cname.h 8.3 (Berkeley) 3/20/94 */ +/*- + * Copyright (c) 1992, 1993, 1994 Henry Spencer. + * + * This code is derived from software contributed to Berkeley by + * Henry Spencer. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)cname.h 8.3 (Berkeley) 3/20/94 + */ + /* character-name table */ static const struct cname { - char *name; + const char *name; char code; } cnames[] = { { "NUL", '\0' }, @@ -135,5 +171,5 @@ static const struct cname { { "right-curly-bracket", '}' }, { "tilde", '~' }, { "DEL", '\177' }, - { NULL, 0 } + { NULL, 0 }, }; diff --git a/libc/regex/engine.c b/libc/upstream-netbsd/libc/regex/engine.c index eae6ff2..2a800d4 100644 --- a/libc/regex/engine.c +++ b/libc/upstream-netbsd/libc/regex/engine.c @@ -1,7 +1,6 @@ -/* $OpenBSD: engine.c,v 1.15 2005/08/05 13:03:00 espie Exp $ */ +/* $NetBSD: engine.c,v 1.24 2012/03/13 21:13:42 christos Exp $ */ /*- - * Copyright (c) 1992, 1993, 1994 Henry Spencer. * Copyright (c) 1992, 1993, 1994 * The Regents of the University of California. All rights reserved. * @@ -35,6 +34,43 @@ * @(#)engine.c 8.5 (Berkeley) 3/20/94 */ +/*- + * Copyright (c) 1992, 1993, 1994 Henry Spencer. + * + * This code is derived from software contributed to Berkeley by + * Henry Spencer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)engine.c 8.5 (Berkeley) 3/20/94 + */ + /* * The matching engine and friends. This file is #included by regexec.c * after suitable #defines of a variety of macros used herein, so that @@ -72,11 +108,11 @@ struct match { struct re_guts *g; int eflags; regmatch_t *pmatch; /* [nsub+1] (0 element unused) */ - char *offp; /* offsets work from here */ - char *beginp; /* start of string -- virtual NUL precedes */ - char *endp; /* end of string -- virtual NUL here */ - char *coldp; /* can be no match starting before here */ - char **lastpos; /* [nplus+1] */ + const char *offp; /* offsets work from here */ + const char *beginp; /* start of string -- virtual NUL precedes */ + const char *endp; /* end of string -- virtual NUL here */ + const char *coldp; /* can be no match starting before here */ + const char **lastpos; /* [nplus+1] */ STATEVARS; states st; /* current states */ states fresh; /* states for a fresh start */ @@ -84,13 +120,18 @@ struct match { states empty; /* empty set of states */ }; -static int matcher(struct re_guts *, char *, size_t, regmatch_t[], int); -static char *dissect(struct match *, char *, char *, sopno, sopno); -static char *backref(struct match *, char *, char *, 
sopno, sopno, sopno, int); -static char *fast(struct match *, char *, char *, sopno, sopno); -static char *slow(struct match *, char *, char *, sopno, sopno); -static states step(struct re_guts *, sopno, sopno, states, int, states); -#define MAX_RECURSION 100 +/* ========= begin header generated by ./mkh ========= */ +#ifdef __cplusplus +extern "C" { +#endif + +/* === engine.c === */ +static int matcher(struct re_guts *g, const char *string, size_t nmatch, regmatch_t pmatch[], int eflags); +static const char *dissect(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst); +static const char *backref(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst, sopno lev); +static const char *fast(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst); +static const char *slow(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst); +static states step(struct re_guts *g, sopno start, sopno stop, states bef, int ch, states aft); #define BOL (OUT+1) #define EOL (BOL+1) #define BOLEOL (BOL+2) @@ -101,19 +142,24 @@ static states step(struct re_guts *, sopno, sopno, states, int, states); #define NONCHAR(c) ((c) > CHAR_MAX) #define NNONCHAR (CODEMAX-CHAR_MAX) #ifdef REDEBUG -static void print(struct match *, char *, states, int, FILE *); +static void print(struct match *m, char *caption, states st, int ch, FILE *d); #endif #ifdef REDEBUG -static void at(struct match *, char *, char *, char *, sopno, sopno); +static void at(struct match *m, char *title, char *start, char *stop, sopno startst, sopno stopst); #endif #ifdef REDEBUG -static char *pchar(int); +static char *pchar(int ch); +#endif + +#ifdef __cplusplus +} #endif +/* ========= end header generated by ./mkh ========= */ #ifdef REDEBUG #define SP(t, s, c) print(m, t, s, c, stdout) #define AT(t, p1, p2, s1, s2) at(m, t, p1, p2, s1, s2) -#define NOTE(str) { if (m->eflags&REG_TRACE) (void)printf("=%s\n", (str)); } 
+#define NOTE(str) { if (m->eflags&REG_TRACE) printf("=%s\n", (str)); } static int nope = 0; #else #define SP(t, s, c) /* nothing */ @@ -123,27 +169,39 @@ static int nope = 0; /* - matcher - the actual matching engine + == static int matcher(struct re_guts *g, char *string, \ + == size_t nmatch, regmatch_t pmatch[], int eflags); */ static int /* 0 success, REG_NOMATCH failure */ -matcher(struct re_guts *g, char *string, size_t nmatch, regmatch_t pmatch[], +matcher( + struct re_guts *g, + const char *string, + size_t nmatch, + regmatch_t pmatch[], int eflags) { - char *endp; - int i; + const char *endp; + size_t i; struct match mv; struct match *m = &mv; - char *dp; + const char *dp; const sopno gf = g->firststate+1; /* +1 for OEND */ const sopno gl = g->laststate; - char *start; - char *stop; + const char *start; + const char *stop; + int error = 0; + + _DIAGASSERT(g != NULL); + _DIAGASSERT(string != NULL); + /* pmatch checked below */ /* simplify the situation where possible */ if (g->cflags&REG_NOSUB) nmatch = 0; if (eflags&REG_STARTEND) { - start = string + pmatch[0].rm_so; - stop = string + pmatch[0].rm_eo; + _DIAGASSERT(pmatch != NULL); + start = string + (size_t)pmatch[0].rm_so; + stop = string + (size_t)pmatch[0].rm_eo; } else { start = string; stop = start + strlen(start); @@ -154,8 +212,8 @@ matcher(struct re_guts *g, char *string, size_t nmatch, regmatch_t pmatch[], /* prescreening; this does wonders for this rather slow code */ if (g->must != NULL) { for (dp = start; dp < stop; dp++) - if (*dp == g->must[0] && stop - dp >= g->mlen && - memcmp(dp, g->must, (size_t)g->mlen) == 0) + if (*dp == g->must[0] && (size_t)(stop - dp) >= g->mlen && + memcmp(dp, g->must, g->mlen) == 0) break; if (dp == stop) /* we didn't find g->must */ return(REG_NOMATCH); @@ -180,10 +238,8 @@ matcher(struct re_guts *g, char *string, size_t nmatch, regmatch_t pmatch[], for (;;) { endp = fast(m, start, stop, gf, gl); if (endp == NULL) { /* a miss */ - free(m->pmatch); - free(m->lastpos); - 
STATETEARDOWN(m); - return(REG_NOMATCH); + error = REG_NOMATCH; + goto done; } if (nmatch == 0 && !g->backrefs) break; /* no further info needed */ @@ -206,25 +262,24 @@ matcher(struct re_guts *g, char *string, size_t nmatch, regmatch_t pmatch[], m->pmatch = (regmatch_t *)malloc((m->g->nsub + 1) * sizeof(regmatch_t)); if (m->pmatch == NULL) { - STATETEARDOWN(m); - return(REG_ESPACE); + error = REG_ESPACE; + goto done; } - for (i = 1; i <= (int)m->g->nsub; i++) - m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1; + for (i = 1; i <= m->g->nsub; i++) + m->pmatch[i].rm_so = m->pmatch[i].rm_eo = (regoff_t)-1; if (!g->backrefs && !(m->eflags&REG_BACKR)) { NOTE("dissecting"); dp = dissect(m, m->coldp, endp, gf, gl); } else { if (g->nplus > 0 && m->lastpos == NULL) - m->lastpos = (char **)malloc((g->nplus+1) * - sizeof(char *)); + m->lastpos = malloc((g->nplus+1) * + sizeof(const char *)); if (g->nplus > 0 && m->lastpos == NULL) { - free(m->pmatch); - STATETEARDOWN(m); - return(REG_ESPACE); + error = REG_ESPACE; + goto done; } NOTE("backref dissect"); - dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0); + dp = backref(m, m->coldp, endp, gf, gl, (sopno)0); } if (dp != NULL) break; @@ -242,12 +297,12 @@ matcher(struct re_guts *g, char *string, size_t nmatch, regmatch_t pmatch[], /* try it on a shorter possibility */ #ifndef NDEBUG for (i = 1; i <= m->g->nsub; i++) { - assert(m->pmatch[i].rm_so == -1); - assert(m->pmatch[i].rm_eo == -1); + assert(m->pmatch[i].rm_so == (regoff_t)-1); + assert(m->pmatch[i].rm_eo == (regoff_t)-1); } #endif NOTE("backoff dissect"); - dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0); + dp = backref(m, m->coldp, endp, gf, gl, (sopno)0); } assert(dp == NULL || dp == endp); if (dp != NULL) /* found a shorter one */ @@ -255,54 +310,72 @@ matcher(struct re_guts *g, char *string, size_t nmatch, regmatch_t pmatch[], /* despite initial appearances, there is no match here */ NOTE("false alarm"); - if (m->coldp == stop) - break; start = m->coldp + 1; /* 
recycle starting later */ + assert(start <= stop); } /* fill in the details if requested */ if (nmatch > 0) { + _DIAGASSERT(pmatch != NULL); pmatch[0].rm_so = m->coldp - m->offp; pmatch[0].rm_eo = endp - m->offp; } if (nmatch > 1) { assert(m->pmatch != NULL); - for (i = 1; i < (ssize_t)nmatch; i++) - if (i <= (int)m->g->nsub) + for (i = 1; i < nmatch; i++) + if (i <= m->g->nsub) pmatch[i] = m->pmatch[i]; else { - pmatch[i].rm_so = -1; - pmatch[i].rm_eo = -1; + pmatch[i].rm_so = (regoff_t)-1; + pmatch[i].rm_eo = (regoff_t)-1; } } - if (m->pmatch != NULL) - free((char *)m->pmatch); - if (m->lastpos != NULL) - free((char *)m->lastpos); +done: + if (m->pmatch != NULL) { + free(m->pmatch); + m->pmatch = NULL; + } + if (m->lastpos != NULL) { + free(m->lastpos); + m->lastpos = NULL; + } STATETEARDOWN(m); - return(0); + return error; } /* - dissect - figure out what matched what, no back references + == static const char *dissect(struct match *m, const char *start, \ + == const char *stop, sopno startst, sopno stopst); */ -static char * /* == stop (success) always */ -dissect(struct match *m, char *start, char *stop, sopno startst, sopno stopst) +static const char * /* == stop (success) always */ +dissect( + struct match *m, + const char *start, + const char *stop, + sopno startst, + sopno stopst) { int i; sopno ss; /* start sop of current subRE */ sopno es; /* end sop of current subRE */ - char *sp; /* start of string matched by it */ - char *stp; /* string matched by it cannot pass here */ - char *rest; /* start of rest of string */ - char *tail; /* string unmatched by rest of RE */ + const char *sp; /* start of string matched by it */ + const char *stp; /* string matched by it cannot pass here */ + const char *rest; /* start of rest of string */ + const char *tail; /* string unmatched by rest of RE */ sopno ssub; /* start sop of subsubRE */ sopno esub; /* end sop of subsubRE */ - char *ssp; /* start of string matched by subsubRE */ - char *sep; /* end of string matched 
by subsubRE */ - char *oldssp; /* previous ssp */ - char *dp; + const char *ssp; /* start of string matched by subsubRE */ + const char *sep; /* end of string matched by subsubRE */ + const char *oldssp; /* previous ssp */ +#ifndef NDEBUG + const char *dp; +#endif + + _DIAGASSERT(m != NULL); + _DIAGASSERT(start != NULL); + _DIAGASSERT(stop != NULL); AT("diss", start, stop, startst, stopst); sp = start; @@ -361,7 +434,12 @@ dissect(struct match *m, char *start, char *stop, sopno startst, sopno stopst) esub = es - 1; /* did innards match? */ if (slow(m, sp, rest, ssub, esub) != NULL) { - dp = dissect(m, sp, rest, ssub, esub); +#ifdef NDEBUG + (void) +#else + dp = +#endif + dissect(m, sp, rest, ssub, esub); assert(dp == rest); } else /* no */ assert(sp == rest); @@ -399,7 +477,12 @@ dissect(struct match *m, char *start, char *stop, sopno startst, sopno stopst) } assert(sep == rest); /* must exhaust substring */ assert(slow(m, ssp, sep, ssub, esub) == rest); - dp = dissect(m, ssp, sep, ssub, esub); +#ifdef NDEBUG + (void) +#else + dp = +#endif + dissect(m, ssp, sep, ssub, esub); assert(dp == sep); sp = rest; break; @@ -434,7 +517,12 @@ dissect(struct match *m, char *start, char *stop, sopno startst, sopno stopst) else assert(OP(m->g->strip[esub]) == O_CH); } - dp = dissect(m, sp, rest, ssub, esub); +#ifdef NDEBUG + (void) +#else + dp = +#endif + dissect(m, sp, rest, ssub, esub); assert(dp == rest); sp = rest; break; @@ -467,24 +555,35 @@ dissect(struct match *m, char *start, char *stop, sopno startst, sopno stopst) /* - backref - figure out what matched what, figuring in back references + == static const char *backref(struct match *m, const char *start, \ + == const char *stop, sopno startst, sopno stopst, sopno lev); */ -static char * /* == stop (success) or NULL (failure) */ -backref(struct match *m, char *start, char *stop, sopno startst, sopno stopst, - sopno lev, int rec) /* PLUS nesting level */ +static const char * /* == stop (success) or NULL (failure) */ 
+backref( + struct match *m, + const char *start, + const char *stop, + sopno startst, + sopno stopst, + sopno lev) /* PLUS nesting level */ { int i; sopno ss; /* start sop of current subRE */ - char *sp; /* start of string matched by it */ + const char *sp; /* start of string matched by it */ sopno ssub; /* start sop of subsubRE */ sopno esub; /* end sop of subsubRE */ - char *ssp; /* start of string matched by subsubRE */ - char *dp; + const char *ssp; /* start of string matched by subsubRE */ + const char *dp; size_t len; int hard; sop s; regoff_t offsave; cset *cs; + _DIAGASSERT(m != NULL); + _DIAGASSERT(start != NULL); + _DIAGASSERT(stop != NULL); + AT("back", start, stop, startst, stopst); sp = start; @@ -572,51 +671,50 @@ backref(struct match *m, char *start, char *stop, sopno startst, sopno stopst, case OBACK_: /* the vilest depths */ i = OPND(s); assert(0 < i && i <= m->g->nsub); - if (m->pmatch[i].rm_eo == -1) + if (m->pmatch[i].rm_eo == (regoff_t)-1) return(NULL); - assert(m->pmatch[i].rm_so != -1); - len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so; - if (len == 0 && rec++ > MAX_RECURSION) + assert(m->pmatch[i].rm_so != (regoff_t)-1); + len = (size_t)(m->pmatch[i].rm_eo - m->pmatch[i].rm_so); + if (len == 0) return(NULL); assert(stop - m->beginp >= len); if (sp > stop - len) return(NULL); /* not enough left to match */ - ssp = m->offp + m->pmatch[i].rm_so; + ssp = m->offp + (size_t)m->pmatch[i].rm_so; if (memcmp(sp, ssp, len) != 0) return(NULL); while (m->g->strip[ss] != SOP(O_BACK, i)) ss++; - return(backref(m, sp+len, stop, ss+1, stopst, lev, rec)); - break; + return(backref(m, sp+len, stop, ss+1, stopst, lev)); + case OQUEST_: /* to null or not */ - dp = backref(m, sp, stop, ss+1, stopst, lev, rec); + dp = backref(m, sp, stop, ss+1, stopst, lev); if (dp != NULL) return(dp); /* not */ - return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev, rec)); - break; + return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev)); + case OPLUS_: assert(m->lastpos != 
NULL); assert(lev+1 <= m->g->nplus); m->lastpos[lev+1] = sp; - return(backref(m, sp, stop, ss+1, stopst, lev+1, rec)); - break; + return(backref(m, sp, stop, ss+1, stopst, lev+1)); + case O_PLUS: if (sp == m->lastpos[lev]) /* last pass matched null */ - return(backref(m, sp, stop, ss+1, stopst, lev-1, rec)); + return(backref(m, sp, stop, ss+1, stopst, lev-1)); /* try another pass */ m->lastpos[lev] = sp; - dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev, rec); + dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev); if (dp == NULL) - return(backref(m, sp, stop, ss+1, stopst, lev-1, rec)); - else - return(dp); - break; + dp = backref(m, sp, stop, ss+1, stopst, lev-1); + return(dp); + case OCH_: /* find the right one, if any */ ssub = ss + 1; esub = ss + OPND(s) - 1; assert(OP(m->g->strip[esub]) == OOR1); for (;;) { /* find first matching branch */ - dp = backref(m, sp, stop, ssub, esub, lev, rec); + dp = backref(m, sp, stop, ssub, esub, lev); if (dp != NULL) return(dp); /* that one missed, try next one */ @@ -631,29 +729,29 @@ backref(struct match *m, char *start, char *stop, sopno startst, sopno stopst, else assert(OP(m->g->strip[esub]) == O_CH); } - break; + case OLPAREN: /* must undo assignment if rest fails */ i = OPND(s); assert(0 < i && i <= m->g->nsub); offsave = m->pmatch[i].rm_so; m->pmatch[i].rm_so = sp - m->offp; - dp = backref(m, sp, stop, ss+1, stopst, lev, rec); + dp = backref(m, sp, stop, ss+1, stopst, lev); if (dp != NULL) return(dp); m->pmatch[i].rm_so = offsave; return(NULL); - break; + case ORPAREN: /* must undo assignment if rest fails */ i = OPND(s); assert(0 < i && i <= m->g->nsub); offsave = m->pmatch[i].rm_eo; m->pmatch[i].rm_eo = sp - m->offp; - dp = backref(m, sp, stop, ss+1, stopst, lev, rec); + dp = backref(m, sp, stop, ss+1, stopst, lev); if (dp != NULL) return(dp); m->pmatch[i].rm_eo = offsave; return(NULL); - break; + default: /* uh oh */ assert(nope); break; @@ -662,24 +760,35 @@ backref(struct match *m, char *start, char *stop, 
sopno startst, sopno stopst, /* "can't happen" */ assert(nope); /* NOTREACHED */ - return 0; + return NULL; } /* - fast - step through the string at top speed + == static const char *fast(struct match *m, const char *start, \ + == const char *stop, sopno startst, sopno stopst); */ -static char * /* where tentative match ended, or NULL */ -fast(struct match *m, char *start, char *stop, sopno startst, sopno stopst) +static const char * /* where tentative match ended, or NULL */ +fast( + struct match *m, + const char *start, + const char *stop, + sopno startst, + sopno stopst) { states st = m->st; states fresh = m->fresh; states tmp = m->tmp; - char *p = start; + const char *p = start; int c = (start == m->beginp) ? OUT : *(start-1); int lastc; /* previous c */ int flagch; - int i; - char *coldp; /* last p after which no match was underway */ + size_t i; + const char *coldp; /* last p after which no match was underway */ + + _DIAGASSERT(m != NULL); + _DIAGASSERT(start != NULL); + _DIAGASSERT(stop != NULL); CLEAR(st); SET1(st, startst); @@ -751,19 +860,30 @@ fast(struct match *m, char *start, char *stop, sopno startst, sopno stopst) /* - slow - step through the string more deliberately + == static const char *slow(struct match *m, const char *start, \ + == const char *stop, sopno startst, sopno stopst); */ -static char * /* where it ended */ -slow(struct match *m, char *start, char *stop, sopno startst, sopno stopst) +static const char * /* where it ended */ +slow( + struct match *m, + const char *start, + const char *stop, + sopno startst, + sopno stopst) { states st = m->st; states empty = m->empty; states tmp = m->tmp; - char *p = start; + const char *p = start; int c = (start == m->beginp) ? 
OUT : *(start-1); int lastc; /* previous c */ int flagch; - int i; - char *matchp; /* last p at which a match ended */ + size_t i; + const char *matchp; /* last p at which a match ended */ + + _DIAGASSERT(m != NULL); + _DIAGASSERT(start != NULL); + _DIAGASSERT(stop != NULL); AT("slow", start, stop, startst, stopst); CLEAR(st); @@ -831,9 +951,21 @@ slow(struct match *m, char *start, char *stop, sopno startst, sopno stopst) /* - step - map set of states reachable before char to set reachable after + == static states step(struct re_guts *g, sopno start, sopno stop, \ + == states bef, int ch, states aft); + == #define BOL (OUT+1) + == #define EOL (BOL+1) + == #define BOLEOL (BOL+2) + == #define NOTHING (BOL+3) + == #define BOW (BOL+4) + == #define EOW (BOL+5) + == #define CODEMAX (BOL+5) // highest code used + == #define NONCHAR(c) ((c) > CHAR_MAX) + == #define NNONCHAR (CODEMAX-CHAR_MAX) */ static states -step(struct re_guts *g, +step( + struct re_guts *g, sopno start, /* start state within strip */ sopno stop, /* state after stop state within strip */ states bef, /* states reachable before */ @@ -847,6 +979,8 @@ step(struct re_guts *g, sopno look; int i; + _DIAGASSERT(g != NULL); + for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) { s = g->strip[pc]; switch (OP(s)) { @@ -948,47 +1082,79 @@ step(struct re_guts *g, #ifdef REDEBUG /* - print - print a set of states + == #ifdef REDEBUG + == static void print(struct match *m, char *caption, states st, \ + == int ch, FILE *d); + == #endif */ static void -print(struct match *m, char *caption, states st, int ch, FILE *d) +print( + struct match *m, + char *caption, + states st, + int ch, + FILE *d) { struct re_guts *g = m->g; int i; int first = 1; + _DIAGASSERT(m != NULL); + _DIAGASSERT(caption != NULL); + if (!(m->eflags&REG_TRACE)) return; - (void)fprintf(d, "%s", caption); + _DIAGASSERT(d != NULL); + + fprintf(d, "%s", caption); if (ch != '\0') - (void)fprintf(d, " %s", pchar(ch)); + fprintf(d, " %s", pchar(ch));
for (i = 0; i < g->nstates; i++) if (ISSET(st, i)) { - (void)fprintf(d, "%s%d", (first) ? "\t" : ", ", i); + fprintf(d, "%s%d", (first) ? "\t" : ", ", i); first = 0; } - (void)fprintf(d, "\n"); + fprintf(d, "\n"); } /* - at - print current situation + == #ifdef REDEBUG + == static void at(struct match *m, char *title, char *start, char *stop, \ + == sopno startst, sopno stopst); + == #endif */ static void -at(struct match *m, char *title, char *start, char *stop, sopno startst, +at( + struct match *m, + char *title, + char *start, + char *stop, + sopno startst, sopno stopst) { + + _DIAGASSERT(m != NULL); + _DIAGASSERT(title != NULL); + _DIAGASSERT(start != NULL); + _DIAGASSERT(stop != NULL); + if (!(m->eflags&REG_TRACE)) return; - (void)printf("%s %s-", title, pchar(*start)); - (void)printf("%s ", pchar(*stop)); - (void)printf("%ld-%ld\n", (long)startst, (long)stopst); + printf("%s %s-", title, pchar(*start)); + printf("%s ", pchar(*stop)); + printf("%ld-%ld\n", (long)startst, (long)stopst); } #ifndef PCHARDONE #define PCHARDONE /* never again */ /* - pchar - make a character printable + == #ifdef REDEBUG + == static char *pchar(int ch); + == #endif * * Is this identical to regchar() over in debug.c? Well, yes. But a * duplicate here avoids having a debugging-capable regexec.o tied to @@ -996,7 +1162,8 @@ at(struct match *m, char *title, char *start, char *stop, sopno startst, * the non-debug compilation anyway, so it doesn't matter much. */ static char * /* -> representation */ -pchar(int ch) +pchar( + int ch) { static char pbuf[10]; diff --git a/libc/regex/regcomp.c b/libc/upstream-netbsd/libc/regex/regcomp.c index 19f4790..2644a22 100644 --- a/libc/regex/regcomp.c +++ b/libc/upstream-netbsd/libc/regex/regcomp.c @@ -1,6 +1,6 @@ -/* $OpenBSD: regcomp.c,v 1.19 2008/02/23 08:13:07 otto Exp $ */ +/* $NetBSD: regcomp.c,v 1.33 2012/03/13 21:13:43 christos Exp $ */ + /*- - * Copyright (c) 1992, 1993, 1994 Henry Spencer.
* Copyright (c) 1992, 1993, 1994 * The Regents of the University of California. All rights reserved. * @@ -34,14 +34,67 @@ * @(#)regcomp.c 8.5 (Berkeley) 3/20/94 */ +/*- + * Copyright (c) 1992, 1993, 1994 Henry Spencer. + * + * This code is derived from software contributed to Berkeley by + * Henry Spencer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)regcomp.c 8.5 (Berkeley) 3/20/94 + */ + +#include <sys/cdefs.h> +#if defined(LIBC_SCCS) && !defined(lint) +#if 0 +static char sccsid[] = "@(#)regcomp.c 8.5 (Berkeley) 3/20/94"; +#else +__RCSID("$NetBSD: regcomp.c,v 1.33 2012/03/13 21:13:43 christos Exp $"); +#endif +#endif /* LIBC_SCCS and not lint */ + +#include "namespace.h" #include <sys/types.h> -#include <stdio.h> -#include <string.h> + +#include <assert.h> #include <ctype.h> #include <limits.h> +#include <stdio.h> #include <stdlib.h> +#include <string.h> #include <regex.h> +#ifdef __weak_alias +__weak_alias(regcomp,_regcomp) +#endif + #include "utils.h" #include "regex2.h" @@ -53,56 +106,72 @@ * other clumsinesses */ struct parse { - char *next; /* next character in RE */ - char *end; /* end of string (-> NUL normally) */ + const char *next; /* next character in RE */ + const char *end; /* end of string (-> NUL normally) */ int error; /* has an error been seen? 
*/ sop *strip; /* malloced strip */ sopno ssize; /* malloced strip size (allocated) */ sopno slen; /* malloced strip length (used) */ - int ncsalloc; /* number of csets allocated */ + size_t ncsalloc; /* number of csets allocated */ struct re_guts *g; # define NPAREN 10 /* we need to remember () 1-9 for back refs */ sopno pbegin[NPAREN]; /* -> ( ([0] unused) */ sopno pend[NPAREN]; /* -> ) ([0] unused) */ }; -static void p_ere(struct parse *, int); -static void p_ere_exp(struct parse *); -static void p_str(struct parse *); -static void p_bre(struct parse *, int, int); -static int p_simp_re(struct parse *, int); -static int p_count(struct parse *); -static void p_bracket(struct parse *); -static void p_b_term(struct parse *, cset *); -static void p_b_cclass(struct parse *, cset *); -static void p_b_eclass(struct parse *, cset *); -static char p_b_symbol(struct parse *); -static char p_b_coll_elem(struct parse *, int); -static char othercase(int); -static void bothcases(struct parse *, int); -static void ordinary(struct parse *, int); -static void nonnewline(struct parse *); -static void repeat(struct parse *, sopno, int, int); -static int seterr(struct parse *, int); -static cset *allocset(struct parse *); -static void freeset(struct parse *, cset *); -static int freezeset(struct parse *, cset *); -static int firstch(struct parse *, cset *); -static int nch(struct parse *, cset *); -static void mcadd(struct parse *, cset *, char *); -static void mcinvert(struct parse *, cset *); -static void mccase(struct parse *, cset *); -static int isinsets(struct re_guts *, int); -static int samesets(struct re_guts *, int, int); -static void categorize(struct parse *, struct re_guts *); -static sopno dupl(struct parse *, sopno, sopno); -static void doemit(struct parse *, sop, size_t); -static void doinsert(struct parse *, sop, size_t, sopno); -static void dofwd(struct parse *, sopno, sop); -static void enlarge(struct parse *, sopno); -static void stripsnug(struct parse *, struct 
re_guts *); -static void findmust(struct parse *, struct re_guts *); -static sopno pluscount(struct parse *, struct re_guts *); +/* ========= begin header generated by ./mkh ========= */ +#ifdef __cplusplus +extern "C" { +#endif + +/* === regcomp.c === */ +static void p_ere(struct parse *p, int stop, size_t reclimit); +static void p_ere_exp(struct parse *p, size_t reclimit); +static void p_str(struct parse *p); +static void p_bre(struct parse *p, int end1, int end2, size_t reclimit); +static int p_simp_re(struct parse *p, int starordinary, size_t reclimit); +static int p_count(struct parse *p); +static void p_bracket(struct parse *p); +static void p_b_term(struct parse *p, cset *cs); +static void p_b_cclass(struct parse *p, cset *cs); +static void p_b_eclass(struct parse *p, cset *cs); +static char p_b_symbol(struct parse *p); +static char p_b_coll_elem(struct parse *p, int endc); +static int othercase(int ch); +static void bothcases(struct parse *p, int ch); +static void ordinary(struct parse *p, int ch); +static void nonnewline(struct parse *p); +static void repeat(struct parse *p, sopno start, int from, int to, size_t reclimit); +static int seterr(struct parse *p, int e); +static cset *allocset(struct parse *p); +static void freeset(struct parse *p, cset *cs); +static sopno freezeset(struct parse *p, cset *cs); +static int firstch(struct parse *p, cset *cs); +static int nch(struct parse *p, cset *cs); +static void mcadd(struct parse *p, cset *cs, const char *cp); +#if 0 +static void mcsub(cset *cs, char *cp); +static int mcin(cset *cs, char *cp); +static char *mcfind(cset *cs, char *cp); +#endif +static void mcinvert(struct parse *p, cset *cs); +static void mccase(struct parse *p, cset *cs); +static int isinsets(struct re_guts *g, int c); +static int samesets(struct re_guts *g, int c1, int c2); +static void categorize(struct parse *p, struct re_guts *g); +static sopno dupl(struct parse *p, sopno start, sopno finish); +static void doemit(struct parse *p, sop op, 
sopno opnd); +static void doinsert(struct parse *p, sop op, sopno opnd, sopno pos); +static void dofwd(struct parse *p, sopno pos, sopno value); +static int enlarge(struct parse *p, sopno size); +static void stripsnug(struct parse *p, struct re_guts *g); +static void findmust(struct parse *p, struct re_guts *g); +static sopno pluscount(struct parse *p, struct re_guts *g); + +#ifdef __cplusplus +} +#endif +/* ========= end header generated by ./mkh ========= */ static char nuls[10]; /* place to point scanner in event of error */ @@ -123,11 +192,11 @@ static char nuls[10]; /* place to point scanner in event of error */ #define NEXTn(n) (p->next += (n)) #define GETNEXT() (*p->next++) #define SETERROR(e) seterr(p, (e)) -#define REQUIRE(co, e) ((co) || SETERROR(e)) +#define REQUIRE(co, e) (void) ((co) || SETERROR(e)) #define MUSTSEE(c, e) (REQUIRE(MORE() && PEEK() == (c), e)) -#define MUSTEAT(c, e) (REQUIRE(MORE() && GETNEXT() == (c), e)) +#define MUSTEAT(c, e) (void) (REQUIRE(MORE() && GETNEXT() == (c), e)) #define MUSTNOTSEE(c, e) (REQUIRE(!MORE() || PEEK() != (c), e)) -#define EMIT(op, sopnd) doemit(p, (sop)(op), (size_t)(sopnd)) +#define EMIT(op, sopnd) doemit(p, (sop)(op), sopnd) #define INSERT(op, pos) doinsert(p, (sop)(op), HERE()-(pos)+1, pos) #define AHEAD(pos) dofwd(p, pos, HERE()-(pos)) #define ASTERN(sop, pos) EMIT(sop, HERE()-pos) @@ -142,11 +211,30 @@ static int never = 0; /* for use in asserts; shuts lint up */ #define never 0 /* some <assert.h>s have bugs too */ #endif +#define MEMLIMIT 0x8000000 +#define MEMSIZE(p) \ + ((p)->ncsalloc / CHAR_BIT * (p)->g->csetsize + \ + (p)->ncsalloc * sizeof(cset) + \ + (p)->ssize * sizeof(sop)) +#define RECLIMIT 256 + /* - regcomp - interface for parser and compilation + = extern int regcomp(regex_t *, const char *, int); + = #define REG_BASIC 0000 + = #define REG_EXTENDED 0001 + = #define REG_ICASE 0002 + = #define REG_NOSUB 0004 + = #define REG_NEWLINE 0010 + = #define REG_NOSPEC 0020 + = #define REG_PEND 0040 + = 
#define REG_DUMP 0200 */ int /* 0 success, otherwise REG_something */ -regcomp(regex_t *preg, const char *pattern, int cflags) +regcomp( + regex_t *preg, + const char *pattern, + int cflags) { struct parse pa; struct re_guts *g; @@ -159,6 +247,9 @@ regcomp(regex_t *preg, const char *pattern, int cflags) # define GOODFLAGS(f) ((f)&~REG_DUMP) #endif + _DIAGASSERT(preg != NULL); + _DIAGASSERT(pattern != NULL); + cflags = GOODFLAGS(cflags); if ((cflags&REG_EXTENDED) && (cflags&REG_NOSPEC)) return(REG_INVARG); @@ -168,7 +259,7 @@ regcomp(regex_t *preg, const char *pattern, int cflags) return(REG_INVARG); len = preg->re_endp - pattern; } else - len = strlen((char *)pattern); + len = strlen(pattern); /* do the mallocs early so failure handling is easy */ g = (struct re_guts *)malloc(sizeof(struct re_guts) + @@ -176,16 +267,16 @@ regcomp(regex_t *preg, const char *pattern, int cflags) if (g == NULL) return(REG_ESPACE); p->ssize = len/(size_t)2*(size_t)3 + (size_t)1; /* ugh */ - p->strip = (sop *)calloc(p->ssize, sizeof(sop)); + p->strip = malloc(p->ssize * sizeof(sop)); p->slen = 0; if (p->strip == NULL) { - free((char *)g); + free(g); return(REG_ESPACE); } /* set things up */ p->g = g; - p->next = (char *)pattern; /* convenience; we do not modify it */ + p->next = pattern; p->end = p->next + len; p->error = 0; p->ncsalloc = 0; @@ -213,11 +304,11 @@ regcomp(regex_t *preg, const char *pattern, int cflags) EMIT(OEND, 0); g->firststate = THERE(); if (cflags&REG_EXTENDED) - p_ere(p, OUT); + p_ere(p, OUT, 0); else if (cflags&REG_NOSPEC) p_str(p); else - p_bre(p, OUT, OUT); + p_bre(p, OUT, OUT, 0); EMIT(OEND, 0); g->laststate = THERE(); @@ -244,21 +335,32 @@ regcomp(regex_t *preg, const char *pattern, int cflags) /* - p_ere - ERE parser top level, concatenation and alternation + == static void p_ere(struct parse *p, int stop, size_t reclimit); */ static void -p_ere(struct parse *p, int stop) /* character this ERE should end at */ +p_ere( + struct parse *p, + int stop, /* character this ERE
should end at */ + size_t reclimit) { char c; - sopno prevback = 0; - sopno prevfwd = 0; + sopno prevback = 0; /* pacify gcc */ + sopno prevfwd = 0; /* pacify gcc */ sopno conc; int first = 1; /* is this the first alternative? */ + _DIAGASSERT(p != NULL); + + if (reclimit++ > RECLIMIT || p->error == REG_ESPACE) { + p->error = REG_ESPACE; + return; + } + for (;;) { /* do a bunch of concatenated expressions */ conc = HERE(); while (MORE() && (c = PEEK()) != '|' && c != stop) - p_ere_exp(p); + p_ere_exp(p, reclimit); REQUIRE(HERE() != conc, REG_EMPTY); /* require nonempty */ if (!EAT('|')) @@ -287,9 +389,12 @@ p_ere(struct parse *p, int stop) /* character this ERE should end at */ /* - p_ere_exp - parse one subERE, an atom possibly followed by a repetition op + == static void p_ere_exp(struct parse *p, size_t reclimit); */ static void -p_ere_exp(struct parse *p) +p_ere_exp( + struct parse *p, + size_t reclimit) { char c; sopno pos; @@ -298,6 +403,8 @@ p_ere_exp(struct parse *p) sopno subno; int wascaret = 0; + _DIAGASSERT(p != NULL); + assert(MORE()); /* caller should have ensured this */ c = GETNEXT(); @@ -311,7 +418,7 @@ p_ere_exp(struct parse *p) p->pbegin[subno] = HERE(); EMIT(OLPAREN, subno); if (!SEE(')')) - p_ere(p, ')'); + p_ere(p, ')', reclimit); if (subno < NPAREN) { p->pend[subno] = HERE(); assert(p->pend[subno] != 0); @@ -365,7 +472,7 @@ p_ere_exp(struct parse *p) ordinary(p, c); break; case '{': /* okay as ordinary except if digit follows */ - REQUIRE(!MORE() || !isdigit((uch)PEEK()), REG_BADRPT); + REQUIRE(!MORE() || !isdigit((unsigned char)PEEK()), REG_BADRPT); /* FALLTHROUGH */ default: ordinary(p, c); @@ -377,7 +484,7 @@ p_ere_exp(struct parse *p) c = PEEK(); /* we call { a repetition if followed by a digit */ if (!( c == '*' || c == '+' || c == '?' 
|| - (c == '{' && MORE2() && isdigit((uch)PEEK2())) )) + (c == '{' && MORE2() && isdigit((unsigned char)PEEK2())) )) return; /* no repetition, we're done */ NEXT(); @@ -406,14 +513,14 @@ p_ere_exp(struct parse *p) case '{': count = p_count(p); if (EAT(',')) { - if (isdigit((uch)PEEK())) { + if (isdigit((unsigned char)PEEK())) { count2 = p_count(p); REQUIRE(count <= count2, REG_BADBR); } else /* single number with comma */ count2 = INFINITY; } else /* just a single number */ count2 = count; - repeat(p, pos, count, count2); + repeat(p, pos, count, count2, 0); if (!EAT('}')) { /* error heuristics */ while (MORE() && PEEK() != '}') NEXT(); @@ -427,17 +534,22 @@ p_ere_exp(struct parse *p) return; c = PEEK(); if (!( c == '*' || c == '+' || c == '?' || - (c == '{' && MORE2() && isdigit((uch)PEEK2())) ) ) + (c == '{' && MORE2() && isdigit((unsigned char)PEEK2())) ) ) return; SETERROR(REG_BADRPT); } /* - p_str - string (no metacharacters) "parser" + == static void p_str(struct parse *p); */ static void -p_str(struct parse *p) +p_str( + struct parse *p) { + + _DIAGASSERT(p != NULL); + REQUIRE(MORE(), REG_EMPTY); while (MORE()) ordinary(p, GETNEXT()); @@ -445,6 +557,8 @@ p_str(struct parse *p) /* - p_bre - BRE parser top level, anchoring and concatenation + == static void p_bre(struct parse *p, int end1, \ + == int end2, size_t reclimit); * Giving end1 as OUT essentially eliminates the end1/end2 check. * * This implementation is a bit of a kludge, in that a trailing $ is first @@ -454,21 +568,32 @@ p_str(struct parse *p) * The amount of lookahead needed to avoid this kludge is excessive. */ static void -p_bre(struct parse *p, +p_bre( + struct parse *p, int end1, /* first terminating character */ - int end2) /* second terminating character */ + int end2, /* second terminating character */ + size_t reclimit) { - sopno start = HERE(); + sopno start; int first = 1; /* first subexpression? 
*/ int wasdollar = 0; + _DIAGASSERT(p != NULL); + + if (reclimit++ > RECLIMIT || p->error == REG_ESPACE) { + p->error = REG_ESPACE; + return; + } + + start = HERE(); + if (EAT('^')) { EMIT(OBOL, 0); p->g->iflags |= USEBOL; p->g->nbol++; } while (MORE() && !SEETWO(end1, end2)) { - wasdollar = p_simp_re(p, first); + wasdollar = p_simp_re(p, first, reclimit); first = 0; } if (wasdollar) { /* oops, that was a trailing anchor */ @@ -483,26 +608,30 @@ p_bre(struct parse *p, /* - p_simp_re - parse a simple RE, an atom possibly followed by a repetition + == static int p_simp_re(struct parse *p, int starordinary, size_t reclimit); */ static int /* was the simple RE an unbackslashed $? */ -p_simp_re(struct parse *p, - int starordinary) /* is a leading * an ordinary character? */ +p_simp_re( + struct parse *p, + int starordinary, /* is a leading * an ordinary character? */ + size_t reclimit) { int c; int count; int count2; - sopno pos; - int i; + sopno pos, i; sopno subno; # define BACKSL (1<<CHAR_BIT) + _DIAGASSERT(p != NULL); + pos = HERE(); /* repetion op, if any, covers from here */ assert(MORE()); /* caller should have ensured this */ c = GETNEXT(); if (c == '\\') { REQUIRE(MORE(), REG_EESCAPE); - c = BACKSL | GETNEXT(); + c = BACKSL | (unsigned char)GETNEXT(); } switch (c) { case '.': @@ -525,7 +654,7 @@ p_simp_re(struct parse *p, EMIT(OLPAREN, subno); /* the MORE here is an error heuristic */ if (MORE() && !SEETWO('\\', ')')) - p_bre(p, '\\', ')'); + p_bre(p, '\\', ')', reclimit); if (subno < NPAREN) { p->pend[subno] = HERE(); assert(p->pend[subno] != 0); @@ -564,7 +693,7 @@ p_simp_re(struct parse *p, REQUIRE(starordinary, REG_BADRPT); /* FALLTHROUGH */ default: - ordinary(p, (char)c); + ordinary(p, c &~ BACKSL); break; } @@ -577,21 +706,21 @@ p_simp_re(struct parse *p, } else if (EATTWO('\\', '{')) { count = p_count(p); if (EAT(',')) { - if (MORE() && isdigit((uch)PEEK())) { + if (MORE() && isdigit((unsigned char)PEEK())) { count2 = p_count(p); REQUIRE(count <= 
count2, REG_BADBR); } else /* single number with comma */ count2 = INFINITY; } else /* just a single number */ count2 = count; - repeat(p, pos, count, count2); + repeat(p, pos, count, count2, 0); if (!EATTWO('\\', '}')) { /* error heuristics */ while (MORE() && !SEETWO('\\', '}')) NEXT(); REQUIRE(MORE(), REG_EBRACE); SETERROR(REG_BADBR); } - } else if (c == '$') /* $ (but not \$) ends it */ + } else if (c == (unsigned char)'$') /* $ (but not \$) ends it */ return(1); return(0); @@ -599,14 +728,18 @@ p_simp_re(struct parse *p, /* - p_count - parse a repetition count + == static int p_count(struct parse *p); */ static int /* the value */ -p_count(struct parse *p) +p_count( + struct parse *p) { int count = 0; int ndigits = 0; - while (MORE() && isdigit((uch)PEEK()) && count <= DUPMAX) { + _DIAGASSERT(p != NULL); + + while (MORE() && isdigit((unsigned char)PEEK()) && count <= DUPMAX) { count = count*10 + (GETNEXT() - '0'); ndigits++; } @@ -617,33 +750,37 @@ p_count(struct parse *p) /* - p_bracket - parse a bracketed character list + == static void p_bracket(struct parse *p); * * Note a significant property of this code: if the allocset() did SETERROR, * no set operations are done. 
*/ static void -p_bracket(struct parse *p) +p_bracket( + struct parse *p) { cset *cs; int invert = 0; + _DIAGASSERT(p != NULL); + + cs = allocset(p); + if (cs == NULL) + return; /* Dept of Truly Sickening Special-Case Kludges */ - if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]", 6) == 0) { + if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]", + (size_t)6) == 0) { EMIT(OBOW, 0); NEXTn(6); return; } - if (p->next + 5 < p->end && strncmp(p->next, "[:>:]]", 6) == 0) { + if (p->next + 5 < p->end && strncmp(p->next, "[:>:]]", + (size_t)6) == 0) { EMIT(OEOW, 0); NEXTn(6); return; } - if ((cs = allocset(p)) == NULL) { - /* allocset did set error status in p */ - return; - } - if (EAT('^')) invert++; /* make note to invert set at end */ if (EAT(']')) @@ -656,18 +793,16 @@ p_bracket(struct parse *p) CHadd(cs, '-'); MUSTEAT(']', REG_EBRACK); - if (p->error != 0) { /* don't mess things up further */ - freeset(p, cs); + if (p->error != 0) /* don't mess things up further */ return; - } if (p->g->cflags&REG_ICASE) { - int i; + ssize_t i; int ci; for (i = p->g->csetsize - 1; i >= 0; i--) if (CHIN(cs, i) && isalpha(i)) { - ci = othercase(i); + ci = othercase((int)i); if (ci != i) CHadd(cs, ci); } @@ -675,13 +810,13 @@ p_bracket(struct parse *p) mccase(p, cs); } if (invert) { - int i; + ssize_t i; for (i = p->g->csetsize - 1; i >= 0; i--) if (CHIN(cs, i)) - CHsub(cs, i); + CHsub(cs, (int)i); else - CHadd(cs, i); + CHadd(cs, (int)i); if (p->g->cflags&REG_NEWLINE) CHsub(cs, '\n'); if (cs->multis != NULL) @@ -699,23 +834,30 @@ p_bracket(struct parse *p) /* - p_b_term - parse one term of a bracketed character list + == static void p_b_term(struct parse *p, cset *cs); */ static void -p_b_term(struct parse *p, cset *cs) +p_b_term( + struct parse *p, + cset *cs) { char c; char start, finish; int i; + _DIAGASSERT(p != NULL); + _DIAGASSERT(cs != NULL); + /* classify what we've got */ switch ((MORE()) ? PEEK() : '\0') { case '[': c = (MORE2()) ?
PEEK2() : '\0'; break; + case '-': SETERROR(REG_ERANGE); return; /* NOTE RETURN */ - break; + default: c = '\0'; break; @@ -762,17 +904,25 @@ p_b_term(struct parse *p, cset *cs) /* - p_b_cclass - parse a character-class name and deal with it + == static void p_b_cclass(struct parse *p, cset *cs); */ static void -p_b_cclass(struct parse *p, cset *cs) +p_b_cclass( + struct parse *p, + cset *cs) { - char *sp = p->next; + const char *sp; const struct cclass *cp; size_t len; - char *u; + const char *u; char c; - while (MORE() && isalpha(PEEK())) + _DIAGASSERT(p != NULL); + _DIAGASSERT(cs != NULL); + + sp = p->next; + + while (MORE() && isalpha((unsigned char)PEEK())) NEXT(); len = p->next - sp; for (cp = cclasses; cp->name != NULL; cp++) @@ -793,26 +943,36 @@ p_b_cclass(struct parse *p, cset *cs) /* - p_b_eclass - parse an equivalence-class name and deal with it + == static void p_b_eclass(struct parse *p, cset *cs); * * This implementation is incomplete. xxx */ static void -p_b_eclass(struct parse *p, cset *cs) +p_b_eclass( + struct parse *p, + cset *cs) { char c; + _DIAGASSERT(p != NULL); + _DIAGASSERT(cs != NULL); + c = p_b_coll_elem(p, '='); CHadd(cs, c); } /* - p_b_symbol - parse a character or [..]ed multicharacter collating symbol + == static char p_b_symbol(struct parse *p); */ static char /* value of symbol */ -p_b_symbol(struct parse *p) +p_b_symbol( + struct parse *p) { char value; + _DIAGASSERT(p != NULL); + REQUIRE(MORE(), REG_EBRACK); if (!EATTWO('[', '.')) return(GETNEXT()); @@ -825,14 +985,20 @@ p_b_symbol(struct parse *p) /* - p_b_coll_elem - parse a collating-element name and look it up + == static char p_b_coll_elem(struct parse *p, int endc); */ static char /* value of collating element */ -p_b_coll_elem(struct parse *p, +p_b_coll_elem( + struct parse *p, int endc) /* name ended by endc,']' */ { - char *sp = p->next; + const char *sp; const struct cname *cp; - int len; + size_t len; + + _DIAGASSERT(p != NULL); + + sp = p->next; while (MORE() && 
!SEETWO(endc, ']')) NEXT(); @@ -852,33 +1018,41 @@ p_b_coll_elem(struct parse *p, /* - othercase - return the case counterpart of an alphabetic + == static int othercase(int ch); */ -static char /* if no counterpart, return ch */ -othercase(int ch) +static int /* if no counterpart, return ch */ +othercase( + int ch) { - ch = (uch)ch; assert(isalpha(ch)); if (isupper(ch)) - return ((uch)tolower(ch)); + return(tolower(ch)); else if (islower(ch)) - return ((uch)toupper(ch)); + return(toupper(ch)); else /* peculiar, but could happen */ return(ch); } /* - bothcases - emit a dualcase version of a two-case character + == static void bothcases(struct parse *p, int ch); * * Boy, is this implementation ever a kludge... */ static void -bothcases(struct parse *p, int ch) +bothcases( + struct parse *p, + int ch) { - char *oldnext = p->next; - char *oldend = p->end; + const char *oldnext; + const char *oldend; char bracket[3]; - ch = (uch)ch; + _DIAGASSERT(p != NULL); + + oldnext = p->next; + oldend = p->end; + assert(othercase(ch) != ch); /* p_bracket() would recurse */ p->next = bracket; p->end = bracket+2; @@ -893,33 +1067,50 @@ bothcases(struct parse *p, int ch) /* - ordinary - emit an ordinary character + == static void ordinary(struct parse *p, int ch); */ static void -ordinary(struct parse *p, int ch) +ordinary( + struct parse *p, + int ch) { - cat_t *cap = p->g->categories; + cat_t *cap; + + _DIAGASSERT(p != NULL); - if ((p->g->cflags&REG_ICASE) && isalpha((uch)ch) && othercase(ch) != ch) - bothcases(p, ch); + cap = p->g->categories; + if ((p->g->cflags&REG_ICASE) && isalpha((unsigned char) ch) + && othercase((unsigned char) ch) != (unsigned char) ch) + bothcases(p, (unsigned char) ch); else { - EMIT(OCHAR, (uch)ch); - if (cap[ch] == 0) - cap[ch] = p->g->ncategories++; + EMIT(OCHAR, (sopno)(unsigned char)ch); + if (cap[ch] == 0) { + _DIAGASSERT(__type_fit(unsigned char, + p->g->ncategories + 1)); + cap[ch] = (unsigned char)p->g->ncategories++; + } } } /* - nonnewline - emit 
REG_NEWLINE version of OANY + == static void nonnewline(struct parse *p); * * Boy, is this implementation ever a kludge... */ static void -nonnewline(struct parse *p) +nonnewline( + struct parse *p) { - char *oldnext = p->next; - char *oldend = p->end; + const char *oldnext; + const char *oldend; char bracket[4]; + _DIAGASSERT(p != NULL); + + oldnext = p->next; + oldend = p->end; + p->next = bracket; p->end = bracket+3; bracket[0] = '^'; @@ -934,23 +1125,33 @@ nonnewline(struct parse *p) /* - repeat - generate code for a bounded repetition, recursively if needed + == static void repeat(struct parse *p, sopno start, int from, int to, + == size_t reclimit); */ static void -repeat(struct parse *p, +repeat( + struct parse *p, sopno start, /* operand from here to end of strip */ int from, /* repeated from this number */ - int to) /* to this number of times (maybe INFINITY) */ + int to, /* to this number of times (maybe INFINITY) */ + size_t reclimit) { - sopno finish = HERE(); + sopno finish; # define N 2 # define INF 3 # define REP(f, t) ((f)*8 + (t)) # define MAP(n) (((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N) sopno copy; - if (p->error != 0) /* head off possible runaway recursion */ + _DIAGASSERT(p != NULL); + + if (reclimit++ > RECLIMIT) + p->error = REG_ESPACE; + if (p->error) return; + finish = HERE(); + assert(from <= to); switch (REP(MAP(from), MAP(to))) { @@ -962,7 +1163,7 @@ repeat(struct parse *p, case REP(0, INF): /* as x{1,}? */ /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */ INSERT(OCH_, start); /* offset is wrong... */ - repeat(p, start+1, 1, to); + repeat(p, start+1, 1, to, reclimit); ASTERN(OOR1, start); AHEAD(start); /* ... 
fix it */ EMIT(OOR2, 0); @@ -982,7 +1183,7 @@ repeat(struct parse *p, ASTERN(O_CH, THERETHERE()); copy = dupl(p, start+1, finish+1); assert(copy == finish+4); - repeat(p, copy, 1, to-1); + repeat(p, copy, 1, to-1, reclimit); break; case REP(1, INF): /* as x+ */ INSERT(OPLUS_, start); @@ -990,11 +1191,11 @@ repeat(struct parse *p, break; case REP(N, N): /* as xx{m-1,n-1} */ copy = dupl(p, start, finish); - repeat(p, copy, from-1, to-1); + repeat(p, copy, from-1, to-1, reclimit); break; case REP(N, INF): /* as xx{n-1,INF} */ copy = dupl(p, start, finish); - repeat(p, copy, from-1, to); + repeat(p, copy, from-1, to, reclimit); break; default: /* "can't happen" */ SETERROR(REG_ASSERT); /* just in case */ @@ -1004,10 +1205,16 @@ repeat(struct parse *p, /* - seterr - set an error condition + == static int seterr(struct parse *p, int e); */ static int /* useless but makes type checking happy */ -seterr(struct parse *p, int e) +seterr( + struct parse *p, + int e) { + + _DIAGASSERT(p != NULL); + if (p->error == 0) /* keep earliest error condition */ p->error = e; p->next = nuls; /* try to bring things to a halt */ @@ -1017,81 +1224,92 @@ seterr(struct parse *p, int e) /* - allocset - allocate a set of characters for [] + == static cset *allocset(struct parse *p); */ static cset * -allocset(struct parse *p) +allocset( + struct parse *p) { - int no = p->g->ncsets++; + size_t no; size_t nc; size_t nbytes; cset *cs; - size_t css = (size_t)p->g->csetsize; - int i; + size_t css; + size_t i; - if (no >= p->ncsalloc) { /* need another column of space */ - void *ptr; + _DIAGASSERT(p != NULL); + no = p->g->ncsets++; + css = (size_t)p->g->csetsize; + if (no >= p->ncsalloc) { /* need another column of space */ p->ncsalloc += CHAR_BIT; nc = p->ncsalloc; assert(nc % CHAR_BIT == 0); nbytes = nc / CHAR_BIT * css; - - ptr = (cset *)realloc((char *)p->g->sets, nc * sizeof(cset)); - if (ptr == NULL) - goto nomem; - p->g->sets = ptr; - - ptr = (uch *)realloc((char *)p->g->setbits, nbytes); - 
if (ptr == NULL) - goto nomem; - p->g->setbits = ptr; - - for (i = 0; i < no; i++) - p->g->sets[i].ptr = p->g->setbits + css*(i/CHAR_BIT); - - (void) memset((char *)p->g->setbits + (nbytes - css), 0, css); + if (MEMSIZE(p) > MEMLIMIT) + goto oomem; + if (p->g->sets == NULL) + p->g->sets = malloc(nc * sizeof(cset)); + else + p->g->sets = realloc(p->g->sets, nc * sizeof(cset)); + if (p->g->setbits == NULL) + p->g->setbits = malloc(nbytes); + else { + p->g->setbits = realloc(p->g->setbits, nbytes); + /* xxx this isn't right if setbits is now NULL */ + for (i = 0; i < no; i++) + p->g->sets[i].ptr = p->g->setbits + css*(i/CHAR_BIT); + } + if (p->g->sets != NULL && p->g->setbits != NULL) + (void) memset((char *)p->g->setbits + (nbytes - css), + 0, css); + else { +oomem: + no = 0; + SETERROR(REG_ESPACE); + /* caller's responsibility not to do set ops */ + return NULL; + } } - /* XXX should not happen */ - if (p->g->sets == NULL || p->g->setbits == NULL) - goto nomem; cs = &p->g->sets[no]; cs->ptr = p->g->setbits + css*((no)/CHAR_BIT); - cs->mask = 1 << ((no) % CHAR_BIT); + cs->mask = 1 << (unsigned int)((no) % CHAR_BIT); cs->hash = 0; cs->smultis = 0; cs->multis = NULL; return(cs); -nomem: - free(p->g->sets); - p->g->sets = NULL; - free(p->g->setbits); - p->g->setbits = NULL; - - SETERROR(REG_ESPACE); - /* caller's responsibility not to do set ops */ - return(NULL); } /* - freeset - free a now-unused set + == static void freeset(struct parse *p, cset *cs); */ static void -freeset(struct parse *p, cset *cs) +freeset( + struct parse *p, + cset *cs) { - int i; - cset *top = &p->g->sets[p->g->ncsets]; - size_t css = (size_t)p->g->csetsize; + size_t i; + cset *top; + size_t css; + + _DIAGASSERT(p != NULL); + _DIAGASSERT(cs != NULL); - for (i = 0; i < (ssize_t)css; i++) - CHsub(cs, i); + top = &p->g->sets[p->g->ncsets]; + css = (size_t)p->g->csetsize; + + for (i = 0; i < css; i++) + CHsub(cs, (int)i); if (cs == top-1) /* recover only the easy case */ p->g->ncsets--; } /* - 
freezeset - final processing on a set of characters + == static int freezeset(struct parse *p, cset *cs); * * The main task here is merging identical sets. This is usually a waste * of time (although the hash code minimizes the overhead), but can win @@ -1099,23 +1317,32 @@ freeset(struct parse *p, cset *cs) * is done using addition rather than xor -- all ASCII [aA] sets xor to * the same value! */ -static int /* set number */ -freezeset(struct parse *p, cset *cs) +static sopno /* set number */ +freezeset( + struct parse *p, + cset *cs) { - uch h = cs->hash; - int i; - cset *top = &p->g->sets[p->g->ncsets]; + uch h; + size_t i; + cset *top; cset *cs2; - size_t css = (size_t)p->g->csetsize; + size_t css; + + _DIAGASSERT(p != NULL); + _DIAGASSERT(cs != NULL); + + h = cs->hash; + top = &p->g->sets[p->g->ncsets]; + css = (size_t)p->g->csetsize; /* look for an earlier one which is the same */ for (cs2 = &p->g->sets[0]; cs2 < top; cs2++) if (cs2->hash == h && cs2 != cs) { /* maybe */ - for (i = 0; i < (ssize_t)css; i++) + for (i = 0; i < css; i++) if (!!CHIN(cs2, i) != !!CHIN(cs, i)) break; /* no */ - if (i == (ssize_t)css) + if (i == css) break; /* yes */ } @@ -1124,19 +1351,27 @@ freezeset(struct parse *p, cset *cs) cs = cs2; } - return((int)(cs - p->g->sets)); + return (sopno)(cs - p->g->sets); } /* - firstch - return first character in a set (which must have at least one) + == static int firstch(struct parse *p, cset *cs); */ static int /* character; there is no "none" value */ -firstch(struct parse *p, cset *cs) +firstch( + struct parse *p, + cset *cs) { - int i; - size_t css = (size_t)p->g->csetsize; + size_t i; + size_t css; + + _DIAGASSERT(p != NULL); + _DIAGASSERT(cs != NULL); - for (i = 0; i < (ssize_t)css; i++) + css = (size_t)p->g->csetsize; + + for (i = 0; i < css; i++) if (CHIN(cs, i)) return((char)i); assert(never); @@ -1145,15 +1380,23 @@ firstch(struct parse *p, cset *cs) /* - nch - number of characters in a set + == static int nch(struct parse *p, cset 
*cs); */ static int -nch(struct parse *p, cset *cs) +nch( + struct parse *p, + cset *cs) { - int i; - size_t css = (size_t)p->g->csetsize; + size_t i; + size_t css; int n = 0; - for (i = 0; i < (ssize_t)css; i++) + _DIAGASSERT(p != NULL); + _DIAGASSERT(cs != NULL); + + css = (size_t)p->g->csetsize; + + for (i = 0; i < css; i++) if (CHIN(cs, i)) n++; return(n); @@ -1161,63 +1404,170 @@ nch(struct parse *p, cset *cs) /* - mcadd - add a collating element to a cset + == static void mcadd(struct parse *p, cset *cs, \ + == char *cp); */ static void -mcadd( struct parse *p, cset *cs, char *cp) +mcadd( + struct parse *p, + cset *cs, + const char *cp) { - size_t oldend = cs->smultis; - void *np; + size_t oldend; + + _DIAGASSERT(p != NULL); + _DIAGASSERT(cs != NULL); + _DIAGASSERT(cp != NULL); + + oldend = cs->smultis; cs->smultis += strlen(cp) + 1; - np = realloc(cs->multis, cs->smultis); - if (np == NULL) { - if (cs->multis) - free(cs->multis); - cs->multis = NULL; + if (cs->multis == NULL) + cs->multis = malloc(cs->smultis); + else + cs->multis = realloc(cs->multis, cs->smultis); + if (cs->multis == NULL) { SETERROR(REG_ESPACE); return; } - cs->multis = np; - strlcpy(cs->multis + oldend - 1, cp, cs->smultis - oldend + 1); + (void) strcpy(cs->multis + oldend - 1, cp); + cs->multis[cs->smultis - 1] = '\0'; } +#if 0 +/* + - mcsub - subtract a collating element from a cset + == static void mcsub(cset *cs, char *cp); + */ +static void +mcsub( + cset *cs, + char *cp) +{ + char *fp; + size_t len; + + _DIAGASSERT(cs != NULL); + _DIAGASSERT(cp != NULL); + + fp = mcfind(cs, cp); + len = strlen(fp); + + assert(fp != NULL); + (void) memmove(fp, fp + len + 1, + cs->smultis - (fp + len + 1 - cs->multis)); + cs->smultis -= len; + + if (cs->smultis == 0) { + free(cs->multis); + cs->multis = NULL; + return; + } + + cs->multis = realloc(cs->multis, cs->smultis); + assert(cs->multis != NULL); +} + +/* + - mcin - is a collating element in a cset? 
+ == static int mcin(cset *cs, char *cp); + */ +static int +mcin( + cset *cs, + char *cp) +{ + + _DIAGASSERT(cs != NULL); + _DIAGASSERT(cp != NULL); + + return(mcfind(cs, cp) != NULL); +} + +/* + - mcfind - find a collating element in a cset + == static char *mcfind(cset *cs, char *cp); + */ +static char * +mcfind( + cset *cs, + char *cp) +{ + char *p; + + _DIAGASSERT(cs != NULL); + _DIAGASSERT(cp != NULL); + + if (cs->multis == NULL) + return(NULL); + for (p = cs->multis; *p != '\0'; p += strlen(p) + 1) + if (strcmp(cp, p) == 0) + return(p); + return(NULL); +} +#endif + /* - mcinvert - invert the list of collating elements in a cset + == static void mcinvert(struct parse *p, cset *cs); * * This would have to know the set of possibilities. Implementation * is deferred. */ /* ARGSUSED */ static void -mcinvert(struct parse *p, cset *cs) +mcinvert( + struct parse *p, + cset *cs) { + + _DIAGASSERT(p != NULL); + _DIAGASSERT(cs != NULL); + assert(cs->multis == NULL); /* xxx */ } /* - mccase - add case counterparts of the list of collating elements in a cset + == static void mccase(struct parse *p, cset *cs); * * This would have to know the set of possibilities. Implementation * is deferred. */ /* ARGSUSED */ static void -mccase(struct parse *p, cset *cs) +mccase( + struct parse *p, + cset *cs) { + + _DIAGASSERT(p != NULL); + _DIAGASSERT(cs != NULL); + assert(cs->multis == NULL); /* xxx */ } /* - isinsets - is this character in any sets? 
+ == static int isinsets(struct re_guts *g, int c); */ static int /* predicate */ -isinsets(struct re_guts *g, int c) +isinsets( + struct re_guts *g, + int c) { uch *col; - int i; - int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT; - unsigned uc = (uch)c; + size_t i; + size_t ncols; + unsigned uc = (unsigned char)c; + + _DIAGASSERT(g != NULL); + + if (g->setbits == NULL) + return 0; + + ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT; for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize) if (col[uc] != 0) @@ -1227,15 +1577,23 @@ isinsets(struct re_guts *g, int c) /* - samesets - are these two characters in exactly the same sets? + == static int samesets(struct re_guts *g, int c1, int c2); */ static int /* predicate */ -samesets(struct re_guts *g, int c1, int c2) +samesets( + struct re_guts *g, + int c1, + int c2) { uch *col; - int i; - int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT; - unsigned uc1 = (uch)c1; - unsigned uc2 = (uch)c2; + size_t i; + size_t ncols; + unsigned uc1 = (unsigned char)c1; + unsigned uc2 = (unsigned char)c2; + + _DIAGASSERT(g != NULL); + + ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT; for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize) if (col[uc1] != col[uc2]) @@ -1245,21 +1603,31 @@ samesets(struct re_guts *g, int c1, int c2) /* - categorize - sort out character categories + == static void categorize(struct parse *p, struct re_guts *g); */ static void -categorize(struct parse *p, struct re_guts *g) +categorize( + struct parse *p, + struct re_guts *g) { - cat_t *cats = g->categories; + cat_t *cats; int c; int c2; cat_t cat; + _DIAGASSERT(p != NULL); + _DIAGASSERT(g != NULL); + + cats = g->categories; + /* avoid making error situations worse */ if (p->error != 0) return; for (c = CHAR_MIN; c <= CHAR_MAX; c++) if (cats[c] == 0 && isinsets(g, c)) { + _DIAGASSERT(__type_fit(unsigned char, + g->ncategories + 1)); cat = g->ncategories++; cats[c] = cat; for (c2 = c+1; c2 <= CHAR_MAX; c2++) @@ -1270,36 +1638,48 @@ 
categorize(struct parse *p, struct re_guts *g) /* - dupl - emit a duplicate of a bunch of sops + == static sopno dupl(struct parse *p, sopno start, sopno finish); */ static sopno /* start of duplicate */ -dupl(struct parse *p, - sopno start, /* from here */ - sopno finish) /* to this less one */ +dupl( + struct parse *p, + sopno start, /* from here */ + sopno finish) /* to this less one */ { - sopno ret = HERE(); + sopno ret; sopno len = finish - start; + _DIAGASSERT(p != NULL); + + ret = HERE(); + assert(finish >= start); if (len == 0) return(ret); - enlarge(p, p->ssize + len); /* this many unexpected additions */ - assert(p->ssize >= p->slen + len); - (void) memcpy((char *)(p->strip + p->slen), - (char *)(p->strip + start), (size_t)len*sizeof(sop)); + if (!enlarge(p, p->ssize + len))/* this many unexpected additions */ + return ret; + (void)memcpy(p->strip + p->slen, p->strip + start, + (size_t)len * sizeof(sop)); p->slen += len; return(ret); } /* - doemit - emit a strip operator + == static void doemit(struct parse *p, sop op, size_t opnd); * * It might seem better to implement this as a macro with a function as * hard-case backup, but it's just too big and messy unless there are * some changes to the data structures. Maybe later. 
*/ static void -doemit(struct parse *p, sop op, size_t opnd) +doemit( + struct parse *p, + sop op, + sopno opnd) { + _DIAGASSERT(p != NULL); + /* avoid making error situations worse */ if (p->error != 0) return; @@ -1309,23 +1689,30 @@ doemit(struct parse *p, sop op, size_t opnd) /* deal with undersized strip */ if (p->slen >= p->ssize) - enlarge(p, (p->ssize+1) / 2 * 3); /* +50% */ - assert(p->slen < p->ssize); + if (!enlarge(p, (p->ssize+1) / 2 * 3)) /* +50% */ + return; /* finally, it's all reduced to the easy case */ - p->strip[p->slen++] = SOP(op, opnd); + p->strip[p->slen++] = (sop)SOP(op, opnd); } /* - doinsert - insert a sop into the strip + == static void doinsert(struct parse *p, sop op, size_t opnd, sopno pos); */ static void -doinsert(struct parse *p, sop op, size_t opnd, sopno pos) +doinsert( + struct parse *p, + sop op, + sopno opnd, + sopno pos) { sopno sn; sop s; int i; + _DIAGASSERT(p != NULL); + /* avoid making error situations worse */ if (p->error != 0) return; @@ -1346,53 +1733,78 @@ doinsert(struct parse *p, sop op, size_t opnd, sopno pos) } } - memmove((char *)&p->strip[pos+1], (char *)&p->strip[pos], - (HERE()-pos-1)*sizeof(sop)); + memmove(&p->strip[pos+1], &p->strip[pos], (HERE()-pos-1)*sizeof(sop)); p->strip[pos] = s; } /* - dofwd - complete a forward reference + == static void dofwd(struct parse *p, sopno pos, sop value); */ static void -dofwd(struct parse *p, sopno pos, sop value) +dofwd( + struct parse *p, + sopno pos, + sopno value) { + + _DIAGASSERT(p != NULL); + /* avoid making error situations worse */ if (p->error != 0) return; assert(value < 1<<OPSHIFT); - p->strip[pos] = OP(p->strip[pos]) | value; + p->strip[pos] = (sop)(OP(p->strip[pos]) | value); } /* - enlarge - enlarge the strip + == static void enlarge(struct parse *p, sopno size); */ -static void -enlarge(struct parse *p, sopno size) +static int +enlarge( + struct parse *p, + sopno size) { sop *sp; + sopno osize; + + _DIAGASSERT(p != NULL); if (p->ssize >= size) - return; 
+ return 1; - sp = (sop *)realloc(p->strip, size*sizeof(sop)); + osize = p->ssize; + p->ssize = size; + if (MEMSIZE(p) > MEMLIMIT) + goto oomem; + sp = realloc(p->strip, p->ssize * sizeof(sop)); if (sp == NULL) { +oomem: + p->ssize = osize; SETERROR(REG_ESPACE); - return; + return 0; } p->strip = sp; - p->ssize = size; + return 1; } /* - stripsnug - compact the strip + == static void stripsnug(struct parse *p, struct re_guts *g); */ static void -stripsnug(struct parse *p, struct re_guts *g) +stripsnug( + struct parse *p, + struct re_guts *g) { + + _DIAGASSERT(p != NULL); + _DIAGASSERT(g != NULL); + g->nstates = p->slen; - g->strip = (sop *)realloc((char *)p->strip, p->slen * sizeof(sop)); + g->strip = realloc(p->strip, p->slen * sizeof(sop)); if (g->strip == NULL) { SETERROR(REG_ESPACE); g->strip = p->strip; @@ -1401,6 +1813,7 @@ stripsnug(struct parse *p, struct re_guts *g) /* - findmust - fill in must and mlen with longest mandatory literal string + == static void findmust(struct parse *p, struct re_guts *g); * * This algorithm could do fancy things like analyzing the operands of | * for common subsequences. Someday. This code is simple and finds most @@ -1409,16 +1822,21 @@ stripsnug(struct parse *p, struct re_guts *g) * Note that must and mlen got initialized during setup. 
*/ static void -findmust(struct parse *p, struct re_guts *g) +findmust( + struct parse *p, + struct re_guts *g) { sop *scan; - sop *start = NULL; /* start initialized in the default case, after that */ - sop *newstart; /* newstart was initialized in the OCHAR case */ + sop *start = NULL; + sop *newstart = NULL; sopno newlen; sop s; char *cp; sopno i; + _DIAGASSERT(p != NULL); + _DIAGASSERT(g != NULL); + /* avoid making error situations worse */ if (p->error != 0) return; @@ -1451,7 +1869,7 @@ findmust(struct parse *p, struct re_guts *g) return; } } while (OP(s) != O_QUEST && OP(s) != O_CH); - /* fallthrough */ + /* FALLTHROUGH */ default: /* things that break a sequence */ if (newlen > g->mlen) { /* ends one */ start = newstart; @@ -1462,7 +1880,10 @@ findmust(struct parse *p, struct re_guts *g) } } while (OP(s) != OEND); - if (g->mlen == 0) /* there isn't one */ + if (start == NULL) + g->mlen = 0; + + if (g->mlen == 0) /* there isn't one */ return; /* turn it into a character string */ @@ -1485,15 +1906,21 @@ findmust(struct parse *p, struct re_guts *g) /* - pluscount - count + nesting + == static sopno pluscount(struct parse *p, struct re_guts *g); */ static sopno /* nesting depth */ -pluscount(struct parse *p, struct re_guts *g) +pluscount( + struct parse *p, + struct re_guts *g) { sop *scan; sop s; sopno plusnest = 0; sopno maxnest = 0; + _DIAGASSERT(p != NULL); + _DIAGASSERT(g != NULL); + if (p->error != 0) return(0); /* there may not be an OEND */ diff --git a/libc/upstream-netbsd/libc/regex/regerror.c b/libc/upstream-netbsd/libc/regex/regerror.c new file mode 100644 index 0000000..e00d7c0 --- /dev/null +++ b/libc/upstream-netbsd/libc/regex/regerror.c @@ -0,0 +1,223 @@ +/* $NetBSD: regerror.c,v 1.23 2007/02/09 23:44:18 junyoung Exp $ */ + +/*- + * Copyright (c) 1992, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Henry Spencer. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)regerror.c 8.4 (Berkeley) 3/20/94 + */ + +/*- + * Copyright (c) 1992, 1993, 1994 Henry Spencer. + * + * This code is derived from software contributed to Berkeley by + * Henry Spencer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)regerror.c 8.4 (Berkeley) 3/20/94 + */ + +#include <sys/cdefs.h> +#if defined(LIBC_SCCS) && !defined(lint) +#if 0 +static char sccsid[] = "@(#)regerror.c 8.4 (Berkeley) 3/20/94"; +#else +__RCSID("$NetBSD: regerror.c,v 1.23 2007/02/09 23:44:18 junyoung Exp $"); +#endif +#endif /* LIBC_SCCS and not lint */ + +#include "namespace.h" +#include <sys/types.h> + +#include <assert.h> +#include <ctype.h> +#include <limits.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <regex.h> + +#ifdef __weak_alias +__weak_alias(regerror,_regerror) +#endif + +#include "utils.h" + +/* ========= begin header generated by ./mkh ========= */ +#ifdef __cplusplus +extern "C" { +#endif + +/* === regerror.c === */ +static const char *regatoi(const regex_t *preg, char *localbuf, size_t buflen); + +#ifdef __cplusplus +} +#endif +/* ========= end header generated by ./mkh ========= */ +/* + = #define REG_NOMATCH 1 + = #define REG_BADPAT 2 + = #define REG_ECOLLATE 3 + = #define REG_ECTYPE 4 + = #define REG_EESCAPE 5 + = #define REG_ESUBREG 6 + = #define REG_EBRACK 7 + = #define REG_EPAREN 8 + = #define REG_EBRACE 9 + = #define REG_BADBR 10 + = #define REG_ERANGE 11 + = #define REG_ESPACE 12 + = #define REG_BADRPT 13 + = #define REG_EMPTY 14 + = #define REG_ASSERT 15 + = #define REG_INVARG 16 + = #define REG_ATOI 255 // convert name to number (!) + = #define REG_ITOA 0400 // convert number to name (!) 
+ */ +static const struct rerr { + int code; + const char *name; + const char *explain; +} rerrs[] = { + { REG_NOMATCH, "REG_NOMATCH", "regexec() failed to match" }, + { REG_BADPAT, "REG_BADPAT", "invalid regular expression" }, + { REG_ECOLLATE, "REG_ECOLLATE", "invalid collating element" }, + { REG_ECTYPE, "REG_ECTYPE", "invalid character class" }, + { REG_EESCAPE, "REG_EESCAPE", "trailing backslash (\\)" }, + { REG_ESUBREG, "REG_ESUBREG", "invalid backreference number" }, + { REG_EBRACK, "REG_EBRACK", "brackets ([ ]) not balanced" }, + { REG_EPAREN, "REG_EPAREN", "parentheses not balanced" }, + { REG_EBRACE, "REG_EBRACE", "braces not balanced" }, + { REG_BADBR, "REG_BADBR", "invalid repetition count(s)" }, + { REG_ERANGE, "REG_ERANGE", "invalid character range" }, + { REG_ESPACE, "REG_ESPACE", "out of memory" }, + { REG_BADRPT, "REG_BADRPT", "repetition-operator operand invalid" }, + { REG_EMPTY, "REG_EMPTY", "empty (sub)expression" }, + { REG_ASSERT, "REG_ASSERT", "\"can't happen\" -- you found a bug" }, + { REG_INVARG, "REG_INVARG", "invalid argument to regex routine" }, + { 0, "", "*** unknown regexp error code ***" } +}; + +/* + * regerror - the interface to error numbers + * extern size_t regerror(int, const regex_t *, char *, size_t); + */ +/* ARGSUSED */ +size_t +regerror( + int errcode, + const regex_t *preg, + char *errbuf, + size_t errbuf_size) +{ + const struct rerr *r; + size_t len; + int target = errcode &~ REG_ITOA; + const char *s; + char convbuf[50]; + + _DIAGASSERT(errcode != REG_ATOI || preg != NULL); + _DIAGASSERT(errbuf != NULL); + + if (errcode == REG_ATOI) + s = regatoi(preg, convbuf, sizeof convbuf); + else { + for (r = rerrs; r->code != 0; r++) + if (r->code == target) + break; + + if (errcode & REG_ITOA) { + if (r->code != 0) { + (void)strlcpy(convbuf, r->name, sizeof convbuf); + } else + (void)snprintf(convbuf, sizeof convbuf, + "REG_0x%x", target); + s = convbuf; + } else + s = r->explain; + } + + len = strlen(s) + 1; + if (errbuf_size 
> 0) + (void)strlcpy(errbuf, s, errbuf_size); + + return(len); +} + +/* + * regatoi - internal routine to implement REG_ATOI + * static const char *regatoi(const regex_t *preg, char *localbuf, + * size_t buflen); + */ +static const char * +regatoi( + const regex_t *preg, + char *localbuf, + size_t buflen) +{ + const struct rerr *r; + + for (r = rerrs; r->code != 0; r++) + if (strcmp(r->name, preg->re_endp) == 0) + break; + if (r->code == 0) + return "0"; + + (void)snprintf(localbuf, buflen, "%d", r->code); + return localbuf; +} diff --git a/libc/regex/regex2.h b/libc/upstream-netbsd/libc/regex/regex2.h index 15e15bc..7c877ee 100644 --- a/libc/regex/regex2.h +++ b/libc/upstream-netbsd/libc/regex/regex2.h @@ -1,7 +1,6 @@ -/* $OpenBSD: regex2.h,v 1.7 2004/11/30 17:04:23 otto Exp $ */ +/* $NetBSD: regex2.h,v 1.13 2011/10/09 18:23:00 christos Exp $ */ /*- - * Copyright (c) 1992, 1993, 1994 Henry Spencer. * Copyright (c) 1992, 1993, 1994 * The Regents of the University of California. All rights reserved. * @@ -35,6 +34,57 @@ * @(#)regex2.h 8.4 (Berkeley) 3/20/94 */ +/*- + * Copyright (c) 1992, 1993, 1994 Henry Spencer. + * + * This code is derived from software contributed to Berkeley by + * Henry Spencer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. 
Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)regex2.h 8.4 (Berkeley) 3/20/94 + */ + +/* + * First, the stuff that ends up in the outside-world include file + = typedef off_t regoff_t; + = typedef struct { + = int re_magic; + = size_t re_nsub; // number of parenthesized subexpressions + = const char *re_endp; // end pointer for REG_PEND + = struct re_guts *re_g; // none of your business :-) + = } regex_t; + = typedef struct { + = regoff_t rm_so; // start of match + = regoff_t rm_eo; // end of match + = } regmatch_t; + */ /* * internals of regex_t */ @@ -59,36 +109,38 @@ * In state representations, an operator's bit is on to signify a state * immediately *preceding* "execution" of that operator. 
*/ -typedef unsigned long sop; /* strip operator */ -typedef long sopno; -#define OPRMASK 0xf8000000LU -#define OPDMASK 0x07ffffffLU +typedef u_int32_t sop; /* strip operator */ +typedef size_t sopno; +#define OPRMASK ((u_int32_t)0xf8000000UL) +#define OPDMASK ((u_int32_t)0x07ffffffUL) #define OPSHIFT ((unsigned)27) #define OP(n) ((n)&OPRMASK) -#define OPND(n) ((n)&OPDMASK) +#define OPND(n) ((int)((n)&OPDMASK)) #define SOP(op, opnd) ((op)|(opnd)) -/* operators meaning operand */ -/* (back, fwd are offsets) */ -#define OEND (1LU<<OPSHIFT) /* endmarker - */ -#define OCHAR (2LU<<OPSHIFT) /* character unsigned char */ -#define OBOL (3LU<<OPSHIFT) /* left anchor - */ -#define OEOL (4LU<<OPSHIFT) /* right anchor - */ -#define OANY (5LU<<OPSHIFT) /* . - */ -#define OANYOF (6LU<<OPSHIFT) /* [...] set number */ -#define OBACK_ (7LU<<OPSHIFT) /* begin \d paren number */ -#define O_BACK (8LU<<OPSHIFT) /* end \d paren number */ -#define OPLUS_ (9LU<<OPSHIFT) /* + prefix fwd to suffix */ -#define O_PLUS (10LU<<OPSHIFT) /* + suffix back to prefix */ -#define OQUEST_ (11LU<<OPSHIFT) /* ? prefix fwd to suffix */ -#define O_QUEST (12LU<<OPSHIFT) /* ? suffix back to prefix */ -#define OLPAREN (13LU<<OPSHIFT) /* ( fwd to ) */ -#define ORPAREN (14LU<<OPSHIFT) /* ) back to ( */ -#define OCH_ (15LU<<OPSHIFT) /* begin choice fwd to OOR2 */ -#define OOR1 (16LU<<OPSHIFT) /* | pt. 1 back to OOR1 or OCH_ */ -#define OOR2 (17LU<<OPSHIFT) /* | pt. 2 fwd to OOR2 or O_CH */ -#define O_CH (18LU<<OPSHIFT) /* end choice back to OOR1 */ -#define OBOW (19LU<<OPSHIFT) /* begin word - */ -#define OEOW (20LU<<OPSHIFT) /* end word - */ + +#define OPC(n) (((u_int32_t)(n))<<OPSHIFT) +/* operators meaning operand */ +/* (back, fwd are offsets) */ +#define OEND OPC(1) /* endmarker - */ +#define OCHAR OPC(2) /* character unsigned char */ +#define OBOL OPC(3) /* left anchor - */ +#define OEOL OPC(4) /* right anchor - */ +#define OANY OPC(5) /* . - */ +#define OANYOF OPC(6) /* [...] 
set number */ +#define OBACK_ OPC(7) /* begin \d paren number */ +#define O_BACK OPC(8) /* end \d paren number */ +#define OPLUS_ OPC(9) /* + prefix fwd to suffix */ +#define O_PLUS OPC(10) /* + suffix back to prefix */ +#define OQUEST_ OPC(11) /* ? prefix fwd to suffix */ +#define O_QUEST OPC(12) /* ? suffix back to prefix */ +#define OLPAREN OPC(13) /* ( fwd to ) */ +#define ORPAREN OPC(14) /* ) back to ( */ +#define OCH_ OPC(15) /* begin choice fwd to OOR2 */ +#define OOR1 OPC(16) /* | pt. 1 back to OOR1 or OCH_ */ +#define OOR2 OPC(17) /* | pt. 2 fwd to OOR2 or O_CH */ +#define O_CH OPC(18) /* end choice back to OOR1 */ +#define OBOW OPC(19) /* begin word - */ +#define OEOW OPC(20) /* end word - */ /* * Structure for [] character-set representation. Character sets are @@ -127,8 +179,8 @@ struct re_guts { int magic; # define MAGIC2 ((('R'^0200)<<8)|'E') sop *strip; /* malloced area for strip */ - int csetsize; /* number of bits in a cset vector */ - int ncsets; /* number of csets in use */ + size_t csetsize; /* number of bits in a cset vector */ + size_t ncsets; /* number of csets in use */ cset *sets; /* -> cset [ncsets] */ uch *setbits; /* -> uch[csetsize][ncsets/CHAR_BIT] */ int cflags; /* copy of regcomp() cflags argument */ @@ -139,12 +191,12 @@ struct re_guts { # define USEBOL 01 /* used ^ */ # define USEEOL 02 /* used $ */ # define BAD 04 /* something wrong */ - int nbol; /* number of ^ used */ - int neol; /* number of $ used */ - int ncategories; /* how many character categories */ + size_t nbol; /* number of ^ used */ + size_t neol; /* number of $ used */ + size_t ncategories; /* how many character categories */ cat_t *categories; /* ->catspace[-CHAR_MIN] */ char *must; /* match must contain this string */ - int mlen; /* length of must */ + size_t mlen; /* length of must */ size_t nsub; /* copy of re_nsub */ int backrefs; /* does it use back references? */ sopno nplus; /* how deep does it nest +s? 
*/ @@ -154,4 +206,4 @@ struct re_guts { /* misc utilities */ #define OUT (CHAR_MAX+1) /* a non-character value */ -#define ISWORD(c) (isalnum(c) || (c) == '_') +#define ISWORD(c) (isalnum((unsigned char)c) || (c) == '_') diff --git a/libc/regex/regexec.c b/libc/upstream-netbsd/libc/regex/regexec.c index 6feed3b..f16e0b6 100644 --- a/libc/regex/regexec.c +++ b/libc/upstream-netbsd/libc/regex/regexec.c @@ -1,6 +1,6 @@ -/* $OpenBSD: regexec.c,v 1.11 2005/08/05 13:03:00 espie Exp $ */ +/* $NetBSD: regexec.c,v 1.22 2012/03/13 21:13:43 christos Exp $ */ + /*- - * Copyright (c) 1992, 1993, 1994 Henry Spencer. * Copyright (c) 1992, 1993, 1994 * The Regents of the University of California. All rights reserved. * @@ -34,6 +34,52 @@ * @(#)regexec.c 8.3 (Berkeley) 3/20/94 */ +/*- + * Copyright (c) 1992, 1993, 1994 Henry Spencer. + * + * This code is derived from software contributed to Berkeley by + * Henry Spencer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)regexec.c 8.3 (Berkeley) 3/20/94 + */ + +#include <sys/cdefs.h> +#if defined(LIBC_SCCS) && !defined(lint) +#if 0 +static char sccsid[] = "@(#)regexec.c 8.3 (Berkeley) 3/20/94"; +#else +__RCSID("$NetBSD: regexec.c,v 1.22 2012/03/13 21:13:43 christos Exp $"); +#endif +#endif /* LIBC_SCCS and not lint */ + /* * the outer shell of regexec() * @@ -41,39 +87,46 @@ * macros that code uses. This lets the same code operate on two different * representations for state sets. 
*/ +#include "namespace.h" #include <sys/types.h> + +#include <assert.h> +#include <ctype.h> +#include <limits.h> #include <stdio.h> #include <stdlib.h> #include <string.h> -#include <limits.h> -#include <ctype.h> #include <regex.h> +#ifdef __weak_alias +__weak_alias(regexec,_regexec) +#endif + #include "utils.h" #include "regex2.h" /* macros for manipulating states, small version */ -#define states long -#define states1 states /* for later use in regexec() decision */ +#define states unsigned long +#define states1 unsigned long /* for later use in regexec() decision */ #define CLEAR(v) ((v) = 0) #define SET0(v, n) ((v) &= ~((unsigned long)1 << (n))) #define SET1(v, n) ((v) |= (unsigned long)1 << (n)) #define ISSET(v, n) (((v) & ((unsigned long)1 << (n))) != 0) #define ASSIGN(d, s) ((d) = (s)) #define EQ(a, b) ((a) == (b)) -#define STATEVARS long dummy /* dummy version */ +#define STATEVARS int dummy /* dummy version */ #define STATESETUP(m, n) /* nothing */ #define STATETEARDOWN(m) /* nothing */ #define SETUP(v) ((v) = 0) -#define onestate long +#define onestate unsigned long #define INIT(o, n) ((o) = (unsigned long)1 << (n)) -#define INC(o) ((o) <<= 1) +#define INC(o) ((o) <<= 1) #define ISSTATEIN(v, o) (((v) & (o)) != 0) /* some abbreviations; note that some of these know variable names! 
*/ /* do "if I'm here, I can also be there" etc without branches */ #define FWD(dst, src, n) ((dst) |= ((unsigned long)(src)&(here)) << (n)) #define BACK(dst, src, n) ((dst) |= ((unsigned long)(src)&(here)) >> (n)) -#define ISSETBACK(v, n) (((v) & ((unsigned long)here >> (n))) != 0) +#define ISSETBACK(v, n) (((v) & ((unsigned long)here >> (n))) != 0) /* function names */ #define SNAMES /* engine.c looks after details */ @@ -102,20 +155,23 @@ /* macros for manipulating states, large version */ #define states char * -#define CLEAR(v) memset(v, 0, m->g->nstates) +#define CLEAR(v) memset(v, 0, (size_t)m->g->nstates) #define SET0(v, n) ((v)[n] = 0) #define SET1(v, n) ((v)[n] = 1) #define ISSET(v, n) ((v)[n]) -#define ASSIGN(d, s) memcpy(d, s, m->g->nstates) -#define EQ(a, b) (memcmp(a, b, m->g->nstates) == 0) -#define STATEVARS long vn; char *space -#define STATESETUP(m, nv) { (m)->space = malloc((nv)*(m)->g->nstates); \ - if ((m)->space == NULL) return(REG_ESPACE); \ - (m)->vn = 0; } -#define STATETEARDOWN(m) { free((m)->space); } -#define SETUP(v) ((v) = &m->space[m->vn++ * m->g->nstates]) -#define onestate long -#define INIT(o, n) ((o) = (n)) +#define ASSIGN(d, s) memcpy(d, s, (size_t)m->g->nstates) +#define EQ(a, b) (memcmp(a, b, (size_t)m->g->nstates) == 0) +#define STATEVARS int vn; char *space +#define STATESETUP(m, nv) \ + if (((m)->space = malloc((size_t)((nv)*(m)->g->nstates))) == NULL) \ + return(REG_ESPACE); \ + else \ + (m)->vn = 0 + +#define STATETEARDOWN(m) { free((m)->space); m->space = NULL; } +#define SETUP(v) ((v) = &m->space[(size_t)(m->vn++ * m->g->nstates)]) +#define onestate int +#define INIT(o, n) ((o) = (int)(n)) #define INC(o) ((o)++) #define ISSTATEIN(v, o) ((v)[o]) /* some abbreviations; note that some of these know variable names! 
*/ @@ -130,22 +186,38 @@ /* - regexec - interface for matching + = extern int regexec(const regex_t *, const char *, size_t, \ + = regmatch_t [], int); + = #define REG_NOTBOL 00001 + = #define REG_NOTEOL 00002 + = #define REG_STARTEND 00004 + = #define REG_TRACE 00400 // tracing of execution + = #define REG_LARGE 01000 // force large representation + = #define REG_BACKR 02000 // force use of backref code * * We put this here so we can exploit knowledge of the state representation * when choosing which matcher to call. Also, by this point the matchers * have been prototyped. */ int /* 0 success, REG_NOMATCH failure */ -regexec(const regex_t *preg, const char *string, size_t nmatch, - regmatch_t pmatch[], int eflags) +regexec( + const regex_t *preg, + const char *string, + size_t nmatch, + regmatch_t pmatch[], + int eflags) { struct re_guts *g = preg->re_g; + char *s; #ifdef REDEBUG # define GOODFLAGS(f) (f) #else # define GOODFLAGS(f) ((f)&(REG_NOTBOL|REG_NOTEOL|REG_STARTEND)) #endif + _DIAGASSERT(preg != NULL); + _DIAGASSERT(string != NULL); + if (preg->re_magic != MAGIC1 || g->magic != MAGIC2) return(REG_BADPAT); assert(!(g->iflags&BAD)); @@ -153,8 +225,10 @@ regexec(const regex_t *preg, const char *string, size_t nmatch, return(REG_BADPAT); eflags = GOODFLAGS(eflags); - if (g->nstates <= (int)(CHAR_BIT*sizeof(states1)) && !(eflags&REG_LARGE)) - return(smatcher(g, (char *)string, nmatch, pmatch, eflags)); + s = __UNCONST(string); + + if (g->nstates <= (sopno)(CHAR_BIT*sizeof(states1)) && !(eflags&REG_LARGE)) + return(smatcher(g, s, nmatch, pmatch, eflags)); else - return(lmatcher(g, (char *)string, nmatch, pmatch, eflags)); + return(lmatcher(g, s, nmatch, pmatch, eflags)); } diff --git a/libc/upstream-netbsd/libc/regex/regfree.c b/libc/upstream-netbsd/libc/regex/regfree.c new file mode 100644 index 0000000..ce011ea --- /dev/null +++ b/libc/upstream-netbsd/libc/regex/regfree.c @@ -0,0 +1,129 @@ +/* $NetBSD: regfree.c,v 1.15 2007/02/09 23:44:18 junyoung Exp $ */ + +/*- + 
* Copyright (c) 1992, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Henry Spencer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)regfree.c 8.3 (Berkeley) 3/20/94 + */ + +/*- + * Copyright (c) 1992, 1993, 1994 Henry Spencer. + * + * This code is derived from software contributed to Berkeley by + * Henry Spencer. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)regfree.c 8.3 (Berkeley) 3/20/94 + */ + +#include <sys/cdefs.h> +#if defined(LIBC_SCCS) && !defined(lint) +#if 0 +static char sccsid[] = "@(#)regfree.c 8.3 (Berkeley) 3/20/94"; +#else +__RCSID("$NetBSD: regfree.c,v 1.15 2007/02/09 23:44:18 junyoung Exp $"); +#endif +#endif /* LIBC_SCCS and not lint */ + +#include "namespace.h" +#include <sys/types.h> + +#include <assert.h> +#include <stdio.h> +#include <stdlib.h> +#include <regex.h> + +#ifdef __weak_alias +__weak_alias(regfree,_regfree) +#endif + +#include "utils.h" +#include "regex2.h" + +/* + - regfree - free everything + = extern void regfree(regex_t *); + */ +void +regfree( + regex_t *preg) +{ + struct re_guts *g; + + _DIAGASSERT(preg != NULL); + + _DIAGASSERT(preg->re_magic == MAGIC1); + if (preg->re_magic != MAGIC1) /* oops */ + return; /* nice to complain, but hard */ + + g = preg->re_g; + if (g == NULL || g->magic != MAGIC2) /* oops again */ + return; + preg->re_magic = 0; /* mark it invalid */ + g->magic = 0; /* mark it invalid */ + + if (g->strip != NULL) + free(g->strip); + if (g->sets != NULL) + free(g->sets); + if (g->setbits != NULL) + free(g->setbits); + if (g->must != NULL) + free(g->must); + free(g); +} diff --git a/libc/upstream-netbsd/libc/regex/utils.h b/libc/upstream-netbsd/libc/regex/utils.h new file mode 100644 index 0000000..762caee --- /dev/null +++ b/libc/upstream-netbsd/libc/regex/utils.h @@ -0,0 +1,91 @@ +/* $NetBSD: utils.h,v 1.6 2003/08/07 16:43:21 agc Exp $ */ + +/*- + * Copyright (c) 1992, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Henry Spencer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)utils.h 8.3 (Berkeley) 3/20/94 + */ + +/*- + * Copyright (c) 1992, 1993, 1994 Henry Spencer. + * + * This code is derived from software contributed to Berkeley by + * Henry Spencer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)utils.h 8.3 (Berkeley) 3/20/94 + */ + +/* utility definitions */ +#define DUPMAX _POSIX2_RE_DUP_MAX /* xxx is this right? 
*/ +#define INFINITY (DUPMAX + 1) +#define NC (CHAR_MAX - CHAR_MIN + 1) +typedef unsigned char uch; + +/* switch off assertions (if not already off) if no REDEBUG */ +#ifndef REDEBUG +#ifndef NDEBUG +#define NDEBUG /* no assertions please */ +#endif +#endif +#include <assert.h> + +/* for old systems with bcopy() but no memmove() */ +#ifdef USEBCOPY +#define memmove(d, s, c) bcopy(s, d, c) +#endif diff --git a/libc/upstream-netbsd/netbsd-compat.h b/libc/upstream-netbsd/netbsd-compat.h index a52052a..3833c1d 100644 --- a/libc/upstream-netbsd/netbsd-compat.h +++ b/libc/upstream-netbsd/netbsd-compat.h @@ -21,4 +21,7 @@ #include <assert.h> #define _DIAGASSERT(e) ((e) ? (void) 0 : __assert2(__FILE__, __LINE__, __func__, #e)) +// TODO: update our <sys/cdefs.h> to support this properly. +#define __type_fit(t, a) (0 == 0) + #endif diff --git a/tests/Android.mk b/tests/Android.mk new file mode 100644 index 0000000..2721138 --- /dev/null +++ b/tests/Android.mk @@ -0,0 +1,43 @@ +# +# Copyright (C) 2012 The Android Open Source Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# Copyright The Android Open Source Project + +LOCAL_PATH := $(call my-dir) + +test_module = bionic-unit-tests +test_tags = eng tests + +test_src_files = \ + regex_test.cpp \ + +# Build for the device (with bionic). 
Run with: +# adb shell /data/nativetest/bionic-unit-tests/bionic-unit-tests +include $(CLEAR_VARS) +LOCAL_MODULE := $(test_module) +LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/Android.mk +LOCAL_MODULE_TAGS := $(test_tags) +LOCAL_SRC_FILES := $(test_src_files) +include $(BUILD_NATIVE_TEST) + +# Build for the host (with glibc). +# Note that this will build against glibc, so it's not useful for testing +# bionic's implementation, but it does let you use glibc as a reference +# implementation for testing the tests themselves. +include $(CLEAR_VARS) +LOCAL_MODULE := $(test_module)-glibc +LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/Android.mk +LOCAL_MODULE_TAGS := $(test_tags) +LOCAL_SRC_FILES := $(test_src_files) +include $(BUILD_HOST_NATIVE_TEST) diff --git a/tests/regex_test.cpp b/tests/regex_test.cpp new file mode 100644 index 0000000..659d1db --- /dev/null +++ b/tests/regex_test.cpp @@ -0,0 +1,38 @@ +/* + * Copyright (C) 2012 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <gtest/gtest.h> + +#include <sys/types.h> +#include <regex.h> + +TEST(regex, smoke) { + // A quick test of all the regex functions. 
+ regex_t re; + ASSERT_EQ(0, regcomp(&re, "ab*c", 0)); + ASSERT_EQ(0, regexec(&re, "abbbc", 0, NULL, 0)); + ASSERT_EQ(REG_NOMATCH, regexec(&re, "foo", 0, NULL, 0)); + + char buf[80]; + regerror(REG_NOMATCH, &re, buf, sizeof(buf)); +#if __BIONIC__ + ASSERT_STREQ("regexec() failed to match", buf); +#else + ASSERT_STREQ("No match", buf); +#endif + + regfree(&re); +} |