1 files changed, 479 insertions, 271 deletions
diff --git a/gnulib-local/lib/libxml/xmlregexp.c b/gnulib-local/lib/libxml/xmlregexp.c
index e7d519e..3e912ab 100644
--- a/gnulib-local/lib/libxml/xmlregexp.c
+++ b/gnulib-local/lib/libxml/xmlregexp.c
@@ -1,7 +1,7 @@
 /*
  * regexp.c: generic and extensible Regular Expression engine
  *
- * Basically designed with the purpose of compiling regexps for 
+ * Basically designed with the purpose of compiling regexps for
  * the variety of validation/shemas mechanisms now available in
  * XML related specifications these include:
  *    - XML-1.0 DTD validation
@@ -44,6 +44,9 @@
 
 #define MAX_PUSH 10000000
 
+#ifdef ERROR
+#undef ERROR
+#endif
 #define ERROR(str)							\
     ctxt->error = XML_REGEXP_COMPILE_ERROR;				\
     xmlRegexpErrCompile(ctxt, str);
@@ -54,21 +57,26 @@
 #define CUR_SCHAR(s, l) xmlStringCurrentChar(NULL, s, &l)
 #define NEXTL(l) ctxt->cur += l;
 #define XML_REG_STRING_SEPARATOR '|'
+/*
+ * Need PREV to check on a '-' within a Character Group. May only be used
+ * when it's guaranteed that cur is not at the beginning of ctxt->string!
+ */
+#define PREV (ctxt->cur[-1])
 
 /**
  * TODO:
  *
  * macro to flag unimplemented blocks
  */
-#define TODO 								\
+#define TODO								\
     xmlGenericError(xmlGenericErrorContext,				\
 	    "Unimplemented block at %s:%d\n",				\
             __FILE__, __LINE__);
 
 /************************************************************************
- * 									*
- * 			Datatypes and structures			*
- * 									*
+ *									*
+ *			Datatypes and structures			*
+ *									*
  ************************************************************************/
 
 /*
@@ -145,7 +153,8 @@ typedef enum {
     XML_REGEXP_START_STATE = 1,
     XML_REGEXP_FINAL_STATE,
     XML_REGEXP_TRANS_STATE,
-    XML_REGEXP_SINK_STATE
+    XML_REGEXP_SINK_STATE,
+    XML_REGEXP_UNREACH_STATE
 } xmlRegStateType;
 
 typedef enum {
@@ -183,6 +192,7 @@ struct _xmlRegAtom {
     int neg;
     int codepoint;
     xmlRegStatePtr start;
+    xmlRegStatePtr start0;
     xmlRegStatePtr stop;
     int maxRanges;
     int nbRanges;
@@ -212,6 +222,7 @@ struct _xmlRegTrans {
 struct _xmlAutomataState {
     xmlRegStateType type;
     xmlRegMarkedType mark;
+    xmlRegMarkedType markd;
     xmlRegMarkedType reached;
     int no;
     int maxTrans;
@@ -226,6 +237,8 @@ struct _xmlAutomataState {
 typedef struct _xmlAutomata xmlRegParserCtxt;
 typedef xmlRegParserCtxt *xmlRegParserCtxtPtr;
 
+#define AM_AUTOMATA_RNG 1
+
 struct _xmlAutomata {
     xmlChar *string;
     xmlChar *cur;
@@ -253,6 +266,7 @@ struct _xmlAutomata {
 
     int determinist;
     int negs;
+    int flags;
 };
 
 struct _xmlRegexp {
@@ -264,6 +278,7 @@ struct _xmlRegexp {
     int nbCounters;
     xmlRegCounter *counters;
     int determinist;
+    int flags;
     /*
      * That's the compact form for determinists automatas
      */
@@ -346,9 +361,11 @@ static int xmlRegCheckCharacter(xmlRegAtomPtr atom, int codepoint);
 static int xmlRegCheckCharacterRange(xmlRegAtomType type, int codepoint,
                   int neg, int start, int end, const xmlChar *blockName);
 
+void xmlAutomataSetFlags(xmlAutomataPtr am, int flags);
+
 /************************************************************************
  *									*
- * 		Regexp memory error handler				*
+ *		Regexp memory error handler				*
  *									*
  ************************************************************************/
 /**
@@ -395,9 +412,9 @@ xmlRegexpErrCompile(xmlRegParserCtxtPtr ctxt, const char *extra)
 }
 
 /************************************************************************
- * 									*
- * 			Allocation/Deallocation				*
- * 									*
+ *									*
+ *			Allocation/Deallocation				*
+ *									*
  ************************************************************************/
 
 static int xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt);
@@ -427,6 +444,7 @@ xmlRegEpxFromParse(xmlRegParserCtxtPtr ctxt) {
     ret->nbCounters = ctxt->nbCounters;
     ret->counters = ctxt->counters;
     ret->determinist = ctxt->determinist;
+    ret->flags = ctxt->flags;
     if (ret->determinist == -1) {
         xmlRegexpIsDeterminist(ret);
     }
@@ -727,11 +745,41 @@ xmlRegFreeRange(xmlRegRangePtr range) {
 }
 
 /**
+ * xmlRegCopyRange:
+ * @range:  the regexp range
+ *
+ * Copy a regexp range
+ *
+ * Returns the new copy or NULL in case of error.
+ */
+static xmlRegRangePtr
+xmlRegCopyRange(xmlRegParserCtxtPtr ctxt, xmlRegRangePtr range) {
+    xmlRegRangePtr ret;
+
+    if (range == NULL)
+	return(NULL);
+
+    ret = xmlRegNewRange(ctxt, range->neg, range->type, range->start,
+                         range->end);
+    if (ret == NULL)
+        return(NULL);
+    if (range->blockName != NULL) {
+	ret->blockName = xmlStrdup(range->blockName);
+	if (ret->blockName == NULL) {
+	    xmlRegexpErrMemory(ctxt, "allocating range");
+	    xmlRegFreeRange(ret);
+	    return(NULL);
+	}
+    }
+    return(ret);
+}
+
+/**
  * xmlRegNewAtom:
  * @ctxt:  the regexp parser context
  * @type:  the type of atom
  *
- * Allocate a new regexp range
+ * Allocate a new atom
  *
  * Returns the new atom or NULL in case of error
  */
@@ -778,6 +826,52 @@ xmlRegFreeAtom(xmlRegAtomPtr atom) {
     xmlFree(atom);
 }
 
+/**
+ * xmlRegCopyAtom:
+ * @ctxt:  the regexp parser context
+ * @atom:  the oiginal atom
+ *
+ * Allocate a new regexp range
+ *
+ * Returns the new atom or NULL in case of error
+ */
+static xmlRegAtomPtr
+xmlRegCopyAtom(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom) {
+    xmlRegAtomPtr ret;
+
+    ret = (xmlRegAtomPtr) xmlMalloc(sizeof(xmlRegAtom));
+    if (ret == NULL) {
+	xmlRegexpErrMemory(ctxt, "copying atom");
+	return(NULL);
+    }
+    memset(ret, 0, sizeof(xmlRegAtom));
+    ret->type = atom->type;
+    ret->quant = atom->quant;
+    ret->min = atom->min;
+    ret->max = atom->max;
+    if (atom->nbRanges > 0) {
+        int i;
+
+        ret->ranges = (xmlRegRangePtr *) xmlMalloc(sizeof(xmlRegRangePtr) *
+	                                           atom->nbRanges);
+	if (ret->ranges == NULL) {
+	    xmlRegexpErrMemory(ctxt, "copying atom");
+	    goto error;
+	}
+	for (i = 0;i < atom->nbRanges;i++) {
+	    ret->ranges[i] = xmlRegCopyRange(ctxt, atom->ranges[i]);
+	    if (ret->ranges[i] == NULL)
+	        goto error;
+	    ret->nbRanges = i + 1;
+	}
+    }
+    return(ret);
+
+error:
+    xmlRegFreeAtom(ret);
+    return(NULL);
+}
+
 static xmlRegStatePtr
 xmlRegNewState(xmlRegParserCtxtPtr ctxt) {
     xmlRegStatePtr ret;
@@ -841,9 +935,9 @@ xmlRegFreeParserCtxt(xmlRegParserCtxtPtr ctxt) {
 }
 
 /************************************************************************
- * 									*
- * 			Display of Data structures			*
- * 									*
+ *									*
+ *			Display of Data structures			*
+ *									*
  ************************************************************************/
 
 static void
@@ -1050,7 +1144,7 @@ xmlRegPrintTrans(FILE *output, xmlRegTransPtr trans) {
 	fprintf(output, "char %c ", trans->atom->codepoint);
     fprintf(output, "atom %d, to %d\n", trans->atom->no, trans->to);
 }
-    
+
 static void
 xmlRegPrintState(FILE *output, xmlRegStatePtr state) {
     int i;
@@ -1064,7 +1158,7 @@ xmlRegPrintState(FILE *output, xmlRegStatePtr state) {
 	fprintf(output, "START ");
     if (state->type == XML_REGEXP_FINAL_STATE)
 	fprintf(output, "FINAL ");
-    
+
     fprintf(output, "%d, %d transitions:\n", state->no, state->nbTrans);
     for (i = 0;i < state->nbTrans; i++) {
 	xmlRegPrintTrans(output, &(state->trans[i]));
@@ -1114,12 +1208,12 @@ xmlRegPrintCtxt(FILE *output, xmlRegParserCtxtPtr ctxt) {
 #endif
 
 /************************************************************************
- * 									*
+ *									*
  *		 Finite Automata structures manipulations		*
- * 									*
+ *									*
  ************************************************************************/
 
-static void 
+static void
 xmlRegAtomAddRange(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom,
 	           int neg, xmlRegAtomType type, int start, int end,
 		   xmlChar *blockName) {
@@ -1159,7 +1253,7 @@ xmlRegAtomAddRange(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom,
 	return;
     range->blockName = blockName;
     atom->ranges[atom->nbRanges++] = range;
-    
+
 }
 
 static int
@@ -1190,7 +1284,7 @@ xmlRegGetCounter(xmlRegParserCtxtPtr ctxt) {
     return(ctxt->nbCounters++);
 }
 
-static int 
+static int
 xmlRegAtomPush(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom) {
     if (atom == NULL) {
 	ERROR("atom push: atom is NULL");
@@ -1222,7 +1316,7 @@ xmlRegAtomPush(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom) {
     return(0);
 }
 
-static void 
+static void
 xmlRegStateAddTransTo(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr target,
                       int from) {
     if (target->maxTransTo == 0) {
@@ -1250,7 +1344,7 @@ xmlRegStateAddTransTo(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr target,
     target->nbTransTo++;
 }
 
-static void 
+static void
 xmlRegStateAddTrans(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state,
 	            xmlRegAtomPtr atom, xmlRegStatePtr target,
 		    int counter, int count) {
@@ -1316,7 +1410,7 @@ xmlRegStateAddTrans(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state,
 	printf("counted %d\n", counter);
     else if (atom == NULL)
 	printf("epsilon transition\n");
-    else if (atom != NULL) 
+    else if (atom != NULL)
         xmlRegPrintAtom(stdout, atom);
 #endif
 
@@ -1449,6 +1543,8 @@ xmlFAGenerateCountedTransition(xmlRegParserCtxtPtr ctxt,
 static int
 xmlFAGenerateTransitions(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr from,
 	                 xmlRegStatePtr to, xmlRegAtomPtr atom) {
+    xmlRegStatePtr end;
+
     if (atom == NULL) {
 	ERROR("genrate transition: atom == NULL");
 	return(-1);
@@ -1468,7 +1564,7 @@ xmlFAGenerateTransitions(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr from,
 	     */
 	    xmlFAGenerateEpsilonTransition(ctxt, atom->stop, to);
 #ifdef DV
-	} else if ((to == NULL) && (atom->quant != XML_REGEXP_QUANT_RANGE) && 
+	} else if ((to == NULL) && (atom->quant != XML_REGEXP_QUANT_RANGE) &&
 		   (atom->quant != XML_REGEXP_QUANT_ONCE)) {
 	    to = xmlRegNewState(ctxt);
 	    xmlRegStatePush(ctxt, to);
@@ -1482,10 +1578,15 @@ xmlFAGenerateTransitions(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr from,
 		/*
 		 * transition done to the state after end of atom.
 		 *      1. set transition from atom start to new state
-		 *      2. set transition from atom end to this state. 
+		 *      2. set transition from atom end to this state.
 		 */
-		xmlFAGenerateEpsilonTransition(ctxt, atom->start, 0);
-		xmlFAGenerateEpsilonTransition(ctxt, atom->stop, ctxt->state);
+                if (to == NULL) {
+                    xmlFAGenerateEpsilonTransition(ctxt, atom->start, 0);
+                    xmlFAGenerateEpsilonTransition(ctxt, atom->stop,
+                                                   ctxt->state);
+                } else {
+                    xmlFAGenerateEpsilonTransition(ctxt, atom->start, to);
+                }
 		break;
 	    case XML_REGEXP_QUANT_MULT:
 		atom->quant = XML_REGEXP_QUANT_ONCE;
@@ -1498,58 +1599,89 @@ xmlFAGenerateTransitions(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr from,
 		break;
 	    case XML_REGEXP_QUANT_RANGE: {
 		int counter;
-		xmlRegStatePtr newstate;
+		xmlRegStatePtr inter, newstate;
 
 		/*
-		 * This one is nasty:
-		 *   1/ if range has minOccurs == 0, create a new state
-		 *	and create epsilon transitions from atom->start
-		 *	to atom->stop, as well as atom->start to the new
-		 *	state
-		 *   2/ register a new counter
-		 *   3/ register an epsilon transition associated to
-		 *      this counter going from atom->stop to atom->start
-		 *   4/ create a new state
-		 *   5/ generate a counted transition from atom->stop to
-		 *      that state
+		 * create the final state now if needed
 		 */
-		if (atom->min == 0) {
-		    xmlFAGenerateEpsilonTransition(ctxt, atom->start,
-			atom->stop);
+		if (to != NULL) {
+		    newstate = to;
+		} else {
 		    newstate = xmlRegNewState(ctxt);
 		    xmlRegStatePush(ctxt, newstate);
-		    ctxt->state = newstate;
+		}
+
+		/*
+		 * The principle here is to use counted transition
+		 * to avoid explosion in the number of states in the
+		 * graph. This is clearly more complex but should not
+		 * be exploitable at runtime.
+		 */
+		if ((atom->min == 0) && (atom->start0 == NULL)) {
+		    xmlRegAtomPtr copy;
+		    /*
+		     * duplicate a transition based on atom to count next
+		     * occurences after 1. We cannot loop to atom->start
+		     * directly because we need an epsilon transition to
+		     * newstate.
+		     */
+		     /* ???? For some reason it seems we never reach that
+		        case, I suppose this got optimized out before when
+			building the automata */
+		    copy = xmlRegCopyAtom(ctxt, atom);
+		    if (copy == NULL)
+		        return(-1);
+		    copy->quant = XML_REGEXP_QUANT_ONCE;
+		    copy->min = 0;
+		    copy->max = 0;
+
+		    if (xmlFAGenerateTransitions(ctxt, atom->start, NULL, copy)
+		        < 0)
+			return(-1);
+		    inter = ctxt->state;
+		    counter = xmlRegGetCounter(ctxt);
+		    ctxt->counters[counter].min = atom->min - 1;
+		    ctxt->counters[counter].max = atom->max - 1;
+		    /* count the number of times we see it again */
+		    xmlFAGenerateCountedEpsilonTransition(ctxt, inter,
+						   atom->stop, counter);
+		    /* allow a way out based on the count */
+		    xmlFAGenerateCountedTransition(ctxt, inter,
+			                           newstate, counter);
+		    /* and also allow a direct exit for 0 */
 		    xmlFAGenerateEpsilonTransition(ctxt, atom->start,
-			newstate);
+		                                   newstate);
+		} else {
+		    /*
+		     * either we need the atom at least once or there
+		     * is an atom->start0 allowing to easilly plug the
+		     * epsilon transition.
+		     */
+		    counter = xmlRegGetCounter(ctxt);
+		    ctxt->counters[counter].min = atom->min - 1;
+		    ctxt->counters[counter].max = atom->max - 1;
+		    /* count the number of times we see it again */
+		    xmlFAGenerateCountedEpsilonTransition(ctxt, atom->stop,
+						   atom->start, counter);
+		    /* allow a way out based on the count */
+		    xmlFAGenerateCountedTransition(ctxt, atom->stop,
+			                           newstate, counter);
+		    /* and if needed allow a direct exit for 0 */
+		    if (atom->min == 0)
+			xmlFAGenerateEpsilonTransition(ctxt, atom->start0,
+						       newstate);
+
 		}
-		counter = xmlRegGetCounter(ctxt);
-		ctxt->counters[counter].min = atom->min - 1;
-		ctxt->counters[counter].max = atom->max - 1;
 		atom->min = 0;
 		atom->max = 0;
 		atom->quant = XML_REGEXP_QUANT_ONCE;
-		if (to != NULL) {
-		    newstate = to;
-		} else {
-		    newstate = xmlRegNewState(ctxt);
-		    xmlRegStatePush(ctxt, newstate);
-		}
 		ctxt->state = newstate;
-		xmlFAGenerateCountedTransition(ctxt, atom->stop,
-			                       newstate, counter);
-                
-                /*
-		 * first check count and if OK jump forward; 
-                 * if checking fail increment count and jump back
-		 */
-		xmlFAGenerateCountedEpsilonTransition(ctxt, atom->stop,
-			                              atom->start, counter);
 	    }
 	    default:
 		break;
 	}
 	return(0);
-    } 
+    }
     if ((atom->min == 0) && (atom->max == 0) &&
                (atom->quant == XML_REGEXP_QUANT_RANGE)) {
         /*
@@ -1576,11 +1708,30 @@ xmlFAGenerateTransitions(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr from,
 	    return(-1);
 	}
     }
+    end = to;
+    if ((atom->quant == XML_REGEXP_QUANT_MULT) ||
+        (atom->quant == XML_REGEXP_QUANT_PLUS)) {
+	/*
+	 * Do not pollute the target state by adding transitions from
+	 * it as it is likely to be the shared target of multiple branches.
+	 * So isolate with an epsilon transition.
+	 */
+        xmlRegStatePtr tmp;
+
+	tmp = xmlRegNewState(ctxt);
+	if (tmp != NULL)
+	    xmlRegStatePush(ctxt, tmp);
+	else {
+	    return(-1);
+	}
+	xmlFAGenerateEpsilonTransition(ctxt, tmp, to);
+	to = tmp;
+    }
     if (xmlRegAtomPush(ctxt, atom) < 0) {
 	return(-1);
     }
     xmlRegStateAddTrans(ctxt, from, atom, to, -1, -1);
-    ctxt->state = to;
+    ctxt->state = end;
     switch (atom->quant) {
 	case XML_REGEXP_QUANT_OPT:
 	    atom->quant = XML_REGEXP_QUANT_ONCE;
@@ -1595,6 +1746,13 @@ xmlFAGenerateTransitions(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr from,
 	    atom->quant = XML_REGEXP_QUANT_ONCE;
 	    xmlRegStateAddTrans(ctxt, to, atom, to, -1, -1);
 	    break;
+	case XML_REGEXP_QUANT_RANGE:
+#if DV_test
+	    if (atom->min == 0) {
+		xmlFAGenerateEpsilonTransition(ctxt, from, to);
+	    }
+#endif
+	    break;
 	default:
 	    break;
     }
@@ -1605,7 +1763,7 @@ xmlFAGenerateTransitions(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr from,
  * xmlFAReduceEpsilonTransitions:
  * @ctxt:  a regexp parser context
  * @fromnr:  the from state
- * @tonr:  the to state 
+ * @tonr:  the to state
  * @counter:  should that transition be associated to a counted
  *
  */
@@ -1649,7 +1807,7 @@ xmlFAReduceEpsilonTransitions(xmlRegParserCtxtPtr ctxt, int fromnr,
 		    int newto = to->trans[transnr].to;
 
 		    xmlRegStateAddTrans(ctxt, from, NULL,
-					ctxt->states[newto], 
+					ctxt->states[newto],
 					-1, to->trans[transnr].count);
 		} else {
 #ifdef DEBUG_REGEXP_GRAPH
@@ -1671,11 +1829,11 @@ xmlFAReduceEpsilonTransitions(xmlRegParserCtxtPtr ctxt, int fromnr,
 	    int newto = to->trans[transnr].to;
 
 	    if (to->trans[transnr].counter >= 0) {
-		xmlRegStateAddTrans(ctxt, from, to->trans[transnr].atom, 
-				    ctxt->states[newto], 
+		xmlRegStateAddTrans(ctxt, from, to->trans[transnr].atom,
+				    ctxt->states[newto],
 				    to->trans[transnr].counter, -1);
 	    } else {
-		xmlRegStateAddTrans(ctxt, from, to->trans[transnr].atom, 
+		xmlRegStateAddTrans(ctxt, from, to->trans[transnr].atom,
 				    ctxt->states[newto], counter, -1);
 	    }
 	}
@@ -1687,7 +1845,7 @@ xmlFAReduceEpsilonTransitions(xmlRegParserCtxtPtr ctxt, int fromnr,
  * xmlFAEliminateSimpleEpsilonTransitions:
  * @ctxt:  a regexp parser context
  *
- * Eliminating general epsilon transitions can get costly in the general 
+ * Eliminating general epsilon transitions can get costly in the general
  * algorithm due to the large amount of generated new transitions and
  * associated comparisons. However for simple epsilon transition used just
  * to separate building blocks when generating the automata this can be
@@ -1709,6 +1867,8 @@ xmlFAEliminateSimpleEpsilonTransitions(xmlRegParserCtxtPtr ctxt) {
 	    continue;
 	if (state->nbTrans != 1)
 	    continue;
+	if (state->type == XML_REGEXP_UNREACH_STATE)
+	    continue;
 	/* is the only transition out a basic transition */
 	if ((state->trans[0].atom == NULL) &&
 	    (state->trans[0].to >= 0) &&
@@ -1721,48 +1881,37 @@ xmlFAEliminateSimpleEpsilonTransitions(xmlRegParserCtxtPtr ctxt) {
 #ifdef DEBUG_REGEXP_GRAPH
 		printf("Found simple epsilon trans from start %d to %d\n",
 		       statenr, newto);
-#endif     
+#endif
             } else {
 #ifdef DEBUG_REGEXP_GRAPH
 		printf("Found simple epsilon trans from %d to %d\n",
 		       statenr, newto);
-#endif     
+#endif
 	        for (i = 0;i < state->nbTransTo;i++) {
 		    tmp = ctxt->states[state->transTo[i]];
 		    for (j = 0;j < tmp->nbTrans;j++) {
 			if (tmp->trans[j].to == statenr) {
-			    tmp->trans[j].to = newto;
-#ifdef DEBUG_REGEXP_GRAPH
-			    printf("Changed transition %d on %d to go to %d\n",
-				   j, tmp->no, newto);
-#endif     
-                            xmlRegStateAddTransTo(ctxt, ctxt->states[newto],
-			                          tmp->no);
-			}
-		    }
-		}
-#if 0
-	        for (i = 0;i < ctxt->nbStates;i++) {
-		    tmp = ctxt->states[i];
-		    for (j = 0;j < tmp->nbTrans;j++) {
-			if (tmp->trans[j].to == statenr) {
-			    tmp->trans[j].to = newto;
 #ifdef DEBUG_REGEXP_GRAPH
 			    printf("Changed transition %d on %d to go to %d\n",
 				   j, tmp->no, newto);
-#endif     
+#endif
+			    tmp->trans[j].to = -1;
+			    xmlRegStateAddTrans(ctxt, tmp, tmp->trans[j].atom,
+						ctxt->states[newto],
+					        tmp->trans[j].counter,
+						tmp->trans[j].count);
 			}
 		    }
 		}
-#endif
 		if (state->type == XML_REGEXP_FINAL_STATE)
 		    ctxt->states[newto]->type = XML_REGEXP_FINAL_STATE;
 		/* eliminate the transition completely */
 		state->nbTrans = 0;
 
+                state->type = XML_REGEXP_UNREACH_STATE;
 
 	    }
-            
+
 	}
     }
 }
@@ -1779,16 +1928,33 @@ xmlFAEliminateEpsilonTransitions(xmlRegParserCtxtPtr ctxt) {
 
     if (ctxt->states == NULL) return;
 
+    /*
+     * Eliminate simple epsilon transition and the associated unreachable
+     * states.
+     */
     xmlFAEliminateSimpleEpsilonTransitions(ctxt);
+    for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
+	state = ctxt->states[statenr];
+	if ((state != NULL) && (state->type == XML_REGEXP_UNREACH_STATE)) {
+#ifdef DEBUG_REGEXP_GRAPH
+	    printf("Removed unreachable state %d\n", statenr);
+#endif
+	    xmlRegFreeState(state);
+	    ctxt->states[statenr] = NULL;
+	}
+    }
 
     has_epsilon = 0;
 
     /*
-     * build the completed transitions bypassing the epsilons
+     * Build the completed transitions bypassing the epsilons
      * Use a marking algorithm to avoid loops
-     * mark sink states too.
+     * Mark sink states too.
+     * Process from the latests states backward to the start when
+     * there is long cascading epsilon chains this minimize the
+     * recursions and transition compares when adding the new ones
      */
-    for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
+    for (statenr = ctxt->nbStates - 1;statenr >= 0;statenr--) {
 	state = ctxt->states[statenr];
 	if (state == NULL)
 	    continue;
@@ -1812,8 +1978,9 @@ xmlFAEliminateEpsilonTransitions(xmlRegParserCtxtPtr ctxt) {
 		    printf("Found epsilon trans %d from %d to %d\n",
 			   transnr, statenr, newto);
 #endif
-		    state->mark = XML_REGEXP_MARK_START;
 		    has_epsilon = 1;
+		    state->trans[transnr].to = -2;
+		    state->mark = XML_REGEXP_MARK_START;
 		    xmlFAReduceEpsilonTransitions(ctxt, statenr,
 				      newto, state->trans[transnr].counter);
 		    state->mark = XML_REGEXP_MARK_NORMAL;
@@ -1932,12 +2099,13 @@ xmlFACompareRanges(xmlRegRangePtr range1, xmlRegRangePtr range2) {
                (range2->type == XML_REGEXP_EPSILON)) {
 	return(0);
     } else if (range1->type == range2->type) {
-        if ((range1->type != XML_REGEXP_CHARVAL) ||
-	    (range1->end < range2->start) ||
-	    (range2->end < range1->start))
-	    ret = 1;
-	else
+        if (range1->type != XML_REGEXP_CHARVAL)
+            ret = 1;
+        else if ((range1->end < range2->start) ||
+	         (range2->end < range1->start))
 	    ret = 0;
+	else
+	    ret = 1;
     } else if (range1->type == XML_REGEXP_CHARVAL) {
         int codepoint;
 	int neg = 0;
@@ -1945,7 +2113,7 @@ xmlFACompareRanges(xmlRegRangePtr range1, xmlRegRangePtr range2) {
 	/*
 	 * just check all codepoints in the range for acceptance,
 	 * this is usually way cheaper since done only once at
-	 * compilation than testing over and over at runtime or 
+	 * compilation than testing over and over at runtime or
 	 * pushing too many states when evaluating.
 	 */
 	if (((range1->neg == 0) && (range2->neg != 0)) ||
@@ -2064,7 +2232,7 @@ xmlFACompareRanges(xmlRegRangePtr range1, xmlRegRangePtr range2) {
     if (((range1->neg == 0) && (range2->neg != 0)) ||
         ((range1->neg != 0) && (range2->neg == 0)))
 	ret = !ret;
-    return(1);
+    return(ret);
 }
 
 /**
@@ -2272,6 +2440,7 @@ xmlFACompareAtomTypes(xmlRegAtomType type1, xmlRegAtomType type2) {
  * xmlFAEqualAtoms:
  * @atom1:  an atom
  * @atom2:  an atom
+ * @deep: if not set only compare string pointers
  *
  * Compares two atoms to check whether they are the same exactly
  * this is used to remove equivalent transitions
@@ -2279,7 +2448,7 @@ xmlFACompareAtomTypes(xmlRegAtomType type1, xmlRegAtomType type2) {
  * Returns 1 if same and 0 otherwise
  */
 static int
-xmlFAEqualAtoms(xmlRegAtomPtr atom1, xmlRegAtomPtr atom2) {
+xmlFAEqualAtoms(xmlRegAtomPtr atom1, xmlRegAtomPtr atom2, int deep) {
     int ret = 0;
 
     if (atom1 == atom2)
@@ -2294,8 +2463,11 @@ xmlFAEqualAtoms(xmlRegAtomPtr atom1, xmlRegAtomPtr atom2) {
 	    ret = 0;
 	    break;
         case XML_REGEXP_STRING:
-	    ret = xmlStrEqual((xmlChar *)atom1->valuep,
-	                      (xmlChar *)atom2->valuep);
+            if (!deep)
+                ret = (atom1->valuep == atom2->valuep);
+            else
+                ret = xmlStrEqual((xmlChar *)atom1->valuep,
+                                  (xmlChar *)atom2->valuep);
 	    break;
         case XML_REGEXP_CHARVAL:
 	    ret = (atom1->codepoint == atom2->codepoint);
@@ -2313,6 +2485,7 @@ xmlFAEqualAtoms(xmlRegAtomPtr atom1, xmlRegAtomPtr atom2) {
  * xmlFACompareAtoms:
  * @atom1:  an atom
  * @atom2:  an atom
+ * @deep: if not set only compare string pointers
  *
  * Compares two atoms to check whether they intersect in some ways,
  * this is used by xmlFAComputesDeterminism and xmlFARecurseDeterminism only
@@ -2320,7 +2493,7 @@ xmlFAEqualAtoms(xmlRegAtomPtr atom1, xmlRegAtomPtr atom2) {
  * Returns 1 if yes and 0 otherwise
  */
 static int
-xmlFACompareAtoms(xmlRegAtomPtr atom1, xmlRegAtomPtr atom2) {
+xmlFACompareAtoms(xmlRegAtomPtr atom1, xmlRegAtomPtr atom2, int deep) {
     int ret = 1;
 
     if (atom1 == atom2)
@@ -2346,8 +2519,11 @@ xmlFACompareAtoms(xmlRegAtomPtr atom1, xmlRegAtomPtr atom2) {
     }
     switch (atom1->type) {
         case XML_REGEXP_STRING:
-	    ret = xmlRegStrEqualWildcard((xmlChar *)atom1->valuep,
-	                                 (xmlChar *)atom2->valuep);
+            if (!deep)
+                ret = (atom1->valuep != atom2->valuep);
+            else
+                ret = xmlRegStrEqualWildcard((xmlChar *)atom1->valuep,
+                                             (xmlChar *)atom2->valuep);
 	    break;
         case XML_REGEXP_EPSILON:
 	    goto not_determinist;
@@ -2410,9 +2586,16 @@ xmlFARecurseDeterminism(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state,
     int res;
     int transnr, nbTrans;
     xmlRegTransPtr t1;
+    int deep = 1;
 
     if (state == NULL)
 	return(ret);
+    if (state->markd == XML_REGEXP_MARK_VISITED)
+	return(ret);
+
+    if (ctxt->flags & AM_AUTOMATA_RNG)
+        deep = 0;
+
     /*
      * don't recurse on transitions potentially added in the course of
      * the elimination.
@@ -2424,10 +2607,12 @@ xmlFARecurseDeterminism(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state,
 	 * check transitions conflicting with the one looked at
 	 */
 	if (t1->atom == NULL) {
-	    if (t1->to == -1)
+	    if (t1->to < 0)
 		continue;
+	    state->markd = XML_REGEXP_MARK_VISITED;
 	    res = xmlFARecurseDeterminism(ctxt, ctxt->states[t1->to],
 		                           to, atom);
+	    state->markd = 0;
 	    if (res == 0) {
 	        ret = 0;
 		/* t1->nd = 1; */
@@ -2436,7 +2621,7 @@ xmlFARecurseDeterminism(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state,
 	}
 	if (t1->to != to)
 	    continue;
-	if (xmlFACompareAtoms(t1->atom, atom)) {
+	if (xmlFACompareAtoms(t1->atom, atom, deep)) {
 	    ret = 0;
 	    /* mark the transition as non-deterministic */
 	    t1->nd = 1;
@@ -2460,6 +2645,7 @@ xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt) {
     xmlRegTransPtr t1, t2, last;
     int i;
     int ret = 1;
+    int deep = 1;
 
 #ifdef DEBUG_REGEXP_GRAPH
     printf("xmlFAComputesDeterminism\n");
@@ -2468,6 +2654,9 @@ xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt) {
     if (ctxt->determinist != -1)
 	return(ctxt->determinist);
 
+    if (ctxt->flags & AM_AUTOMATA_RNG)
+        deep = 0;
+
     /*
      * First cleanup the automata removing cancelled transitions
      */
@@ -2495,7 +2684,13 @@ xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt) {
 		    continue;
 		if (t2->atom != NULL) {
 		    if (t1->to == t2->to) {
-			if (xmlFAEqualAtoms(t1->atom, t2->atom))
+                        /*
+                         * Here we use deep because we want to keep the
+                         * transitions which indicate a conflict
+                         */
+			if (xmlFAEqualAtoms(t1->atom, t2->atom, deep) &&
+                            (t1->counter == t2->counter) &&
+                            (t1->count == t2->count))
 			    t2->to = -1; /* eliminated */
 		    }
 		}
@@ -2530,8 +2725,11 @@ xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt) {
 		if (t2->to == -1) /* eliminated */
 		    continue;
 		if (t2->atom != NULL) {
-		    /* not determinist ! */
-		    if (xmlFACompareAtoms(t1->atom, t2->atom)) {
+                    /*
+                     * But here we don't use deep because we want to
+                     * find transitions which indicate a conflict
+                     */
+		    if (xmlFACompareAtoms(t1->atom, t2->atom, 1)) {
 			ret = 0;
 			/* mark the transitions as non-deterministic ones */
 			t1->nd = 1;
@@ -2583,9 +2781,9 @@ xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt) {
 }
 
 /************************************************************************
- * 									*
+ *									*
  *	Routines to check input against transition atoms		*
- * 									*
+ *									*
  ************************************************************************/
 
 static int
@@ -2614,7 +2812,7 @@ xmlRegCheckCharacterRange(xmlRegAtomType type, int codepoint, int neg,
         case XML_REGEXP_NOTINITNAME:
 	    neg = !neg;
         case XML_REGEXP_INITNAME:
-	    ret = (IS_LETTER(codepoint) || 
+	    ret = (IS_LETTER(codepoint) ||
 		   (codepoint == '_') || (codepoint == ':'));
 	    break;
         case XML_REGEXP_NOTNAMECHAR:
@@ -2862,9 +3060,9 @@ xmlRegCheckCharacter(xmlRegAtomPtr atom, int codepoint) {
 }
 
 /************************************************************************
- * 									*
+ *									*
  *	Saving and restoring state of an execution context		*
- * 									*
+ *									*
  ************************************************************************/
 
 #ifdef DEBUG_REGEXP_EXEC
@@ -2875,7 +3073,8 @@ xmlFARegDebugExec(xmlRegExecCtxtPtr exec) {
 	int i;
 	printf(": ");
 	for (i = 0;(i < 3) && (i < exec->inputStackNr);i++)
-	    printf("%s ", exec->inputStack[exec->inputStackNr - (i + 1)]);
+	    printf("%s ", (const char *)
+	           exec->inputStack[exec->inputStackNr - (i + 1)].value);
     } else {
 	printf(": %s", &(exec->inputString[exec->index]));
     }
@@ -2963,8 +3162,10 @@ xmlFARegExecRollBack(xmlRegExecCtxtPtr exec) {
 	    exec->status = -6;
 	    return;
 	}
-	memcpy(exec->counts, exec->rollbacks[exec->nbRollbacks].counts,
+	if (exec->counts) {
+	    memcpy(exec->counts, exec->rollbacks[exec->nbRollbacks].counts,
 	       exec->comp->nbCounters * sizeof(int));
+	}
     }
 
 #ifdef DEBUG_REGEXP_EXEC
@@ -2974,9 +3175,9 @@ xmlFARegExecRollBack(xmlRegExecCtxtPtr exec) {
 }
 
 /************************************************************************
- * 									*
+ *									*
  *	Verifier, running an input against a compiled regexp		*
- * 									*
+ *									*
  ************************************************************************/
 
 static int
@@ -3008,9 +3209,10 @@ xmlFARegExec(xmlRegexpPtr comp, const xmlChar *content) {
         memset(exec->counts, 0, comp->nbCounters * sizeof(int));
     } else
 	exec->counts = NULL;
-    while ((exec->status == 0) &&
+    while ((exec->status == 0) && (exec->state != NULL) &&
 	   ((exec->inputString[exec->index] != 0) ||
-	    (exec->state->type != XML_REGEXP_FINAL_STATE))) {
+	    ((exec->state != NULL) &&
+	     (exec->state->type != XML_REGEXP_FINAL_STATE)))) {
 	xmlRegTransPtr trans;
 	xmlRegAtomPtr atom;
 
@@ -3081,12 +3283,22 @@ xmlFARegExec(xmlRegexpPtr comp, const xmlChar *content) {
 		     * this is a multiple input sequence
 		     * If there is a counter associated increment it now.
 		     * before potentially saving and rollback
+		     * do not increment if the counter is already over the
+		     * maximum limit in which case get to next transition
 		     */
 		    if (trans->counter >= 0) {
-			if (exec->counts == NULL) {
+			xmlRegCounterPtr counter;
+
+			if ((exec->counts == NULL) ||
+			    (exec->comp == NULL) ||
+			    (exec->comp->counters == NULL)) {
 			    exec->status = -1;
 			    goto error;
 			}
+			counter = &exec->comp->counters[trans->counter];
+			if (exec->counts[trans->counter] >= counter->max)
+			    continue; /* for loop on transitions */
+
 #ifdef DEBUG_REGEXP_EXEC
 			printf("Increasing count %d\n", trans->counter);
 #endif
@@ -3182,10 +3394,18 @@ xmlFARegExec(xmlRegexpPtr comp, const xmlChar *content) {
 		    xmlFARegExecSave(exec);
 		}
 		if (trans->counter >= 0) {
-		    if (exec->counts == NULL) {
-		        exec->status = -1;
+		    xmlRegCounterPtr counter;
+
+                    /* make sure we don't go over the counter maximum value */
+		    if ((exec->counts == NULL) ||
+			(exec->comp == NULL) ||
+			(exec->comp->counters == NULL)) {
+			exec->status = -1;
 			goto error;
 		    }
+		    counter = &exec->comp->counters[trans->counter];
+		    if (exec->counts[trans->counter] >= counter->max)
+			continue; /* for loop on transitions */
 #ifdef DEBUG_REGEXP_EXEC
 		    printf("Increasing count %d\n", trans->counter);
 #endif
@@ -3243,6 +3463,8 @@ error:
 	}
 	xmlFree(exec->rollbacks);
     }
+    if (exec->state == NULL)
+        return(-1);
     if (exec->counts != NULL)
 	xmlFree(exec->counts);
     if (exec->status == 0)
@@ -3256,9 +3478,9 @@ error:
 }
 
 /************************************************************************
- * 									*
+ *									*
  *	Progressive interface to the verifier one atom at a time	*
- * 									*
+ *									*
  ************************************************************************/
 #ifdef DEBUG_ERR
 static void testerr(xmlRegExecCtxtPtr exec);
@@ -3375,7 +3597,7 @@ xmlFARegExecSaveInputString(xmlRegExecCtxtPtr exec, const xmlChar *value,
 #endif
     if (exec->inputStackMax == 0) {
 	exec->inputStackMax = 4;
-	exec->inputStack = (xmlRegInputTokenPtr) 
+	exec->inputStack = (xmlRegInputTokenPtr)
 	    xmlMalloc(exec->inputStackMax * sizeof(xmlRegInputToken));
 	if (exec->inputStack == NULL) {
 	    xmlRegexpErrMemory(NULL, "pushing input string");
@@ -3404,11 +3626,11 @@ xmlFARegExecSaveInputString(xmlRegExecCtxtPtr exec, const xmlChar *value,
 
 /**
  * xmlRegStrEqualWildcard:
- * @expStr:  the string to be evaluated 
+ * @expStr:  the string to be evaluated
  * @valStr:  the validation string
  *
  * Checks if both strings are equal or have the same content. "*"
- * can be used as a wildcard in @valStr; "|" is used as a seperator of 
+ * can be used as a wildcard in @valStr; "|" is used as a seperator of
  * substrings in both @expStr and @valStr.
  *
  * Returns 1 if the comparison is satisfied and the number of substrings
@@ -3474,7 +3696,7 @@ xmlRegCompactPushString(xmlRegExecCtxtPtr exec,
 
     if ((comp == NULL) || (comp->compact == NULL) || (comp->stringMap == NULL))
 	return(-1);
-    
+
     if (value == NULL) {
 	/*
 	 * are we at a final state ?
@@ -3495,9 +3717,9 @@ xmlRegCompactPushString(xmlRegExecCtxtPtr exec,
     for (i = 0;i < comp->nbstrings;i++) {
 	target = comp->compact[state * (comp->nbstrings + 1) + i + 1];
 	if ((target > 0) && (target <= comp->nbstates)) {
-	    target--; /* to avoid 0 */    
+	    target--; /* to avoid 0 */
 	    if (xmlRegStrEqualWildcard(comp->stringMap[i], value)) {
-		exec->index = target;		
+		exec->index = target;
 		if ((exec->callback != NULL) && (comp->transdata != NULL)) {
 		    exec->callback(exec->data, value,
 			  comp->transdata[state * comp->nbstrings + i], data);
@@ -3631,7 +3853,7 @@ xmlRegExecPushStringInternal(xmlRegExecCtxtPtr exec, const xmlChar *value,
 			    continue;
 			counter = &exec->comp->counters[t->counter];
 			count = exec->counts[t->counter];
-			if ((count < counter->max) && 
+			if ((count < counter->max) &&
 		            (t->atom != NULL) &&
 			    (xmlStrEqual(value, t->atom->valuep))) {
 			    ret = 0;
@@ -3871,7 +4093,7 @@ rollback:
 	     */
 	    exec->determinist = 0;
 	    xmlFARegExecRollBack(exec);
-	    if (exec->status == 0) {
+	    if ((exec->inputStack != NULL ) && (exec->status == 0)) {
 		value = exec->inputStack[exec->index].value;
 		data = exec->inputStack[exec->index].data;
 #ifdef DEBUG_PUSH
@@ -3989,7 +4211,7 @@ xmlRegExecGetValues(xmlRegExecCtxtPtr exec, int err,
     int maxval;
     int nb = 0;
 
-    if ((exec == NULL) || (nbval == NULL) || (nbneg == NULL) || 
+    if ((exec == NULL) || (nbval == NULL) || (nbneg == NULL) ||
         (values == NULL) || (*nbval <= 0))
         return(-1);
 
@@ -4086,7 +4308,7 @@ xmlRegExecGetValues(xmlRegExecCtxtPtr exec, int err,
 		    (*nbval)++;
 		}
 	    } else {
-                if ((exec->comp->states[trans->to] != NULL) &&
+                if ((exec->comp != NULL) && (exec->comp->states[trans->to] != NULL) &&
 		    (exec->comp->states[trans->to]->type !=
 		     XML_REGEXP_SINK_STATE)) {
 		    if (atom->neg)
@@ -4095,7 +4317,7 @@ xmlRegExecGetValues(xmlRegExecCtxtPtr exec, int err,
 			values[nb++] = (xmlChar *) atom->valuep;
 		    (*nbval)++;
 		}
-	    } 
+	    }
 	}
 	for (transno = 0;
 	     (transno < state->nbTrans) && (nb < maxval);
@@ -4122,7 +4344,7 @@ xmlRegExecGetValues(xmlRegExecCtxtPtr exec, int err,
 			values[nb++] = (xmlChar *) atom->valuep;
 		    (*nbneg)++;
 		}
-	    } 
+	    }
 	}
     }
     return(0);
@@ -4353,10 +4575,10 @@ progress:
 }
 #endif
 /************************************************************************
- * 									*
+ *									*
  *	Parser for the Schemas Datatype Regular Expressions		*
  *	http://www.w3.org/TR/2001/REC-xmlschema-2-20010502/#regexs	*
- * 									*
+ *									*
  ************************************************************************/
 
 /**
@@ -4385,7 +4607,7 @@ xmlFAIsChar(xmlRegParserCtxtPtr ctxt) {
  *
  * [27]   charProp   ::=   IsCategory | IsBlock
  * [28]   IsCategory ::= Letters | Marks | Numbers | Punctuation |
- *                       Separators | Symbols | Others 
+ *                       Separators | Symbols | Others
  * [29]   Letters   ::=   'L' [ultmo]?
  * [30]   Marks   ::=   'M' [nce]?
  * [31]   Numbers   ::=   'N' [dlo]?
@@ -4400,7 +4622,7 @@ xmlFAParseCharProp(xmlRegParserCtxtPtr ctxt) {
     int cur;
     xmlRegAtomType type = (xmlRegAtomType) 0;
     xmlChar *blockName = NULL;
-    
+
     cur = CUR;
     if (cur == 'L') {
 	NEXT;
@@ -4572,15 +4794,15 @@ xmlFAParseCharProp(xmlRegParserCtxtPtr ctxt) {
 	NEXT;
 	start = ctxt->cur;
 	cur = CUR;
-	if (((cur >= 'a') && (cur <= 'z')) || 
-	    ((cur >= 'A') && (cur <= 'Z')) || 
-	    ((cur >= '0') && (cur <= '9')) || 
+	if (((cur >= 'a') && (cur <= 'z')) ||
+	    ((cur >= 'A') && (cur <= 'Z')) ||
+	    ((cur >= '0') && (cur <= '9')) ||
 	    (cur == 0x2D)) {
 	    NEXT;
 	    cur = CUR;
-	    while (((cur >= 'a') && (cur <= 'z')) || 
-		((cur >= 'A') && (cur <= 'Z')) || 
-		((cur >= '0') && (cur <= '9')) || 
+	    while (((cur >= 'a') && (cur <= 'z')) ||
+		((cur >= 'A') && (cur <= 'Z')) ||
+		((cur >= '0') && (cur <= '9')) ||
 		(cur == 0x2D)) {
 		NEXT;
 		cur = CUR;
@@ -4606,7 +4828,7 @@ xmlFAParseCharProp(xmlRegParserCtxtPtr ctxt) {
  * xmlFAParseCharClassEsc:
  * @ctxt:  a regexp parser context
  *
- * [23] charClassEsc ::= ( SingleCharEsc | MultiCharEsc | catEsc | complEsc ) 
+ * [23] charClassEsc ::= ( SingleCharEsc | MultiCharEsc | catEsc | complEsc )
  * [24] SingleCharEsc ::= '\' [nrt\|.?*+(){}#x2D#x5B#x5D#x5E]
  * [25] catEsc   ::=   '\p{' charProp '}'
  * [26] complEsc ::=   '\P{' charProp '}'
@@ -4682,6 +4904,17 @@ xmlFAParseCharClassEsc(xmlRegParserCtxtPtr ctxt) {
 		}
 	    }
 	} else if (ctxt->atom->type == XML_REGEXP_RANGES) {
+            switch (cur) {
+                case 'n':
+                    cur = '\n';
+                    break;
+                case 'r':
+                    cur = '\r';
+                    break;
+                case 't':
+                    cur = '\t';
+                    break;
+            }
 	    xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
 			       XML_REGEXP_CHARVAL, cur, cur, NULL);
 	}
@@ -4692,34 +4925,34 @@ xmlFAParseCharClassEsc(xmlRegParserCtxtPtr ctxt) {
 	xmlRegAtomType type = XML_REGEXP_ANYSPACE;
 
 	switch (cur) {
-	    case 's': 
+	    case 's':
 		type = XML_REGEXP_ANYSPACE;
 		break;
-	    case 'S': 
+	    case 'S':
 		type = XML_REGEXP_NOTSPACE;
 		break;
-	    case 'i': 
+	    case 'i':
 		type = XML_REGEXP_INITNAME;
 		break;
-	    case 'I': 
+	    case 'I':
 		type = XML_REGEXP_NOTINITNAME;
 		break;
-	    case 'c': 
+	    case 'c':
 		type = XML_REGEXP_NAMECHAR;
 		break;
-	    case 'C': 
+	    case 'C':
 		type = XML_REGEXP_NOTNAMECHAR;
 		break;
-	    case 'd': 
+	    case 'd':
 		type = XML_REGEXP_DECIMAL;
 		break;
-	    case 'D': 
+	    case 'D':
 		type = XML_REGEXP_NOTDECIMAL;
 		break;
-	    case 'w': 
+	    case 'w':
 		type = XML_REGEXP_REALCHAR;
 		break;
-	    case 'W': 
+	    case 'W':
 		type = XML_REGEXP_NOTREALCHAR;
 		break;
 	}
@@ -4730,72 +4963,16 @@ xmlFAParseCharClassEsc(xmlRegParserCtxtPtr ctxt) {
 	    xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
 			       type, 0, 0, NULL);
 	}
-    }
-}
-
-/**
- * xmlFAParseCharRef:
- * @ctxt:  a regexp parser context
- *
- * [19]   XmlCharRef   ::=   ( '&#' [0-9]+ ';' ) | (' &#x' [0-9a-fA-F]+ ';' )
- */
-static int
-xmlFAParseCharRef(xmlRegParserCtxtPtr ctxt) {
-    int ret = 0, cur;
-
-    if ((CUR != '&') || (NXT(1) != '#'))
-	return(-1);
-    NEXT;
-    NEXT;
-    cur = CUR;
-    if (cur == 'x') {
-	NEXT;
-	cur = CUR;
-	if (((cur >= '0') && (cur <= '9')) ||
-	    ((cur >= 'a') && (cur <= 'f')) ||
-	    ((cur >= 'A') && (cur <= 'F'))) {
-	    while (((cur >= '0') && (cur <= '9')) ||
-	           ((cur >= 'a') && (cur <= 'f')) ||
-		   ((cur >= 'A') && (cur <= 'F'))) {
-		if ((cur >= '0') && (cur <= '9'))
-		    ret = ret * 16 + cur - '0';
-		else if ((cur >= 'a') && (cur <= 'f'))
-		    ret = ret * 16 + 10 + (cur - 'a');
-		else
-		    ret = ret * 16 + 10 + (cur - 'A');
-		NEXT;
-		cur = CUR;
-	    }
-	} else {
-	    ERROR("Char ref: expecting [0-9A-F]");
-	    return(-1);
-	}
-    } else {
-	if ((cur >= '0') && (cur <= '9')) {
-	    while ((cur >= '0') && (cur <= '9')) {
-		ret = ret * 10 + cur - '0';
-		NEXT;
-		cur = CUR;
-	    }
-	} else {
-	    ERROR("Char ref: expecting [0-9]");
-	    return(-1);
-	}
-    }
-    if (cur != ';') {
-	ERROR("Char ref: expecting ';'");
-	return(-1);
     } else {
-	NEXT;
+	ERROR("Wrong escape sequence, misuse of character '\\'");
     }
-    return(ret);
 }
 
 /**
  * xmlFAParseCharRange:
  * @ctxt:  a regexp parser context
  *
- * [17]   charRange   ::=     seRange | XmlCharRef | XmlCharIncDash 
+ * [17]   charRange   ::=     seRange | XmlCharRef | XmlCharIncDash
  * [18]   seRange   ::=   charOrEsc '-' charOrEsc
  * [20]   charOrEsc   ::=   XmlChar | SingleCharEsc
  * [21]   XmlChar   ::=   [^\#x2D#x5B#x5D]
@@ -4812,12 +4989,6 @@ xmlFAParseCharRange(xmlRegParserCtxtPtr ctxt) {
 	return;
     }
 
-    if ((CUR == '&') && (NXT(1) == '#')) {
-	end = start = xmlFAParseCharRef(ctxt);
-        xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
-	                   XML_REGEXP_CHARVAL, start, end, NULL);
-	return;
-    }
     cur = CUR;
     if (cur == '\\') {
 	NEXT;
@@ -4842,10 +5013,15 @@ xmlFAParseCharRange(xmlRegParserCtxtPtr ctxt) {
 	ERROR("Expecting a char range");
 	return;
     }
-    NEXTL(len);
-    if (start == '-') {
+    /*
+     * Since we are "inside" a range, we can assume ctxt->cur is past
+     * the start of ctxt->string, and PREV should be safe
+     */
+    if ((start == '-') && (NXT(1) != ']') && (PREV != '[') && (PREV != '^')) {
+	NEXTL(len);
 	return;
     }
+    NEXTL(len);
     cur = CUR;
     if ((cur != '-') || (NXT(1) == ']')) {
         xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
@@ -4896,7 +5072,7 @@ xmlFAParseCharRange(xmlRegParserCtxtPtr ctxt) {
 static void
 xmlFAParsePosCharGroup(xmlRegParserCtxtPtr ctxt) {
     do {
-	if ((CUR == '\\') || (CUR == '.')) {
+	if (CUR == '\\') {
 	    xmlFAParseCharClassEsc(ctxt);
 	} else {
 	    xmlFAParseCharRange(ctxt);
@@ -4911,7 +5087,7 @@ xmlFAParsePosCharGroup(xmlRegParserCtxtPtr ctxt) {
  *
  * [13]   charGroup    ::= posCharGroup | negCharGroup | charClassSub
  * [15]   negCharGroup ::= '^' posCharGroup
- * [16]   charClassSub ::= ( posCharGroup | negCharGroup ) '-' charClassExpr  
+ * [16]   charClassSub ::= ( posCharGroup | negCharGroup ) '-' charClassExpr
  * [12]   charClassExpr ::= '[' charGroup ']'
  */
 static void
@@ -5085,9 +5261,15 @@ xmlFAParseAtom(xmlRegParserCtxtPtr ctxt) {
     } else if (CUR == ')') {
 	return(0);
     } else if (CUR == '(') {
-	xmlRegStatePtr start, oldend;
+	xmlRegStatePtr start, oldend, start0;
 
 	NEXT;
+	/*
+	 * this extra Epsilon transition is needed if we count with 0 allowed
+	 * unfortunately this can't be known at that point
+	 */
+	xmlFAGenerateEpsilonTransition(ctxt, ctxt->state, NULL);
+	start0 = ctxt->state;
 	xmlFAGenerateEpsilonTransition(ctxt, ctxt->state, NULL);
 	start = ctxt->state;
 	oldend = ctxt->end;
@@ -5103,6 +5285,7 @@ xmlFAParseAtom(xmlRegParserCtxtPtr ctxt) {
 	if (ctxt->atom == NULL)
 	    return(-1);
 	ctxt->atom->start = start;
+	ctxt->atom->start0 = start0;
 	ctxt->atom->stop = ctxt->state;
 	ctxt->end = oldend;
 	return(1);
@@ -5152,7 +5335,7 @@ xmlFAParseBranch(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr to) {
     previous = ctxt->state;
     ret = xmlFAParsePiece(ctxt);
     if (ret != 0) {
-	if (xmlFAGenerateTransitions(ctxt, previous, 
+	if (xmlFAGenerateTransitions(ctxt, previous,
 	        (CUR=='|' || CUR==')') ? to : NULL, ctxt->atom) < 0)
 	    return(-1);
 	previous = ctxt->state;
@@ -5161,7 +5344,7 @@ xmlFAParseBranch(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr to) {
     while ((ret != 0) && (ctxt->error == 0)) {
 	ret = xmlFAParsePiece(ctxt);
 	if (ret != 0) {
-	    if (xmlFAGenerateTransitions(ctxt, previous, 
+	    if (xmlFAGenerateTransitions(ctxt, previous,
 	            (CUR=='|' || CUR==')') ? to : NULL, ctxt->atom) < 0)
 		    return(-1);
 	    previous = ctxt->state;
@@ -5199,6 +5382,10 @@ xmlFAParseRegExp(xmlRegParserCtxtPtr ctxt, int top) {
     end = ctxt->state;
     while ((CUR == '|') && (ctxt->error == 0)) {
 	NEXT;
+	if (CUR == 0) {
+	    ERROR("expecting a branch after |")
+	    return;
+	}
 	ctxt->state = start;
 	ctxt->end = NULL;
 	xmlFAParseBranch(ctxt, end);
@@ -5210,9 +5397,9 @@ xmlFAParseRegExp(xmlRegParserCtxtPtr ctxt, int top) {
 }
 
 /************************************************************************
- * 									*
- * 			The basic API					*
- * 									*
+ *									*
+ *			The basic API					*
+ *									*
  ************************************************************************/
 
 /**
@@ -5281,6 +5468,10 @@ xmlRegexpCompile(const xmlChar *regexp) {
     if (CUR != 0) {
 	ERROR("xmlFAParseRegExp: extra characters");
     }
+    if (ctxt->error != 0) {
+	xmlRegFreeParserCtxt(ctxt);
+	return(NULL);
+    }
     ctxt->end = ctxt->state;
     ctxt->start->type = XML_REGEXP_START_STATE;
     ctxt->end->type = XML_REGEXP_FINAL_STATE;
@@ -5345,10 +5536,12 @@ xmlRegexpIsDeterminist(xmlRegexpPtr comp) {
     am->nbStates = comp->nbStates;
     am->states = comp->states;
     am->determinist = -1;
+    am->flags = comp->flags;
     ret = xmlFAComputesDeterminism(am);
     am->atoms = NULL;
     am->states = NULL;
     xmlFreeAutomata(am);
+    comp->determinist = ret;
     return(ret);
 }
 
@@ -5393,9 +5586,9 @@ xmlRegFreeRegexp(xmlRegexpPtr regexp) {
 
 #ifdef LIBXML_AUTOMATA_ENABLED
 /************************************************************************
- * 									*
- * 			The Automata interface				*
- * 									*
+ *									*
+ *			The Automata interface				*
+ *									*
  ************************************************************************/
 
 /**
@@ -5426,6 +5619,7 @@ xmlNewAutomata(void) {
 	xmlFreeAutomata(ctxt);
 	return(NULL);
     }
+    ctxt->flags = 0;
 
     return(ctxt);
 }
@@ -5444,6 +5638,20 @@ xmlFreeAutomata(xmlAutomataPtr am) {
 }
 
 /**
+ * xmlAutomataSetFlags:
+ * @am: an automata
+ * @flags:  a set of internal flags
+ *
+ * Set some flags on the automata
+ */
+void
+xmlAutomataSetFlags(xmlAutomataPtr am, int flags) {
+    if (am == NULL)
+	return;
+    am->flags |= flags;
+}
+
+/**
  * xmlAutomataGetInitState:
  * @am: an automata
  *
@@ -5501,8 +5709,6 @@ xmlAutomataNewTransition(xmlAutomataPtr am, xmlAutomataStatePtr from,
     if (atom == NULL)
         return(NULL);
     atom->data = data;
-    if (atom == NULL)
-	return(NULL);
     atom->valuep = xmlStrdup(token);
 
     if (xmlFAGenerateTransitions(am, from, to, atom) < 0) {
@@ -5651,7 +5857,7 @@ xmlAutomataNewNegTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
  *
  * If @to is NULL, this creates first a new target state in the automata
  * and then adds a transition from the @from state to the target state
- * activated by a succession of input of value @token and @token2 and 
+ * activated by a succession of input of value @token and @token2 and
  * whose number is between @min and @max
  *
  * Returns the target state or NULL in case of error
@@ -5805,8 +6011,8 @@ xmlAutomataNewCountTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
  *
  * If @to is NULL, this creates first a new target state in the automata
  * and then adds a transition from the @from state to the target state
- * activated by a succession of input of value @token and @token2 and whose 
- * number is between @min and @max, moreover that transition can only be 
+ * activated by a succession of input of value @token and @token2 and whose
+ * number is between @min and @max, moreover that transition can only be
  * crossed once.
  *
  * Returns the target state or NULL in case of error
@@ -5848,7 +6054,7 @@ xmlAutomataNewOnceTrans2(xmlAutomataPtr am, xmlAutomataStatePtr from,
 	str[lenn + lenp + 1] = 0;
 
 	atom->valuep = str;
-    }    
+    }
     atom->data = data;
     atom->quant = XML_REGEXP_QUANT_ONCEONLY;
     atom->min = min;
@@ -5871,7 +6077,7 @@ xmlAutomataNewOnceTrans2(xmlAutomataPtr am, xmlAutomataStatePtr from,
     return(to);
 }
 
-    
+
 
 /**
  * xmlAutomataNewOnceTrans:
@@ -5940,7 +6146,7 @@ xmlAutomataNewOnceTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
  */
 xmlAutomataStatePtr
 xmlAutomataNewState(xmlAutomataPtr am) {
-    xmlAutomataStatePtr to; 
+    xmlAutomataStatePtr to;
 
     if (am == NULL)
 	return(NULL);
@@ -6007,7 +6213,7 @@ xmlAutomataNewAllTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
  *
  * Returns the counter number or -1 in case of error
  */
-int		
+int
 xmlAutomataNewCounter(xmlAutomataPtr am, int min, int max) {
     int ret;
 
@@ -6079,7 +6285,7 @@ xmlAutomataNewCounterTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
  *
  * Returns the compiled regexp or NULL in case of error
  */
-xmlRegexpPtr          
+xmlRegexpPtr
 xmlAutomataCompile(xmlAutomataPtr am) {
     xmlRegexpPtr ret;
 
@@ -6099,7 +6305,7 @@ xmlAutomataCompile(xmlAutomataPtr am) {
  *
  * Returns 1 if true, 0 if not, and -1 in case of error
  */
-int          
+int
 xmlAutomataIsDeterminist(xmlAutomataPtr am) {
     int ret;
 
@@ -6129,6 +6335,7 @@ struct _xmlExpCtxt {
     int size;
     int nbElems;
     int nb_nodes;
+    int maxNodes;
     const char *expr;
     const char *cur;
     int nb_cons;
@@ -6151,13 +6358,14 @@ xmlExpNewCtxt(int maxNodes, xmlDictPtr dict) {
 
     if (maxNodes <= 4096)
         maxNodes = 4096;
-    
+
     ret = (xmlExpCtxtPtr) xmlMalloc(sizeof(xmlExpCtxt));
     if (ret == NULL)
         return(NULL);
     memset(ret, 0, sizeof(xmlExpCtxt));
     ret->size = size;
     ret->nbElems = 0;
+    ret->maxNodes = maxNodes;
     ret->table = xmlMalloc(size * sizeof(xmlExpNodePtr));
     if (ret->table == NULL) {
         xmlFree(ret);
@@ -6204,7 +6412,7 @@ xmlExpFreeCtxt(xmlExpCtxtPtr ctxt) {
 /* #define DEBUG_DERIV */
 
 /*
- * TODO: 
+ * TODO:
  * - Wildcards
  * - public API for creation
  *
@@ -6272,7 +6480,7 @@ static unsigned short
 xmlExpHashNameComputeKey(const xmlChar *name) {
     unsigned short value = 0L;
     char ch;
-    
+
     if (name != NULL) {
 	value += 30 * (*name);
 	while ((ch = *name++) != 0) {
@@ -6291,7 +6499,7 @@ xmlExpHashComputeKey(xmlExpNodeType type, xmlExpNodePtr left,
                      xmlExpNodePtr right) {
     unsigned long value;
     unsigned short ret;
-    
+
     switch (type) {
         case XML_EXP_SEQ:
 	    value = left->key;
@@ -6432,7 +6640,7 @@ xmlExpHashGetEntry(xmlExpCtxtPtr ctxt, xmlExpNodeType type,
 	    left->exp_left->ref++;
 	    tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, left->exp_left, tmp,
 	                             NULL, 0, 0);
-	
+
 	    xmlExpFree(ctxt, left);
 	    return(tmp);
 	}
@@ -6489,7 +6697,7 @@ xmlExpHashGetEntry(xmlExpCtxtPtr ctxt, xmlExpNodeType type,
 	    return(right);
 	}
 	kbase = xmlExpHashComputeKey(type, left, right);
-    } else 
+    } else
         return(NULL);
 
     key = kbase % ctxt->size;
@@ -6630,7 +6838,7 @@ xmlExpRef(xmlExpNodePtr exp) {
  * xmlExpNewAtom:
  * @ctxt: the expression context
  * @name: the atom name
- * @len: the atom name lenght in byte (or -1);
+ * @len: the atom name length in byte (or -1);
  *
  * Get the atom associated to this name from that context
  *
@@ -6730,7 +6938,7 @@ xmlExpNewRange(xmlExpCtxtPtr ctxt, xmlExpNodePtr subset, int min, int max) {
  ************************************************************************/
 
 static int
-xmlExpGetLanguageInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, 
+xmlExpGetLanguageInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp,
                      const xmlChar**list, int len, int nb) {
     int tmp, tmp2;
 tail:
@@ -6743,7 +6951,7 @@ tail:
 		    return(0);
             if (nb >= len)
 	        return(-2);
-	    list[nb++] = exp->exp_str;
+	    list[nb] = exp->exp_str;
 	    return(1);
         case XML_EXP_COUNT:
 	    exp = exp->exp_left;
@@ -6767,7 +6975,7 @@ tail:
  * @ctxt: the expression context
  * @exp: the expression
  * @langList: where to store the tokens
- * @len: the allocated lenght of @list
+ * @len: the allocated length of @list
  *
  * Find all the strings used in @exp and store them in @list
  *
@@ -6775,7 +6983,7 @@ tail:
  *         -2 if there is more than @len strings
  */
 int
-xmlExpGetLanguage(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, 
+xmlExpGetLanguage(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp,
                   const xmlChar**langList, int len) {
     if ((ctxt == NULL) || (exp == NULL) || (langList == NULL) || (len <= 0))
         return(-1);
@@ -6783,7 +6991,7 @@ xmlExpGetLanguage(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp,
 }
 
 static int
-xmlExpGetStartInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, 
+xmlExpGetStartInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp,
                   const xmlChar**list, int len, int nb) {
     int tmp, tmp2;
 tail:
@@ -6798,7 +7006,7 @@ tail:
 		    return(0);
             if (nb >= len)
 	        return(-2);
-	    list[nb++] = exp->exp_str;
+	    list[nb] = exp->exp_str;
 	    return(1);
         case XML_EXP_COUNT:
 	    exp = exp->exp_left;
@@ -6833,7 +7041,7 @@ tail:
  * @ctxt: the expression context
  * @exp: the expression
  * @tokList: where to store the tokens
- * @len: the allocated lenght of @list
+ * @len: the allocated length of @list
  *
  * Find all the strings that appears at the start of the languages
  * accepted by @exp and store them in @list. E.g. for (a, b) | c
@@ -6843,7 +7051,7 @@ tail:
  *         -2 if there is more than @len strings
  */
 int
-xmlExpGetStart(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, 
+xmlExpGetStart(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp,
                const xmlChar**tokList, int len) {
     if ((ctxt == NULL) || (exp == NULL) || (tokList == NULL) || (len <= 0))
         return(-1);
@@ -7540,7 +7748,7 @@ xmlExpExpDeriveInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, xmlExpNodePtr sub) {
     xmlFree((xmlChar **) tab);
     return(ret);
 }
-    
+
 /**
  * xmlExpExpDerive:
  * @ctxt: the expressions context
@@ -7592,7 +7800,7 @@ xmlExpExpDerive(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, xmlExpNodePtr sub) {
 int
 xmlExpSubsume(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, xmlExpNodePtr sub) {
     xmlExpNodePtr tmp;
-    
+
     if ((exp == NULL) || (ctxt == NULL) || (sub == NULL))
         return(-1);
 
@@ -7636,7 +7844,7 @@ xmlExpSubsume(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, xmlExpNodePtr sub) {
 
 /************************************************************************
  *									*
- *			Parsing expression 				*
+ *			Parsing expression				*
  *									*
  ************************************************************************/
 
@@ -7740,7 +7948,7 @@ parse_quantifier:
 	ret = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, ret, NULL, NULL,
 	                         0, -1);
 	SKIP_BLANKS
-    } 
+    }
     return(ret);
 }
 
@@ -7862,7 +8070,7 @@ xmlExpDumpInt(xmlBufferPtr buf, xmlExpNodePtr expr, int glob) {
             break;
         case XML_EXP_COUNT: {
 	    char rep[40];
-	    
+
 	    c = expr->exp_left;
 	    if ((c->type == XML_EXP_SEQ) || (c->type == XML_EXP_OR))
 	        xmlExpDumpInt(buf, c, 1);