root/todo/tags/0.1.20/util/c_regex.c

Revision 442, 243.2 kB (checked in by athomas, 1 year ago)

Applied patch from swapoff.20.aarre@spamgourmet.com, closes #48. Thanks :)

Line 
1 /* Extended regular expression matching and search library,
2    version 0.12.
3    (Implements POSIX draft P1003.2/D11.2, except for some of the
4    internationalization features.)
5    Copyright (C) 1993-1999, 2000, 2001 Free Software Foundation, Inc.
6
7    The GNU C Library is free software; you can redistribute it and/or
8    modify it under the terms of the GNU Library General Public License as
9    published by the Free Software Foundation; either version 2 of the
10    License, or (at your option) any later version.
11
12    The GNU C Library is distributed in the hope that it will be useful,
13    but WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15    Library General Public License for more details.
16
17    You should have received a copy of the GNU Library General Public
18    License along with the GNU C Library; see the file COPYING.LIB.  If not,
19    write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20    Boston, MA 02111-1307, USA.  */
21
22 /* AIX requires this to be the first thing in the file. */
23 #if defined _AIX && !defined REGEX_MALLOC
24   #pragma alloca
25 #endif
26
27 #undef  _GNU_SOURCE
28 #define _GNU_SOURCE
29
30 #ifdef HAVE_CONFIG_H
31 # include <config.h>
32 #endif
33
34 #ifndef PARAMS
35 # if defined __GNUC__ || (defined __STDC__ && __STDC__)
36 #  define PARAMS(args) args
37 # else
38 #  define PARAMS(args) ()
39 # endif  /* GCC.  */
40 #endif  /* Not PARAMS.  */
41
42 #if defined STDC_HEADERS && !defined emacs
43 # include <stddef.h>
44 #else
45 /* We need this for `regex.h', and perhaps for the Emacs include files.  */
46 # include <sys/types.h>
47 #endif
48
49 #include <stdlib.h>
50
51 #define WIDE_CHAR_SUPPORT (HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_BTOWC)
52
53 /* For platforms which support the ISO C Amendment 1 functionality we
54    support user defined character classes.  */
55 #if defined _LIBC || WIDE_CHAR_SUPPORT
56 /* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>.  */
57 # include <wchar.h>
58 # include <wctype.h>
59 #endif
60
61 /* This is for multi byte string support.  */
62 #ifdef MBS_SUPPORT
63 # define CHAR_TYPE wchar_t
64 # define US_CHAR_TYPE wchar_t/* unsigned character type */
65 # define COMPILED_BUFFER_VAR wc_buffer
66 # define OFFSET_ADDRESS_SIZE 1 /* the size which STORE_NUMBER macro use */
67 # define CHAR_CLASS_SIZE ((__alignof__(wctype_t)+sizeof(wctype_t))/sizeof(CHAR_TYPE)+1)
68 # define PUT_CHAR(c) \
69   do {                                                                        \
70     if (MB_CUR_MAX == 1)                                                      \
71       putchar (c);                                                            \
72     else                                                                      \
73       printf ("%C", (wint_t) c); /* Should we use wide stream??  */           \
74   } while (0)
75 # define TRUE 1
76 # define FALSE 0
77 #else
78 # define CHAR_TYPE char
79 # define US_CHAR_TYPE unsigned char /* unsigned character type */
80 # define COMPILED_BUFFER_VAR bufp->buffer
81 # define OFFSET_ADDRESS_SIZE 2
82 # define PUT_CHAR(c) putchar (c)
83 #endif /* MBS_SUPPORT */
84
85 #ifdef _LIBC
86 /* We have to keep the namespace clean.  */
87 # define regfree(preg) __regfree (preg)
88 # define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef)
89 # define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags)
90 # define regerror(errcode, preg, errbuf, errbuf_size) \
91         __regerror(errcode, preg, errbuf, errbuf_size)
92 # define re_set_registers(bu, re, nu, st, en) \
93         __re_set_registers (bu, re, nu, st, en)
94 # define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \
95         __re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
96 # define re_match(bufp, string, size, pos, regs) \
97         __re_match (bufp, string, size, pos, regs)
98 # define re_search(bufp, string, size, startpos, range, regs) \
99         __re_search (bufp, string, size, startpos, range, regs)
100 # define re_compile_pattern(pattern, length, bufp) \
101         __re_compile_pattern (pattern, length, bufp)
102 # define re_set_syntax(syntax) __re_set_syntax (syntax)
103 # define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \
104         __re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop)
105 # define re_compile_fastmap(bufp) __re_compile_fastmap (bufp)
106
107 # define btowc __btowc
108
109 /* We are also using some library internals.  */
110 # include <locale/localeinfo.h>
111 # include <locale/elem-hash.h>
112 # include <langinfo.h>
113 # include <locale/coll-lookup.h>
114 #endif
115
116 /* This is for other GNU distributions with internationalized messages.  */
117 #if HAVE_LIBINTL_H || defined _LIBC
118 # include <libintl.h>
119 # ifdef _LIBC
120 #  undef gettext
121 #  define gettext(msgid) __dcgettext ("libc", msgid, LC_MESSAGES)
122 # endif
123 #else
124 # define gettext(msgid) (msgid)
125 #endif
126
127 #ifndef gettext_noop
128 /* This define is so xgettext can find the internationalizable
129    strings.  */
130 # define gettext_noop(String) String
131 #endif
132
133 /* The `emacs' switch turns on certain matching commands
134    that make sense only in Emacs. */
135 #ifdef emacs
136
137 # include "lisp.h"
138 # include "buffer.h"
139 # include "syntax.h"
140
141 #else  /* not emacs */
142
143 /* If we are not linking with Emacs proper,
144    we can't use the relocating allocator
145    even if config.h says that we can.  */
146 # undef REL_ALLOC
147
148 #  include <stdlib.h>
149
150 /* When used in Emacs's lib-src, we need to get bzero and bcopy somehow.
151    If nothing else has been done, use the method below.  */
152 # ifdef INHIBIT_STRING_HEADER
153 #  if !(defined HAVE_BZERO && defined HAVE_BCOPY)
154 #   if !defined bzero && !defined bcopy
155 #    undef INHIBIT_STRING_HEADER
156 #   endif
157 #  endif
158 # endif
159
160 /* This is the normal way of making sure we have a bcopy and a bzero.
161    This is used in most programs--a few other programs avoid this
162    by defining INHIBIT_STRING_HEADER.  */
163 # ifndef INHIBIT_STRING_HEADER
164 #  if defined HAVE_STRING_H || defined STDC_HEADERS || defined _LIBC
165 #   include <string.h>
166 #   ifndef bzero
167 #    ifndef _LIBC
168 #     define bzero(s, n)        (memset (s, '\0', n), (s))
169 #    else
170 #     define bzero(s, n)        __bzero (s, n)
171 #    endif
172 #   endif
173 #  else
174 #   include <strings.h>
175 #   ifndef memcmp
176 #    define memcmp(s1, s2, n)   bcmp (s1, s2, n)
177 #   endif
178 #   ifndef memcpy
179 #    define memcpy(d, s, n)     (bcopy (s, d, n), (d))
180 #   endif
181 #  endif
182 # endif
183
184 /* Define the syntax stuff for \<, \>, etc.  */
185
186 /* This must be nonzero for the wordchar and notwordchar pattern
187    commands in re_match_2.  */
188 # ifndef Sword
189 #  define Sword 1
190 # endif
191
192 # ifdef SWITCH_ENUM_BUG
193 #  define SWITCH_ENUM_CAST(x) ((int)(x))
194 # else
195 #  define SWITCH_ENUM_CAST(x) (x)
196 # endif
197
198 #endif /* not emacs */
199
200 #if defined _LIBC || HAVE_LIMITS_H
201 # include <limits.h>
202 #endif
203
204 #ifndef MB_LEN_MAX
205 # define MB_LEN_MAX 1
206 #endif
207
208 /* Get the interface, including the syntax bits.  */
209 #include "c_regex.h"
210
211 /* isalpha etc. are used for the character classes.  */
212 #include <ctype.h>
213
214 /* Jim Meyering writes:
215
216    "... Some ctype macros are valid only for character codes that
217    isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when
218    using /bin/cc or gcc but without giving an ansi option).  So, all
219    ctype uses should be through macros like ISPRINT...  If
220    STDC_HEADERS is defined, then autoconf has verified that the ctype
221    macros don't need to be guarded with references to isascii. ...
222    Defining isascii to 1 should let any compiler worth its salt
223    eliminate the && through constant folding."
224    Solaris defines some of these symbols so we must undefine them first.  */
225
226 #undef ISASCII
227 #if defined STDC_HEADERS || (!defined isascii && !defined HAVE_ISASCII)
228 # define ISASCII(c) 1
229 #else
230 # define ISASCII(c) isascii(c)
231 #endif
232
233 #ifdef isblank
234 # define ISBLANK(c) (ISASCII (c) && isblank (c))
235 #else
236 # define ISBLANK(c) ((c) == ' ' || (c) == '\t')
237 #endif
238 #ifdef isgraph
239 # define ISGRAPH(c) (ISASCII (c) && isgraph (c))
240 #else
241 # define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c))
242 #endif
243
244 #undef ISPRINT
245 #define ISPRINT(c) (ISASCII (c) && isprint (c))
246 #define ISDIGIT(c) (ISASCII (c) && isdigit (c))
247 #define ISALNUM(c) (ISASCII (c) && isalnum (c))
248 #define ISALPHA(c) (ISASCII (c) && isalpha (c))
249 #define ISCNTRL(c) (ISASCII (c) && iscntrl (c))
250 #define ISLOWER(c) (ISASCII (c) && islower (c))
251 #define ISPUNCT(c) (ISASCII (c) && ispunct (c))
252 #define ISSPACE(c) (ISASCII (c) && isspace (c))
253 #define ISUPPER(c) (ISASCII (c) && isupper (c))
254 #define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
255
256 #ifdef _tolower
257 # define TOLOWER(c) _tolower(c)
258 #else
259 # define TOLOWER(c) tolower(c)
260 #endif
261
262 #ifndef NULL
263 # define NULL (void *)0
264 #endif
265
266 /* We remove any previous definition of `SIGN_EXTEND_CHAR',
267    since ours (we hope) works properly with all combinations of
268    machines, compilers, `char' and `unsigned char' argument types.
269    (Per Bothner suggested the basic approach.)  */
270 #undef SIGN_EXTEND_CHAR
271 #if __STDC__
272 # define SIGN_EXTEND_CHAR(c) ((signed char) (c))
273 #else  /* not __STDC__ */
274 /* As in Harbison and Steele.  */
275 # define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
276 #endif
277
278 #ifndef emacs
279 /* How many characters in the character set.  */
280 # define CHAR_SET_SIZE 256
281
282 # ifdef SYNTAX_TABLE
283
284 extern char *re_syntax_table;
285
286 # else /* not SYNTAX_TABLE */
287
288 static char re_syntax_table[CHAR_SET_SIZE];
289
290 static void init_syntax_once PARAMS ((void));
291
292 static void
293 init_syntax_once ()
294 {
295    register int c;
296    static int done = 0;
297
298    if (done)
299      return;
300    bzero (re_syntax_table, sizeof re_syntax_table);
301
302    for (c = 0; c < CHAR_SET_SIZE; ++c)
303      if (ISALNUM (c))
304         re_syntax_table[c] = Sword;
305
306    re_syntax_table['_'] = Sword;
307
308    done = 1;
309 }
310
311 # endif /* not SYNTAX_TABLE */
312
313 # define SYNTAX(c) re_syntax_table[(unsigned char) (c)]
314
315 #endif /* emacs */
316
317 /* Should we use malloc or alloca?  If REGEX_MALLOC is not defined, we
318    use `alloca' instead of `malloc'.  This is because using malloc in
319    re_search* or re_match* could cause memory leaks when C-g is used in
320    Emacs; also, malloc is slower and causes storage fragmentation.  On
321    the other hand, malloc is more portable, and easier to debug.
322
323    Because we sometimes use alloca, some routines have to be macros,
324    not functions -- `alloca'-allocated space disappears at the end of the
325    function it is called in.  */
326
327 #ifdef REGEX_MALLOC
328
329 # define REGEX_ALLOCATE malloc
330 # define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
331 # define REGEX_FREE free
332
333 #else /* not REGEX_MALLOC  */
334
335 /* Emacs already defines alloca, sometimes.  */
336 # ifndef alloca
337
338 /* Make alloca work the best possible way.  */
339 #  ifdef __GNUC__
340 #   define alloca __builtin_alloca
341 #  else /* not __GNUC__ */
342 #   if HAVE_ALLOCA_H
343 #    include <alloca.h>
344 #   endif /* HAVE_ALLOCA_H */
345 #  endif /* not __GNUC__ */
346
347 # endif /* not alloca */
348
349 # define REGEX_ALLOCATE alloca
350
351 /* Assumes a `char *destination' variable.  */
352 # define REGEX_REALLOCATE(source, osize, nsize)                         \
353   (destination = (char *) alloca (nsize),                               \
354    memcpy (destination, source, osize))
355
356 /* No need to do anything to free, after alloca.  */
357 # define REGEX_FREE(arg) ((void)0) /* Do nothing!  But inhibit gcc warning.  */
358
359 #endif /* not REGEX_MALLOC */
360
361 /* Define how to allocate the failure stack.  */
362
363 #if defined REL_ALLOC && defined REGEX_MALLOC
364
365 # define REGEX_ALLOCATE_STACK(size)                             \
366   r_alloc (&failure_stack_ptr, (size))
367 # define REGEX_REALLOCATE_STACK(source, osize, nsize)           \
368   r_re_alloc (&failure_stack_ptr, (nsize))
369 # define REGEX_FREE_STACK(ptr)                                  \
370   r_alloc_free (&failure_stack_ptr)
371
372 #else /* not using relocating allocator */
373
374 # ifdef REGEX_MALLOC
375
376 #  define REGEX_ALLOCATE_STACK malloc
377 #  define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize)
378 #  define REGEX_FREE_STACK free
379
380 # else /* not REGEX_MALLOC */
381
382 #  define REGEX_ALLOCATE_STACK alloca
383
384 #  define REGEX_REALLOCATE_STACK(source, osize, nsize)                  \
385    REGEX_REALLOCATE (source, osize, nsize)
386 /* No need to explicitly free anything.  */
387 #  define REGEX_FREE_STACK(arg)
388
389 # endif /* not REGEX_MALLOC */
390 #endif /* not using relocating allocator */
391
392
393 /* True if `size1' is nonzero and PTR is pointing anywhere inside
394    `string1' or just past its end.  This works if PTR is NULL, which is
395    a good thing.  */
396 #define FIRST_STRING_P(ptr)                                     \
397   (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
398
399 /* (Re)Allocate N items of type T using malloc, or fail.  */
400 #define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
401 #define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
402 #define RETALLOC_IF(addr, n, t) \
403   if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t)
404 #define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
405
406 #define BYTEWIDTH 8 /* In bits.  */
407
408 #define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
409
410 #undef MAX
411 #undef MIN
412 #define MAX(a, b) ((a) > (b) ? (a) : (b))
413 #define MIN(a, b) ((a) < (b) ? (a) : (b))
414
415 typedef char boolean;
416 #define false 0
417 #define true 1
418
419 static int re_match_2_internal PARAMS ((struct re_pattern_buffer *bufp,
420                                         const char *string1, int size1,
421                                         const char *string2, int size2,
422                                         int pos,
423                                         struct re_registers *regs,
424                                         int stop));
425
426 /* These are the command codes that appear in compiled regular
427    expressions.  Some opcodes are followed by argument bytes.  A
428    command code can specify any interpretation whatsoever for its
429    arguments.  Zero bytes may appear in the compiled regular expression.  */
430
431 typedef enum
432 {
433   no_op = 0,
434
435   /* Succeed right away--no more backtracking.  */
436   succeed,
437
438         /* Followed by one byte giving n, then by n literal bytes.  */
439   exactn,
440
441 #ifdef MBS_SUPPORT
442         /* Same as exactn, but contains binary data.  */
443   exactn_bin,
444 #endif
445
446         /* Matches any (more or less) character.  */
447   anychar,
448
449         /* Matches any one char belonging to specified set.  First
450            following byte is number of bitmap bytes.  Then come bytes
451            for a bitmap saying which chars are in.  Bits in each byte
452            are ordered low-bit-first.  A character is in the set if its
453            bit is 1.  A character too large to have a bit in the map is
454            automatically not in the set.  */
455         /* ifdef MBS_SUPPORT, following element is length of character
456            classes, length of collating symbols, length of equivalence
457            classes, length of character ranges, and length of characters.
458            Next, character class element, collating symbols elements,
459            equivalence class elements, range elements, and character
460            elements follow.
461            See regex_compile function.  */
462   charset,
463
464         /* Same parameters as charset, but match any character that is
465            not one of those specified.  */
466   charset_not,
467
468         /* Start remembering the text that is matched, for storing in a
469            register.  Followed by one byte with the register number, in
470            the range 0 to one less than the pattern buffer's re_nsub
471            field.  Then followed by one byte with the number of groups
472            inner to this one.  (This last has to be part of the
473            start_memory only because we need it in the on_failure_jump
474            of re_match_2.)  */
475   start_memory,
476
477         /* Stop remembering the text that is matched and store it in a
478            memory register.  Followed by one byte with the register
479            number, in the range 0 to one less than `re_nsub' in the
480            pattern buffer, and one byte with the number of inner groups,
481            just like `start_memory'.  (We need the number of inner
482            groups here because we don't have any easy way of finding the
483            corresponding start_memory when we're at a stop_memory.)  */
484   stop_memory,
485
486         /* Match a duplicate of something remembered. Followed by one
487            byte containing the register number.  */
488   duplicate,
489
490         /* Fail unless at beginning of line.  */
491   begline,
492
493         /* Fail unless at end of line.  */
494   endline,
495
496         /* Succeeds if at beginning of buffer (if emacs) or at beginning
497            of string to be matched (if not).  */
498   begbuf,
499
500         /* Analogously, for end of buffer/string.  */
501   endbuf,
502
503         /* Followed by two byte relative address to which to jump.  */
504   jump,
505
506         /* Same as jump, but marks the end of an alternative.  */
507   jump_past_alt,
508
509         /* Followed by two-byte relative address of place to resume at
510            in case of failure.  */
511         /* ifdef MBS_SUPPORT, the size of address is 1.  */
512   on_failure_jump,
513
514         /* Like on_failure_jump, but pushes a placeholder instead of the
515            current string position when executed.  */
516   on_failure_keep_string_jump,
517
518         /* Throw away latest failure point and then jump to following
519            two-byte relative address.  */
520         /* ifdef MBS_SUPPORT, the size of address is 1.  */
521   pop_failure_jump,
522
523         /* Change to pop_failure_jump if we know we won't have to backtrack to
524            match; otherwise change to jump.  This is used to jump
525            back to the beginning of a repeat.  If what follows this jump
526            clearly won't match what the repeat does, such that we can be
527            sure that there is no use backtracking out of repetitions
528            already matched, then we change it to a pop_failure_jump.
529            Followed by two-byte address.  */
530         /* ifdef MBS_SUPPORT, the size of address is 1.  */
531   maybe_pop_jump,
532
533         /* Jump to following two-byte address, and push a dummy failure
534            point. This failure point will be thrown away if an attempt
535            is made to use it for a failure.  A `+' construct makes this
536            before the first repeat.  Also used as an intermediary kind
537            of jump when compiling an alternative.  */
538         /* ifdef MBS_SUPPORT, the size of address is 1.  */
539   dummy_failure_jump,
540
541         /* Push a dummy failure point and continue.  Used at the end of
542            alternatives.  */
543   push_dummy_failure,
544
545         /* Followed by two-byte relative address and two-byte number n.
546            After matching N times, jump to the address upon failure.  */
547         /* ifdef MBS_SUPPORT, the size of address is 1.  */
548   succeed_n,
549
550         /* Followed by two-byte relative address, and two-byte number n.
551            Jump to the address N times, then fail.  */
552         /* ifdef MBS_SUPPORT, the size of address is 1.  */
553   jump_n,
554
555         /* Set the following two-byte relative address to the
556            subsequent two-byte number.  The address *includes* the two
557            bytes of number.  */
558         /* ifdef MBS_SUPPORT, the size of address is 1.  */
559   set_number_at,
560
561   wordchar,     /* Matches any word-constituent character.  */
562   notwordchar,  /* Matches any char that is not a word-constituent.  */
563
564   wordbeg,      /* Succeeds if at word beginning.  */
565   wordend,      /* Succeeds if at word end.  */
566
567   wordbound,    /* Succeeds if at a word boundary.  */
568   notwordbound  /* Succeeds if not at a word boundary.  */
569
570 #ifdef emacs
571   ,before_dot,  /* Succeeds if before point.  */
572   at_dot,       /* Succeeds if at point.  */
573   after_dot,    /* Succeeds if after point.  */
574
575         /* Matches any character whose syntax is specified.  Followed by
576            a byte which contains a syntax code, e.g., Sword.  */
577   syntaxspec,
578
579         /* Matches any character whose syntax is not that specified.  */
580   notsyntaxspec
581 #endif /* emacs */
582 } re_opcode_t;
583
584 /* Common operations on the compiled pattern.  */
585
586 /* Store NUMBER in two contiguous bytes starting at DESTINATION.  */
587 /* ifdef MBS_SUPPORT, we store NUMBER in 1 element.  */
588
589 #ifdef MBS_SUPPORT
590 # define STORE_NUMBER(destination, number)                              \
591   do {                                                                  \
592     *(destination) = (US_CHAR_TYPE)(number);                            \
593   } while (0)
594 #else
595 # define STORE_NUMBER(destination, number)                              \
596   do {                                                                  \
597     (destination)[0] = (number) & 0377;                                 \
598     (destination)[1] = (number) >> 8;                                   \
599   } while (0)
600 #endif /* MBS_SUPPORT */
601
602 /* Same as STORE_NUMBER, except increment DESTINATION to
603    the byte after where the number is stored.  Therefore, DESTINATION
604    must be an lvalue.  */
605 /* ifdef MBS_SUPPORT, we store NUMBER in 1 element.  */
606
607 #define STORE_NUMBER_AND_INCR(destination, number)                      \
608   do {                                                                  \
609     STORE_NUMBER (destination, number);                                 \
610     (destination) += OFFSET_ADDRESS_SIZE;                               \
611   } while (0)
612
/* Put into DESTINATION a number stored in two contiguous bytes starting
   at SOURCE: the low-order byte comes first, and the second byte is
   sign-extended to supply the high-order bits.  */
/* ifdef MBS_SUPPORT, the number occupies 1 element, which is copied
   as is.  */

#ifdef MBS_SUPPORT
# define EXTRACT_NUMBER(destination, source)                            \
  do {                                                                  \
    (destination) = *(source);                                          \
  } while (0)
#else
/* The high byte is scaled by multiplication rather than `<< 8':
   left-shifting a negative value is undefined behavior in ISO C,
   whereas the product is always well defined here.  */
# define EXTRACT_NUMBER(destination, source)                            \
  do {                                                                  \
    (destination) = *(source) & 0377;                                   \
    (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) * 256;          \
  } while (0)
#endif
629
630 #ifdef DEBUG
631 static void extract_number _RE_ARGS ((int *dest, US_CHAR_TYPE *source));
632 static void
633 extract_number (dest, source)
634     int *dest;
635     US_CHAR_TYPE *source;
636 {
637 #ifdef MBS_SUPPORT
638   *dest = *source;
639 #else
640   int temp = SIGN_EXTEND_CHAR (*(source + 1));
641   *dest = *source & 0377;
642   *dest += temp << 8;
643 #endif
644 }
645
646 # ifndef EXTRACT_MACROS /* To debug the macros.  */
647 #  undef EXTRACT_NUMBER
648 #  define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
649 # endif /* not EXTRACT_MACROS */
650
651 #endif /* DEBUG */
652
653 /* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
654    SOURCE must be an lvalue.  */
655
656 #define EXTRACT_NUMBER_AND_INCR(destination, source)                    \
657   do {                                                                  \
658     EXTRACT_NUMBER (destination, source);                               \
659     (source) += OFFSET_ADDRESS_SIZE;                                    \
660   } while (0)
661
662 #ifdef DEBUG
663 static void extract_number_and_incr _RE_ARGS ((int *destination,
664                                                US_CHAR_TYPE **source));
665 static void
666 extract_number_and_incr (destination, source)
667     int *destination;
668     US_CHAR_TYPE **source;
669 {
670   extract_number (destination, *source);
671   *source += OFFSET_ADDRESS_SIZE;
672 }
673
674 # ifndef EXTRACT_MACROS
675 #  undef EXTRACT_NUMBER_AND_INCR
676 #  define EXTRACT_NUMBER_AND_INCR(dest, src) \
677   extract_number_and_incr (&dest, &src)
678 # endif /* not EXTRACT_MACROS */
679
680 #endif /* DEBUG */
681
682 /* If DEBUG is defined, Regex prints many voluminous messages about what
683    it is doing (if the variable `debug' is nonzero).  If linked with the
684    main program in `iregex.c', you can enter patterns and strings
685    interactively.  And if linked with the main program in `main.c' and
686    the other test files, you can run the already-written tests.  */
687
688 #ifdef DEBUG
689
690 /* We use standard I/O for debugging.  */
691 # include <stdio.h>
692
693 /* It is useful to test things that ``must'' be true when debugging.  */
694 # include <assert.h>
695
696 static int debug;
697
698 # define DEBUG_STATEMENT(e) e
699 # define DEBUG_PRINT1(x) if (debug) printf (x)
700 # define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2)
701 # define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3)
702 # define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4)
703 # define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)                          \
704   if (debug) print_partial_compiled_pattern (s, e)
705 # define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)                 \
706   if (debug) print_double_string (w, s1, sz1, s2, sz2)
707
708
709 /* Print the fastmap in human-readable form.  */
710
711 void
712 print_fastmap (fastmap)
713     char *fastmap;
714 {
715   unsigned was_a_range = 0;
716   unsigned i = 0;
717
718   while (i < (1 << BYTEWIDTH))
719     {
720       if (fastmap[i++])
721         {
722           was_a_range = 0;
723           putchar (i - 1);
724           while (i < (1 << BYTEWIDTH)  &&  fastmap[i])
725             {
726               was_a_range = 1;
727               i++;
728             }
729           if (was_a_range)
730             {
731               printf ("-");
732               putchar (i - 1);
733             }
734         }
735     }
736   putchar ('\n');
737 }
738
739
740 /* Print a compiled pattern string in human-readable form, starting at
741    the START pointer into it and ending just before the pointer END.  */
742
743 void
744 print_partial_compiled_pattern (start, end)
745     US_CHAR_TYPE *start;
746     US_CHAR_TYPE *end;
747 {
748   int mcnt, mcnt2;
749   US_CHAR_TYPE *p1;
750   US_CHAR_TYPE *p = start;
751   US_CHAR_TYPE *pend = end;
752
753   if (start == NULL)
754     {
755       printf ("(null)\n");
756       return;
757     }
758
759   /* Loop over pattern commands.  */
760   while (p < pend)
761     {
762 #ifdef _LIBC
763       printf ("%td:\t", p - start);
764 #else
765       printf ("%ld:\t", (long int) (p - start));
766 #endif
767
768       switch ((re_opcode_t) *p++)
769         {
770         case no_op:
771           printf ("/no_op");
772           break;
773
774         case exactn:
775           mcnt = *p++;
776           printf ("/exactn/%d", mcnt);
777           do
778             {
779               putchar ('/');
780               PUT_CHAR (*p++);
781             }
782           while (--mcnt);
783           break;
784
785 #ifdef MBS_SUPPORT
786         case exactn_bin:
787           mcnt = *p++;
788           printf ("/exactn_bin/%d", mcnt);
789           do
790             {
791               printf("/%lx", (long int) *p++);
792             }
793           while (--mcnt);
794           break;
795 #endif /* MBS_SUPPORT */
796
797         case start_memory:
798           mcnt = *p++;
799           printf ("/start_memory/%d/%ld", mcnt, (long int) *p++);
800           break;
801
802         case stop_memory:
803           mcnt = *p++;
804           printf ("/stop_memory/%d/%ld", mcnt, (long int) *p++);
805           break;
806
807         case duplicate:
808           printf ("/duplicate/%ld", (long int) *p++);
809           break;
810
811         case anychar:
812           printf ("/anychar");
813           break;
814
815         case charset:
816         case charset_not:
817           {
818 #ifdef MBS_SUPPORT
819             int i, length;
820             wchar_t *workp = p;
821             printf ("/charset [%s",
822                     (re_opcode_t) *(workp - 1) == charset_not ? "^" : "");
823             p += 5;
824             length = *workp++; /* the length of char_classes */
825             for (i=0 ; i<length ; i++)
826               printf("[:%lx:]", (long int) *p++);
827             length = *workp++; /* the length of collating_symbol */
828             for (i=0 ; i<length ;)
829               {
830                 printf("[.");
831                 while(*p != 0)
832                   PUT_CHAR((i++,*p++));
833                 i++,p++;
834                 printf(".]");
835               }
836             length = *workp++; /* the length of equivalence_class */
837             for (i=0 ; i<length ;)
838               {
839                 printf("[=");
840                 while(*p != 0)
841                   PUT_CHAR((i++,*p++));
842                 i++,p++;
843                 printf("=]");
844               }
845             length = *workp++; /* the length of char_range */
846             for (i=0 ; i<length ; i++)
847               {
848                 wchar_t range_start = *<