diff -rNC3 sed-4.0.3/BUGS sed-4.0.4/BUGS *** sed-4.0.3/BUGS Fri Nov 8 21:39:59 2002 --- sed-4.0.4/BUGS Fri Nov 29 18:47:07 2002 *************** *** 45,50 **** --- 45,59 ---- approaches are allowed by POSIX.2.) + `N' command on the last line + + Most versions of sed exit without printing anything when the `N' + command is issued on the last line of a file. GNU sed instead + prints pattern space before exiting unless of course the `-n' + command switch has been specified. More information on the reason + behind this choice can be found in the Info manual. + + regexp syntax clashes sed uses the Posix basic regular expression syntax. According to diff -rNC3 sed-4.0.3/ChangeLog sed-4.0.4/ChangeLog *** sed-4.0.3/ChangeLog Mon Oct 28 07:56:29 2002 --- sed-4.0.4/ChangeLog Thu Dec 12 20:07:41 2002 *************** *** 1,3 **** --- 1,23 ---- + 2002-12-12 Paolo Bonzini + + * Version 4.0.4 released + + 2002-11-21 Paolo Bonzini + + * Version 4.0.3 released + + 2002-11-19 Paolo Bonzini + + * Version 4.0.2 released + + 2002-11-05 Paolo Bonzini + + * Version 4.0.1 released + + 2002-10-23 Paolo Bonzini + + * Version 4.0 released + 2002-10-28 Paolo Bonzini * lib/utils.c: don't fail for EBADF in fflush diff -rNC3 sed-4.0.3/NEWS sed-4.0.4/NEWS *** sed-4.0.3/NEWS Thu Nov 21 12:45:35 2002 --- sed-4.0.4/NEWS Fri Nov 29 19:00:40 2002 *************** *** 1,3 **** --- 1,10 ---- + Sed 4.0.4 + + * documentation fixes + + * update regex matcher + + ---------------------------------------------------------------------------- Sed 4.0.3 * fix packaging problem (two missing translation catalogs) diff -rNC3 sed-4.0.3/THANKS sed-4.0.4/THANKS *** sed-4.0.3/THANKS Sat Oct 19 09:03:48 2002 --- sed-4.0.4/THANKS Fri Dec 6 17:27:48 2002 *************** *** 1,6 **** --- 1,8 ---- Alan Modra + Arnold Robbins Andreas Schwab Andrew Herbert + Bruno Haible Chip Salzenberg Chris Weber David Eckelkamp *************** *** 15,23 **** Gaumond Pierre Greg Ubben J.T. 
Conklin - Bruno Haible Jason Molenda Jim Meyering Karl Heuer Kaveh R. Ghazi Kevin Buettner --- 17,25 ---- Gaumond Pierre Greg Ubben J.T. Conklin Jason Molenda Jim Meyering + Karl Berry Karl Heuer Kaveh R. Ghazi Kevin Buettner diff -rNC3 sed-4.0.3/bootstrap.sh sed-4.0.4/bootstrap.sh *** sed-4.0.3/bootstrap.sh Thu Nov 21 12:45:01 2002 --- sed-4.0.4/bootstrap.sh Thu Dec 12 20:14:01 2002 *************** *** 19,25 **** */ #define PACKAGE "sed" ! #define VERSION "4.0.3-boot" #define BOOTSTRAP 1 /* Undefine if your compiler/headers have a conflicting definition. */ --- 19,25 ---- */ #define PACKAGE "sed" ! #define VERSION "4.0.4-boot" #define BOOTSTRAP 1 /* Undefine if your compiler/headers have a conflicting definition. */ diff -rNC3 sed-4.0.3/configure sed-4.0.4/configure *** sed-4.0.3/configure Thu Nov 21 12:44:54 2002 --- sed-4.0.4/configure Thu Dec 12 20:13:49 2002 *************** *** 1,6 **** #! /bin/sh # Guess values for system-dependent variables and create Makefiles. ! # Generated by GNU Autoconf 2.54 for sed 4.0.3. # # Report bugs to . # --- 1,6 ---- #! /bin/sh # Guess values for system-dependent variables and create Makefiles. ! # Generated by GNU Autoconf 2.54 for sed 4.0.4. # # Report bugs to . # *************** *** 265,272 **** # Identity of this package. PACKAGE_NAME='sed' PACKAGE_TARNAME='sed' ! PACKAGE_VERSION='4.0.3' ! PACKAGE_STRING='sed 4.0.3' PACKAGE_BUGREPORT='bonzini@gnu.org' ac_unique_file="sed/sed.c" --- 265,272 ---- # Identity of this package. PACKAGE_NAME='sed' PACKAGE_TARNAME='sed' ! PACKAGE_VERSION='4.0.4' ! PACKAGE_STRING='sed 4.0.4' PACKAGE_BUGREPORT='bonzini@gnu.org' ac_unique_file="sed/sed.c" *************** *** 776,782 **** # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF ! \`configure' configures sed 4.0.3 to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... 
--- 776,782 ---- # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF ! \`configure' configures sed 4.0.4 to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... *************** *** 842,848 **** if test -n "$ac_init_help"; then case $ac_init_help in ! short | recursive ) echo "Configuration of sed 4.0.3:";; esac cat <<\_ACEOF --- 842,848 ---- if test -n "$ac_init_help"; then case $ac_init_help in ! short | recursive ) echo "Configuration of sed 4.0.4:";; esac cat <<\_ACEOF *************** *** 936,942 **** test -n "$ac_init_help" && exit 0 if $ac_init_version; then cat <<\_ACEOF ! sed configure 4.0.3 generated by GNU Autoconf 2.54 Copyright 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001, 2002 --- 936,942 ---- test -n "$ac_init_help" && exit 0 if $ac_init_version; then cat <<\_ACEOF ! sed configure 4.0.4 generated by GNU Autoconf 2.54 Copyright 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001, 2002 *************** *** 951,957 **** This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. ! It was created by sed $as_me 4.0.3, which was generated by GNU Autoconf 2.54. Invocation command line was $ $0 $@ --- 951,957 ---- This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. ! It was created by sed $as_me 4.0.4, which was generated by GNU Autoconf 2.54. Invocation command line was $ $0 $@ *************** *** 1525,1531 **** # Define the identity of the package. PACKAGE=sed ! VERSION=4.0.3 cat >>confdefs.h <<_ACEOF --- 1525,1531 ---- # Define the identity of the package. PACKAGE=sed ! VERSION=4.0.4 cat >>confdefs.h <<_ACEOF *************** *** 7124,7130 **** } >&5 cat >&5 <<_CSEOF ! This file was extended by sed $as_me 4.0.3, which was generated by GNU Autoconf 2.54. 
Invocation command line was CONFIG_FILES = $CONFIG_FILES --- 7124,7130 ---- } >&5 cat >&5 <<_CSEOF ! This file was extended by sed $as_me 4.0.4, which was generated by GNU Autoconf 2.54. Invocation command line was CONFIG_FILES = $CONFIG_FILES *************** *** 7186,7192 **** cat >>$CONFIG_STATUS <<_ACEOF ac_cs_version="\\ ! sed config.status 4.0.3 configured by $0, generated by GNU Autoconf 2.54, with options \\"`echo "$ac_configure_args" | sed 's/[\\""\`\$]/\\\\&/g'`\\" --- 7186,7192 ---- cat >>$CONFIG_STATUS <<_ACEOF ac_cs_version="\\ ! sed config.status 4.0.4 configured by $0, generated by GNU Autoconf 2.54, with options \\"`echo "$ac_configure_args" | sed 's/[\\""\`\$]/\\\\&/g'`\\" diff -rNC3 sed-4.0.3/configure.ac sed-4.0.4/configure.ac *** sed-4.0.3/configure.ac Thu Nov 21 12:44:45 2002 --- sed-4.0.4/configure.ac Thu Dec 12 20:08:07 2002 *************** *** 1,5 **** dnl Process this file with -*- autoconf -*- to produce a configure script. ! AC_INIT(sed, 4.0.3, bonzini@gnu.org, sed) AC_CONFIG_AUX_DIR(config) AC_CONFIG_SRCDIR([sed/sed.c]) AM_CONFIG_HEADER(config.h:config_h.in) --- 1,5 ---- dnl Process this file with -*- autoconf -*- to produce a configure script. ! AC_INIT(sed, 4.0.4, bonzini@gnu.org, sed) AC_CONFIG_AUX_DIR(config) AC_CONFIG_SRCDIR([sed/sed.c]) AM_CONFIG_HEADER(config.h:config_h.in) diff -rNC3 sed-4.0.3/doc/Makefile.am sed-4.0.4/doc/Makefile.am *** sed-4.0.3/doc/Makefile.am Tue Nov 19 20:27:48 2002 --- sed-4.0.4/doc/Makefile.am Fri Dec 6 17:44:30 2002 *************** *** 1,11 **** ## Process this file with automake to produce Makefile.in info_TEXINFOS = sed.texi dist_man_MANS = sed.1 dist_noinst_DATA = sed.x sed-in.texi dist_noinst_SCRIPTS = groupify.sed CLEANFILES = sed.html TEXI2DVI = $(top_srcdir)/config/texi2dvi --expand ! 
HELP2MAN = $(top_srcdir)/config/help2man SED = $(top_builddir)/sed/sed # To produce better quality output, in the example sed --- 1,12 ---- ## Process this file with automake to produce Makefile.in info_TEXINFOS = sed.texi + sed_TEXINFOS = config.texi version.texi dist_man_MANS = sed.1 dist_noinst_DATA = sed.x sed-in.texi dist_noinst_SCRIPTS = groupify.sed CLEANFILES = sed.html TEXI2DVI = $(top_srcdir)/config/texi2dvi --expand ! HELP2MAN = $(top_srcdir)/config/help2man SED = $(top_builddir)/sed/sed # To produce better quality output, in the example sed *************** *** 16,22 **** sed -nf groupify.sed < $(srcdir)/sed-in.texi > $(srcdir)/sed.texi sed.1: $(top_srcdir)/sed/sed.c sed.x ! $(HELP2MAN) --include sed.x $(SED) > $(srcdir)/sed.1 dist-hook: touch $(distdir)/sed.1 --- 17,23 ---- sed -nf groupify.sed < $(srcdir)/sed-in.texi > $(srcdir)/sed.texi sed.1: $(top_srcdir)/sed/sed.c sed.x ! $(HELP2MAN) -p sed --include sed.x $(SED) > $(srcdir)/sed.1 dist-hook: touch $(distdir)/sed.1 diff -rNC3 sed-4.0.3/doc/Makefile.in sed-4.0.4/doc/Makefile.in *** sed-4.0.3/doc/Makefile.in Thu Nov 21 12:45:10 2002 --- sed-4.0.4/doc/Makefile.in Thu Dec 12 20:14:24 2002 *************** *** 142,153 **** sysconfdir = @sysconfdir@ target_alias = @target_alias@ info_TEXINFOS = sed.texi dist_man_MANS = sed.1 dist_noinst_DATA = sed.x sed-in.texi dist_noinst_SCRIPTS = groupify.sed CLEANFILES = sed.html TEXI2DVI = $(top_srcdir)/config/texi2dvi --expand ! HELP2MAN = $(top_srcdir)/config/help2man SED = $(top_builddir)/sed/sed subdir = doc mkinstalldirs = $(SHELL) $(top_srcdir)/config/mkinstalldirs --- 142,154 ---- sysconfdir = @sysconfdir@ target_alias = @target_alias@ info_TEXINFOS = sed.texi + sed_TEXINFOS = config.texi version.texi dist_man_MANS = sed.1 dist_noinst_DATA = sed.x sed-in.texi dist_noinst_SCRIPTS = groupify.sed CLEANFILES = sed.html TEXI2DVI = $(top_srcdir)/config/texi2dvi --expand ! 
HELP2MAN = $(top_srcdir)/config/help2man SED = $(top_builddir)/sed/sed subdir = doc mkinstalldirs = $(SHELL) $(top_srcdir)/config/mkinstalldirs *************** *** 169,176 **** DATA = $(dist_noinst_DATA) DIST_COMMON = $(dist_man_MANS) $(dist_noinst_DATA) \ ! $(dist_noinst_SCRIPTS) Makefile.am Makefile.in stamp-vti \ ! version.texi all: all-am .SUFFIXES: --- 170,177 ---- DATA = $(dist_noinst_DATA) DIST_COMMON = $(dist_man_MANS) $(dist_noinst_DATA) \ ! $(dist_noinst_SCRIPTS) $(sed_TEXINFOS) Makefile.am Makefile.in \ ! stamp-vti version.texi all: all-am .SUFFIXES: *************** *** 195,203 **** TEXINPUTS="$(am__TEXINFO_TEX_DIR)$(PATH_SEPARATOR)$$TEXINPUTS" \ MAKEINFO='$(MAKEINFO) $(AM_MAKEINFOFLAGS) $(MAKEINFOFLAGS) -I $(srcdir)' \ $(TEXI2PDF) `test -f '$<' || echo '$(srcdir)/'`$< ! sed.info: sed.texi version.texi ! sed.dvi: sed.texi version.texi ! sed.pdf: sed.texi version.texi version.texi: stamp-vti stamp-vti: sed.texi $(top_srcdir)/configure.ac @(dir=.; test -f ./sed.texi || dir=$(srcdir); \ --- 196,204 ---- TEXINPUTS="$(am__TEXINFO_TEX_DIR)$(PATH_SEPARATOR)$$TEXINPUTS" \ MAKEINFO='$(MAKEINFO) $(AM_MAKEINFOFLAGS) $(MAKEINFOFLAGS) -I $(srcdir)' \ $(TEXI2PDF) `test -f '$<' || echo '$(srcdir)/'`$< ! sed.info: sed.texi version.texi $(sed_TEXINFOS) ! sed.dvi: sed.texi version.texi $(sed_TEXINFOS) ! sed.pdf: sed.texi version.texi $(sed_TEXINFOS) version.texi: stamp-vti stamp-vti: sed.texi $(top_srcdir)/configure.ac @(dir=.; test -f ./sed.texi || dir=$(srcdir); \ *************** *** 468,474 **** sed -nf groupify.sed < $(srcdir)/sed-in.texi > $(srcdir)/sed.texi sed.1: $(top_srcdir)/sed/sed.c sed.x ! $(HELP2MAN) --include sed.x $(SED) > $(srcdir)/sed.1 dist-hook: touch $(distdir)/sed.1 --- 469,475 ---- sed -nf groupify.sed < $(srcdir)/sed-in.texi > $(srcdir)/sed.texi sed.1: $(top_srcdir)/sed/sed.c sed.x ! 
$(HELP2MAN) -p sed --include sed.x $(SED) > $(srcdir)/sed.1 dist-hook: touch $(distdir)/sed.1 diff -rNC3 sed-4.0.3/doc/config.texi sed-4.0.4/doc/config.texi *** sed-4.0.3/doc/config.texi Thu Jan 1 01:00:00 1970 --- sed-4.0.4/doc/config.texi Thu Dec 12 20:05:56 2002 *************** *** 0 **** --- 1,9 ---- + @dircategory Text creation and manipulation + @direntry + * sed: (sed). Stream EDitor. + + @end direntry + + @clear PERL + @set SSEDEXT @acronym{GNU} extensions + @set SSED @acronym{GNU} @command{sed} diff -rNC3 sed-4.0.3/doc/groupify.sed sed-4.0.4/doc/groupify.sed *** sed-4.0.3/doc/groupify.sed Sun Dec 16 13:29:55 2001 --- sed-4.0.4/doc/groupify.sed Wed Nov 27 13:49:16 2002 *************** *** 34,40 **** h :c n ! /^@end/! { /^[ ]*$/! { H bc --- 34,40 ---- h :c n ! /^@end example/! { /^[ ]*$/! { H bc diff -rNC3 sed-4.0.3/doc/sed-in.texi sed-4.0.4/doc/sed-in.texi *** sed-4.0.3/doc/sed-in.texi Tue Nov 19 20:54:33 2002 --- sed-4.0.4/doc/sed-in.texi Thu Dec 12 20:06:24 2002 *************** *** 3,8 **** --- 3,15 ---- @c -- Stuff that needs adding: ---------------------------------------------- @c (document the `;' command-separator) @c -------------------------------------------------------------------------- + @c Check for consistency: regexps in @code, text that they match in @samp. + @c + @c Tips: + @c @command for command + @c @samp for command fragments: @samp{cat -s} + @c @code for sed commands and flags + @c Use ``quote'' not `quote' or "quote". @c @c %**start of header @setfilename sed.info *************** *** 21,45 **** @defcodeindex op @syncodeindex op fn ! @direntry ! * sed: (sed). Stream EDitor. ! ! @end direntry ! ! @clear PERL ! @set SSEDEXT @acronym{GNU} extensions ! @set SSED @acronym{GNU} @code{sed} @copying This file documents version @value{VERSION} of @value{SSED}, a stream editor. 
- @ignore - Published by the Free Software Foundation, @* - 59 Temple Place - Suite 330 @* - Boston, MA 02111-1307, USA - @end ignore - Copyright (C) 1998, 1999, 2001, 2002 Free Software Foundation, Inc. This document is released under the terms of the GNU Free Documentation --- 28,39 ---- @defcodeindex op @syncodeindex op fn ! @include config.texi @copying This file documents version @value{VERSION} of @value{SSED}, a stream editor. Copyright (C) 1998, 1999, 2001, 2002 Free Software Foundation, Inc. This document is released under the terms of the GNU Free Documentation *************** *** 57,63 **** @setchapternewpage off @titlepage ! @title sed, a stream editor @subtitle version @value{VERSION}, @value{UPDATED} @author by Ken Pizzini, Paolo Bonzini --- 51,57 ---- @setchapternewpage off @titlepage ! @title @command{sed}, a stream editor @subtitle version @value{VERSION}, @value{UPDATED} @author by Ken Pizzini, Paolo Bonzini *************** *** 66,71 **** --- 60,69 ---- Copyright @copyright{} 1998, 1999 Free Software Foundation, Inc. @insertcopying + + Published by the Free Software Foundation, @* + 59 Temple Place - Suite 330 @* + Boston, MA 02111-1307, USA @end titlepage *************** *** 79,111 **** @menu * Introduction:: Introduction * Invoking sed:: Invocation ! * sed Programs:: @code{sed} programs * Examples:: Some sample scripts * Limitations:: Limitations and (non-)limitations of @value{SSED} ! * Other Resources:: Other resources for learning about @code{sed} * Reporting Bugs:: Reporting bugs ! * Extended regexps:: @code{egrep}-style regular expressions @ifset PERL * Perl regexps:: Perl-style regular expressions @end ifset * Concept Index:: A menu with all the topics in this manual. ! * Command and Option Index:: A menu with all @code{sed} commands and command-line options. @detailmenu --- The detailed node listing --- sed Programs: ! * Addresses:: Selecting lines with @code{sed} * Regular Expressions:: Overview of regular expression syntax ! 
* Data Spaces:: Where @code{sed} buffers data * Common Commands:: Often used commands ! * The "s" Command:: @code{sed}'s Swiss Army Knife * Other Commands:: Less frequently used commands ! * Programming Commands:: Commands for @code{sed} gurus ! * SSED-specific Commands:: Commands specific of @value{SSED} * Escapes:: Specifying special characters Examples: --- 77,109 ---- @menu * Introduction:: Introduction * Invoking sed:: Invocation ! * sed Programs:: @command{sed} programs * Examples:: Some sample scripts * Limitations:: Limitations and (non-)limitations of @value{SSED} ! * Other Resources:: Other resources for learning about @command{sed} * Reporting Bugs:: Reporting bugs ! * Extended regexps:: @command{egrep}-style regular expressions @ifset PERL * Perl regexps:: Perl-style regular expressions @end ifset * Concept Index:: A menu with all the topics in this manual. ! * Command and Option Index:: A menu with all @command{sed} commands and command-line options. @detailmenu --- The detailed node listing --- sed Programs: ! * Addresses:: Selecting lines with @command{sed} * Regular Expressions:: Overview of regular expression syntax ! * Data Spaces:: Where @command{sed} buffers data * Common Commands:: Often used commands ! * The "s" Command:: @command{sed}'s Swiss Army Knife * Other Commands:: Less frequently used commands ! * Programming Commands:: Commands for @command{sed} gurus ! * Extended Commands:: Commands specific of @value{SSED} * Escapes:: Specifying special characters Examples: *************** *** 151,165 **** @chapter Introduction @cindex Stream editor ! @code{sed} is a stream editor. A stream editor is used to perform basic text transformations on an input stream (a file or input from a pipeline). While in some ways similar to an editor which ! permits scripted edits (such as @code{ed}), ! @code{sed} works by making only one pass over the input(s), and is consequently more efficient. ! 
But it is @code{sed}'s ability to filter text in a pipeline which particularly distinguishes it from other types of editors. --- 149,163 ---- @chapter Introduction @cindex Stream editor ! @command{sed} is a stream editor. A stream editor is used to perform basic text transformations on an input stream (a file or input from a pipeline). While in some ways similar to an editor which ! permits scripted edits (such as @command{ed}), ! @command{sed} works by making only one pass over the input(s), and is consequently more efficient. ! But it is @command{sed}'s ability to filter text in a pipeline which particularly distinguishes it from other types of editors. *************** *** 167,185 **** @node Invoking sed @chapter Invocation ! @code{sed} may be invoked with the following command-line options: ! @table @samp ! @item @code{-V} ! @itemx @code{--version} @opindex -V @opindex --version @cindex Version, printing ! Print out the version of @code{sed} that is being run and a copyright notice, then exit. ! @item @code{-h} ! @itemx @code{--help} @opindex -h @opindex --help @cindex Usage summary, printing --- 165,183 ---- @node Invoking sed @chapter Invocation ! @command{sed} may be invoked with the following command-line options: ! @table @code ! @item -V ! @itemx --version @opindex -V @opindex --version @cindex Version, printing ! Print out the version of @command{sed} that is being run and a copyright notice, then exit. ! @item -h ! @itemx --help @opindex -h @opindex --help @cindex Usage summary, printing *************** *** 187,207 **** and the bug-reporting address, then exit. ! @item @code{-n} ! @itemx @code{--quiet} ! @itemx @code{--silent} @opindex -n @opindex --quiet @opindex --silent @cindex Disabling autoprint, from command line ! By default, @code{sed} will print out the pattern space at the end of each cycle through the script. These options disable this automatic printing, ! 
and @code{sed} will only produce output when explicitly told to via the @code{p} command. ! @item @code{-i}[@var{SUFFIX}] ! @itemx @code{--in-place[=@var{SUFFIX}]} @opindex -i @opindex --in-place @cindex In-place editing --- 185,205 ---- and the bug-reporting address, then exit. ! @item -n ! @itemx --quiet ! @itemx --silent @opindex -n @opindex --quiet @opindex --silent @cindex Disabling autoprint, from command line ! By default, @command{sed} prints out the pattern space at the end of each cycle through the script. These options disable this automatic printing, ! and @command{sed} only produces output when explicitly told to via the @code{p} command. ! @item -i[@var{SUFFIX}] ! @itemx --in-place[=@var{SUFFIX}] @opindex -i @opindex --in-place @cindex In-place editing *************** *** 209,220 **** This option specifies that files are to be edited in-place. @value{SSED} does this by creating a temporary file and sending output to this file rather than to the standard ! output@footnote{This applies to commands such as @code{=}, @code{a}, @code{c}, @code{i}, @code{l}, @code{p}. You can still write to the standard output by using the @code{w} @cindex @value{SSEDEXT}, @file{/dev/stdout} file or @code{W} commands together with the @file{/dev/stdout} ! special file}. When the end of the file is reached, the temporary file is renamed to the output file's original name. --- 207,218 ---- This option specifies that files are to be edited in-place. @value{SSED} does this by creating a temporary file and sending output to this file rather than to the standard ! output.@footnote{This applies to commands such as @code{=}, @code{a}, @code{c}, @code{i}, @code{l}, @code{p}. You can still write to the standard output by using the @code{w} @cindex @value{SSEDEXT}, @file{/dev/stdout} file or @code{W} commands together with the @file{/dev/stdout} ! special file} When the end of the file is reached, the temporary file is renamed to the output file's original name. 
*************** *** 234,266 **** the original files into another directory (provided the directory already exists). ! This option implies @code{-s}. ! @item @code{-l} @var{N} ! @itemx @code{--line-length=@var{N}} @opindex -l @opindex --line-length @cindex Line length, setting ! Specify the default line-wrap length for the 'l' command. A length of 0 (zero) means to never wrap long lines. If not specified, it is taken to be 70. ! @item @code{-r} ! @itemx @code{--regexp-extended} @opindex -r @opindex --regexp-extended @cindex Extended regular expressions, choosing @cindex @acronym{GNU} extensions, extended regular expressions Use extended regular expressions rather than basic regular expressions. Extended regexps are those that ! @code{egrep} accepts; they can be clearer because they ! usually have less backslashes, but are a @sc{gnu} extension ! and hence scripts that use it are not portable. @xref{Extended regexps, , Extended regular expressions}. @ifset PERL ! @item @code{-R} ! @itemx @code{--regexp-perl} @opindex -R @opindex --regexp-perl @cindex Perl-style regular expressions, choosing --- 232,264 ---- the original files into another directory (provided the directory already exists). ! This option implies @option{-s}. ! @item -l @var{N} ! @itemx --line-length=@var{N} @opindex -l @opindex --line-length @cindex Line length, setting ! Specify the default line-wrap length for the @code{l} command. A length of 0 (zero) means to never wrap long lines. If not specified, it is taken to be 70. ! @item -r ! @itemx --regexp-extended @opindex -r @opindex --regexp-extended @cindex Extended regular expressions, choosing @cindex @acronym{GNU} extensions, extended regular expressions Use extended regular expressions rather than basic regular expressions. Extended regexps are those that ! @command{egrep} accepts; they can be clearer because they ! usually have less backslashes, but are a @acronym{GNU} extension ! and hence scripts that use them are not portable. 
@xref{Extended regexps, , Extended regular expressions}. @ifset PERL ! @item -R ! @itemx --regexp-perl @opindex -R @opindex --regexp-perl @cindex Perl-style regular expressions, choosing *************** *** 272,309 **** Perl-style regular expressions}. @end ifset ! @item @code{-s} ! @itemx @code{--separate} @cindex Working on separate files ! By default, @code{sed} will consider the files specified on the command line as a single continuous long stream. This @value{SSED} ! extension allows the user to consider them separate files: ! range addresses (such as @code{/abc/,/def/}) are not allowed to span several files, line numbers are relative to the start of each file, @code{$} refers to the last line of each file, and files invoked from the @code{R} commands are rewound at the start of each file. ! @item @code{-u} ! @itemx @code{--unbuffered} @opindex -u @opindex --unbuffered @cindex Unbuffered I/O, choosing Buffer both input and output as minimally as practical. (This is particularly useful if the input is coming from ! the likes of @code{tail -f}, and you wish to see the transformed output as soon as possible.) ! @item @code{-e} @var{script} ! @itemx @code{--expression=@var{script}} @opindex -e @opindex --expression @cindex Script, from command line Add the commands in @var{script} to the set of commands to be run while processing the input. ! @item @code{-f} @var{script-file} ! @itemx @code{--file=@var{script-file}} @opindex -f @opindex --file @cindex Script, from a file --- 270,307 ---- Perl-style regular expressions}. @end ifset ! @item -s ! @itemx --separate @cindex Working on separate files ! By default, @command{sed} will consider the files specified on the command line as a single continuous long stream. This @value{SSED} ! extension allows the user to consider them as separate files: ! 
range addresses (such as @samp{/abc/,/def/}) are not allowed to span several files, line numbers are relative to the start of each file, @code{$} refers to the last line of each file, and files invoked from the @code{R} commands are rewound at the start of each file. ! @item -u ! @itemx --unbuffered @opindex -u @opindex --unbuffered @cindex Unbuffered I/O, choosing Buffer both input and output as minimally as practical. (This is particularly useful if the input is coming from ! the likes of @samp{tail -f}, and you wish to see the transformed output as soon as possible.) ! @item -e @var{script} ! @itemx --expression=@var{script} @opindex -e @opindex --expression @cindex Script, from command line Add the commands in @var{script} to the set of commands to be run while processing the input. ! @item -f @var{script-file} ! @itemx --file=@var{script-file} @opindex -f @opindex --file @cindex Script, from a file *************** *** 312,318 **** @end table ! If no @samp{-e}, @samp{-f}, @samp{--expression}, or @samp{--file} options are given on the command-line, then the first non-option argument on the command line is taken to be the @var{script} to be executed. --- 310,316 ---- @end table ! If no @option{-e}, @option{-f}, @option{--expression}, or @option{--file} options are given on the command-line, then the first non-option argument on the command line is taken to be the @var{script} to be executed. *************** *** 327,343 **** @node sed Programs ! @chapter @code{sed} Programs ! @cindex @code{sed} program structure @cindex Script structure ! A @code{sed} program consists of one or more @code{sed} commands, passed in by one or more of the ! @samp{-e}, @samp{-f}, @samp{--expression}, and @samp{--file} options, or the first non-option argument if zero of these options are used. ! This document will refer to ``the'' @code{sed} script; ! this will be understood to mean the in-order catenation of all of the @var{script}s and @var{script-file}s passed in. 
Each @code{sed} command consists of an optional address or --- 325,341 ---- @node sed Programs ! @chapter @command{sed} Programs ! @cindex @command{sed} program structure @cindex Script structure ! A @command{sed} program consists of one or more @command{sed} commands, passed in by one or more of the ! @option{-e}, @option{-f}, @option{--expression}, and @option{--file} options, or the first non-option argument if zero of these options are used. ! This document will refer to ``the'' @command{sed} script; ! this is understood to mean the in-order catenation of all of the @var{script}s and @var{script-file}s passed in. Each @code{sed} command consists of an optional address or *************** *** 345,396 **** and any additional command-specific code. @menu ! * Addresses:: Selecting lines with @code{sed} * Regular Expressions:: Overview of regular expression syntax ! * Data Spaces:: Where @code{sed} buffers data * Common Commands:: Often used commands ! * The "s" Command:: @code{sed}'s Swiss Army Knife * Other Commands:: Less frequently used commands ! * Programming Commands:: Commands for @code{sed} gurus ! * SSED-specific Commands:: Commands specific of @value{SSED} * Escapes:: Specifying special characters @end menu @node Addresses ! @section Selecting lines with @code{sed} ! @cindex Addresses, in @code{sed} scripts @cindex Line selection @cindex Selecting lines to process ! Addresses in a @code{sed} script can be in any of the following forms: ! @table @samp @item @var{number} @cindex Address, numeric @cindex Line, selecting by number Specifying a line number will match only that line in the input. ! (Note that @code{sed} counts lines continuously across all input files ! unless @code{-i} or @code{-s} options are specified.) @item @var{first}~@var{step} ! @cindex @acronym{GNU} extensions, @code{@var{n}~@var{m}} addresses ! This @sc{gnu} extension matches every @var{step}th line starting with line @var{first}. 
In particular, lines will be selected when there exists a non-negative @var{n} such that the current line-number equals @var{first} + (@var{n} * @var{step}). Thus, to select the odd-numbered lines, one would use @code{1~2}; ! to pick every third line starting with the second, @code{2~3} would be used; ! to pick every fifth line starting with the tenth, use @code{10~5}; ! and @code{50~0} is just an obscure way of saying @code{50}. @item $ @cindex Address, last line @cindex Last line, selecting @cindex Line, selecting last This address matches the last line of the last file of input, or ! the last line of each file when the @code{-i} or @code{-s} options are specified. @item /@var{regexp}/ --- 343,394 ---- and any additional command-specific code. @menu ! * Addresses:: Selecting lines with @command{sed} * Regular Expressions:: Overview of regular expression syntax ! * Data Spaces:: Where @command{sed} buffers data * Common Commands:: Often used commands ! * The "s" Command:: @command{sed}'s Swiss Army Knife * Other Commands:: Less frequently used commands ! * Programming Commands:: Commands for @command{sed} gurus ! * Extended Commands:: Commands specific of @value{SSED} * Escapes:: Specifying special characters @end menu @node Addresses ! @section Selecting lines with @command{sed} ! @cindex Addresses, in @command{sed} scripts @cindex Line selection @cindex Selecting lines to process ! Addresses in a @command{sed} script can be in any of the following forms: ! @table @code @item @var{number} @cindex Address, numeric @cindex Line, selecting by number Specifying a line number will match only that line in the input. ! (Note that @command{sed} counts lines continuously across all input files ! unless @option{-i} or @option{-s} options are specified.) @item @var{first}~@var{step} ! @cindex @acronym{GNU} extensions, @samp{@var{n}~@var{m}} addresses ! This @acronym{GNU} extension matches every @var{step}th line starting with line @var{first}. 
In particular, lines will be selected when there exists a non-negative @var{n} such that the current line-number equals @var{first} + (@var{n} * @var{step}). Thus, to select the odd-numbered lines, one would use @code{1~2}; ! to pick every third line starting with the second, @samp{2~3} would be used; ! to pick every fifth line starting with the tenth, use @samp{10~5}; ! and @samp{50~0} is just an obscure way of saying @code{50}. @item $ @cindex Address, last line @cindex Last line, selecting @cindex Line, selecting last This address matches the last line of the last file of input, or ! the last line of each file when the @option{-i} or @option{-s} options are specified. @item /@var{regexp}/ *************** *** 405,418 **** @cindex @acronym{GNU} extensions, modifiers and the empty regular expression @cindex @value{SSEDEXT}, modifiers and the empty regular expression Unless @code{POSIXLY_CORRECT} is set, the empty regular expression ! @code{//} repeats the last regular expression match (the same holds if the empty regular expression is passed to the @code{s} command). Note that modifiers to regular expressions are evaluated when the regular expression is compiled, thus it is illegal to specify them together with the empty regular expression. ! If @code{POSIXLY_CORRECT} is set, instead, @code{//} is the null match: this behavior is mandated by @sc{posix}, but it would break too many legacy ! sed scripts to blithely change @value{SSED}'s default behavior. @item \%@var{regexp}% (The @code{%} may be replaced by any other single character.) --- 403,417 ---- @cindex @acronym{GNU} extensions, modifiers and the empty regular expression @cindex @value{SSEDEXT}, modifiers and the empty regular expression Unless @code{POSIXLY_CORRECT} is set, the empty regular expression ! @samp{//} repeats the last regular expression match (the same holds if the empty regular expression is passed to the @code{s} command). 
Note that modifiers to regular expressions are evaluated when the regular expression is compiled, thus it is illegal to specify them together with the empty regular expression. ! @c *** CHECK CURRENT POSIX, I'M NOT SURE THIS IS STILL TRUE. ADR. ! If @code{POSIXLY_CORRECT} is set, instead, @samp{//} is the null match: this behavior is mandated by @sc{posix}, but it would break too many legacy ! @command{sed} scripts to blithely change @value{SSED}'s default behavior. @item \%@var{regexp}% (The @code{%} may be replaced by any other single character.) *************** *** 421,427 **** This also matches the regular expression @var{regexp}, but allows one to use a different delimiter than @code{/}. This is particularly useful if the @var{regexp} itself contains ! a lot of @code{/}s, since it avoids the tedious escaping of every @code{/}. If @var{regexp} itself includes any delimiter characters, each must be escaped by a backslash (@code{\}). --- 420,426 ---- This also matches the regular expression @var{regexp}, but allows one to use a different delimiter than @code{/}. This is particularly useful if the @var{regexp} itself contains ! a lot of slashes, since it avoids the tedious escaping of every @code{/}. If @var{regexp} itself includes any delimiter characters, each must be escaped by a backslash (@code{\}). *************** *** 431,437 **** @ifset PERL @cindex Perl-style regular expressions, case-insensitive @end ifset ! The @code{I} modifier to regular-expression matching is a @sc{gnu} extension which causes the @var{regexp} to be matched in a case-insensitive manner. --- 430,436 ---- @ifset PERL @cindex Perl-style regular expressions, case-insensitive @end ifset ! The @code{I} modifier to regular-expression matching is a @acronym{GNU} extension which causes the @var{regexp} to be matched in a case-insensitive manner. 
*************** *** 445,452 **** extension which causes @code{^} and @code{$} to match respectively (in addition to the normal behavior) the empty string after a new-line, and the empty string before a new-line. There are special character ! sequences (@code{\A} and @code{\Z} in Perl mode, @code{\`} and @code{\'} ! in basic or extended regular expression modes) which always match the beginning or the end of the buffer. @code{M} stands for @cite{multi-line}. --- 444,459 ---- extension which causes @code{^} and @code{$} to match respectively (in addition to the normal behavior) the empty string after a new-line, and the empty string before a new-line. There are special character ! sequences ! @ifset PERL ! (@code{\A} and @code{\Z} in Perl mode, ! @code{\`} and @code{\'} ! @end ifset ! @ifclear PERL ! (@code{\`} and @code{\'} ! @end ifclear ! in basic or extended regular expression modes) ! which always match the beginning or the end of the buffer. @code{M} stands for @cite{multi-line}. *************** *** 489,497 **** (inclusively). If the second address is a @var{regexp}, then checking for the ending match will start with the line @emph{following} the ! line which matched the first address. As a @sc{gnu} extension, a line number of @code{0} can be used in an address specification ! like @code{0,/@var{regexp}/} so that @var{regexp} will be matched in the first input line too. If the second address is a @var{number} less than (or equal to) --- 496,504 ---- (inclusively). If the second address is a @var{regexp}, then checking for the ending match will start with the line @emph{following} the ! line which matched the first address. As a @acronym{GNU} extension, a line number of @code{0} can be used in an address specification ! like @samp{0,/@var{regexp}/} so that @var{regexp} will be matched in the first input line too. 
If the second address is a @var{number} less than (or equal to) *************** *** 507,532 **** @cindex @acronym{GNU} extensions, 0,@var{addr2} addressing @cindex @acronym{GNU} extensions, @var{addr1},+@var{N} addressing @cindex @acronym{GNU} extensions, @var{addr1},~@var{N} addressing ! @value{SSED} also supports some special 2-address forms: ! @table @samp @item 0,@var{addr2} ! Start out in "matched first address" state, until @var{addr2} is found. ! This is similar to 1,@var{addr2}, except that if @var{addr2} matches the very first line of input the 0,@var{addr2} form will be at the end of its range, whereas the 1,@var{addr2} form will still be at the beginning of its range. @item @var{addr1},+@var{N} ! Will match @var{addr1} and the @var{N} lines following @var{addr1}. @item @var{addr1},~@var{N} ! Will match @var{addr1} and the lines following @var{addr1} until the next line whose input line number is a multiple of @var{N}. @end table @cindex Excluding lines @cindex Selecting non-matching lines Appending the @code{!} character to the end of an address ! specification will negate the sense of the match. That is, if the @code{!} character follows an address range, then only lines which do @emph{not} match the address range will be selected. --- 514,541 ---- @cindex @acronym{GNU} extensions, 0,@var{addr2} addressing @cindex @acronym{GNU} extensions, @var{addr1},+@var{N} addressing @cindex @acronym{GNU} extensions, @var{addr1},~@var{N} addressing ! @value{SSED} also supports some special two-address forms: ! @table @code @item 0,@var{addr2} ! Start out in ``matched first address'' state, until @var{addr2} is found. ! This is similar to @samp{1,@var{addr2}}, except that if @var{addr2} matches the very first line of input the 0,@var{addr2} form will be at the end of its range, whereas the 1,@var{addr2} form will still be at the beginning of its range. + @item @var{addr1},+@var{N} ! Matches @var{addr1} and the @var{N} lines following @var{addr1}. ! 
@item @var{addr1},~@var{N} ! Matches @var{addr1} and the lines following @var{addr1} until the next line whose input line number is a multiple of @var{N}. @end table @cindex Excluding lines @cindex Selecting non-matching lines Appending the @code{!} character to the end of an address ! specification negates the sense of the match. That is, if the @code{!} character follows an address range, then only lines which do @emph{not} match the address range will be selected. *************** *** 535,543 **** @node Regular Expressions ! @section Overview of regular expression syntax ! To know how to use @code{sed}, people should understand regular expressions (@dfn{regexp} for short). A regular expression is a pattern that is matched against a subject string from left to right. Most characters stand for --- 544,552 ---- @node Regular Expressions ! @section Overview of Regular Expression Syntax ! To know how to use @command{sed}, people should understand regular expressions (@dfn{regexp} for short). A regular expression is a pattern that is matched against a subject string from left to right. Most characters stand for *************** *** 555,577 **** These are encoded in the pattern by the use of metacharacters, which do not stand for themselves but instead are interpreted in some special way. Here is a brief description ! of regular expression syntax as used in @code{sed}. @table @code @item @var{char} ! A single char, if not special, is matched against text. @item * ! Matches a sequence of zero or more repetitions of previous char, grouped regexp (see below), or class. @item \+ @cindex @acronym{GNU} extensions, to basic regular expressions ! As *, but matches one or more. It is a @sc{gnu} extension. @item \? @cindex @acronym{GNU} extensions, to basic regular expressions ! As *, but only matches zero or one. It is a @sc{gnu} extension. 
@item \@{@var{i}\@} As *, but matches exactly @var{i} sequences (@var{i} is a --- 564,586 ---- These are encoded in the pattern by the use of metacharacters, which do not stand for themselves but instead are interpreted in some special way. Here is a brief description ! of regular expression syntax as used in @command{sed}. @table @code @item @var{char} ! A single character, if not special, is matched against text. @item * ! Matches a sequence of zero or more repetitions of previous character, grouped regexp (see below), or class. @item \+ @cindex @acronym{GNU} extensions, to basic regular expressions ! As *, but matches one or more. It is a @acronym{GNU} extension. @item \? @cindex @acronym{GNU} extensions, to basic regular expressions ! As *, but only matches zero or one. It is a @acronym{GNU} extension. @item \@{@var{i}\@} As *, but matches exactly @var{i} sequences (@var{i} is a *************** *** 588,626 **** @itemize @bullet @item ! apply postfix operators, like @code{\(abcd\)*}: this will search for zero or more whole sequences of @samp{abcd}, while @code{abcd*} would search for @samp{abc} followed by zero or more occurrences ! of @samp{d} @item ! use back references (see below) @end itemize @item . Matches any character @item ^ ! Match the null string at beginning of line, i.e. what ! what appears after the caret must appear at the beginning of line. @code{^#include} will match only ! lines where "#include" is the first thing on line---if ! there are one or two spaces before, the match fails. @item $ It is the same as @code{^}, but refers to end of line @item [@var{list}] @itemx [^@var{list}] ! Matches any single char in @var{list}: for example, @samp{[aeiou]} matches all vowels. A list may include sequences like @samp{@var{char1}-@var{char2}}, which matches any character between (inclusive) @var{char1} and @var{char2}. The caret reverses the meaning of the regexp, so that ! it matches any single char NOT in list. 
To include @samp{]} in the list, make it the first character (after the caret if needed), to include @samp{-} in the list, make it the first or last; to include @samp{^} put --- 597,637 ---- @itemize @bullet @item ! @cindex @acronym{GNU} extensions, to basic regular expressions ! Apply postfix operators, like @code{\(abcd\)*}: this will search for zero or more whole sequences of @samp{abcd}, while @code{abcd*} would search for @samp{abc} followed by zero or more occurrences ! of @samp{d}. Note that this is not in the @sc{posix} ! standard and hence is not portable. @item ! Use back references (see below) @end itemize @item . Matches any character @item ^ ! Matches the null string at beginning of line, i.e. what ! appears after the caret must appear at the beginning of line. @code{^#include} will match only ! lines where @samp{#include} is the first thing on line---if ! there are spaces before, for example, the match fails. @item $ It is the same as @code{^}, but refers to end of line @item [@var{list}] @itemx [^@var{list}] ! Matches any single character in @var{list}: for example, @samp{[aeiou]} matches all vowels. A list may include sequences like @samp{@var{char1}-@var{char2}}, which matches any character between (inclusive) @var{char1} and @var{char2}. The caret reverses the meaning of the regexp, so that ! it matches any single character NOT in list. To include @samp{]} in the list, make it the first character (after the caret if needed), to include @samp{-} in the list, make it the first or last; to include @samp{^} put *************** *** 632,656 **** parentheses to use complex alternative regular expressions. The matching process tries each alternative in turn, from left to right, and the first one that succeeds is used. ! It is a @sc{gnu} extension. @item \@var{digit} ! Matches the @var{digit}-th @code{\(\)} reference in the regular expression. @item \@var{char} Matches character @var{char}; this is to be used to match ! special chars, referred above. 
Note that the only C-like backslash sequence that you can portably assume to be interpreted is @code{\n} for a new-line; in particular @code{\t} matches a @samp{t} under most implementations ! of @code{sed}, rather than a tabulation character. @end table @cindex Greedy regular expression matching ! Note that the regular expression matcher is greedy, i.e. if ! two or more matches are detected, it selects the longest, if there are two or more selected with the same size, it selects the first in text. --- 643,667 ---- parentheses to use complex alternative regular expressions. The matching process tries each alternative in turn, from left to right, and the first one that succeeds is used. ! It is a @acronym{GNU} extension. @item \@var{digit} ! Matches the @var{digit}-th @code{\(@dots{}\)} reference in the regular expression. @item \@var{char} Matches character @var{char}; this is to be used to match ! special characters, referred to above. Note that the only C-like backslash sequence that you can portably assume to be interpreted is @code{\n} for a new-line; in particular @code{\t} matches a @samp{t} under most implementations ! of @command{sed}, rather than a tabulation character. @end table @cindex Greedy regular expression matching ! Note that the regular expression matcher is greedy, i.e., if ! two or more matches are detected, it selects the longest; if there are two or more selected with the same size, it selects the first in text. *************** *** 658,671 **** Examples: @table @samp @item abcdef ! Matches @samp{abcdef} @item a*b Matches zero or more @samp{a}s followed by a single @samp{b}. For example, @samp{b} or @samp{aaaaab}. @item a\?b ! Matches @samp{b} or @samp{ab} @item a\+b\+ Matches one or more @samp{a}s followed by one or more --- 669,682 ---- Examples: @table @samp @item abcdef ! Matches @samp{abcdef}. @item a*b Matches zero or more @samp{a}s followed by a single @samp{b}. For example, @samp{b} or @samp{aaaaab}. @item a\?b ! 
Matches @samp{b} or @samp{ab}. @item a\+b\+ Matches one or more @samp{a}s followed by one or more *************** *** 675,735 **** @item .* @itemx .\+ ! These two will both match all the characters on a line; ! however, the first will match every line (including empty ! ones), while the second will only match lines containing ! at least one char. @item ^main.*(.*) ! This will search for a line containing "main" as the first thing on the line, followed by an opening and closing parenthesis. The @samp{n}, @samp{(} and @samp{)} need not ! be adjacent @item ^# ! This will match lines beginning with a hash (or sharp) character. @item \\$ ! This will match lines ending with a single backslash. The regexp contains two backslashes for escaping. @item \$ ! Instead, this will match lines containing a single dollar, because it is escaped. ! @item [a-zA-Z_] ! This will match any letters or digits @item [^ @kbd{tab}]\+ ! This will match one or more sequences ! of any char that isn't a space or tab. ! Usually this means a word @item ^\(.*\)\n\1$ ! This will match two equal lines without a trailing new-line ! @item A.\@{9\@}$ ! This will match an "A" that is exactly the last tenth ! character on line ! @item ^.\@{,15\@}A ! Match the last "A" on the first 16 chars of the line @end table @node Data Spaces ! @section Where @code{sed} buffers data @cindex Buffer spaces, pattern and hold @cindex Spaces, pattern and hold @cindex Pattern space, definition @cindex Hold space, definition ! @code{sed} maintains two data buffers: the active @emph{pattern} space, and the auxiliary @emph{hold} space. ! In ``normal'' operation, @code{sed} reads in one line from the input stream and places it in the pattern space. This pattern space is where text manipulations occur. The hold space is initially empty, but there are commands --- 686,746 ---- @item .* @itemx .\+ ! These two both match all the characters on a line; ! however, the first matches every line (including empty ! 
ones), while the second only matches lines containing ! at least one character. @item ^main.*(.*) ! This searches for a line containing @samp{main} as the first thing on the line, followed by an opening and closing parenthesis. The @samp{n}, @samp{(} and @samp{)} need not ! be adjacent. @item ^# ! This matches lines beginning with a hash (or sharp) character. @item \\$ ! This matches lines ending with a single backslash. The regexp contains two backslashes for escaping. @item \$ ! Instead, this matches lines containing a single dollar, because it is escaped. ! @item [a-zA-Z0-9] ! This matches any letters or digits. @item [^ @kbd{tab}]\+ ! This matches one or more sequences ! of any character that isn't a space or tab. ! Usually this means a word. @item ^\(.*\)\n\1$ ! This matches two equal lines without a trailing new-line. ! @item .\@{9\@}A$ ! This matches an @code{A} that is the last ! character on line, with at least nine preceding characters. ! @item ^.\@{15\@}A ! This matches an @code{A} that is the 16th character on a line. @end table @node Data Spaces ! @section Where @command{sed} Buffers Data @cindex Buffer spaces, pattern and hold @cindex Spaces, pattern and hold @cindex Pattern space, definition @cindex Hold space, definition ! @command{sed} maintains two data buffers: the active @emph{pattern} space, and the auxiliary @emph{hold} space. ! In ``normal'' operation, @command{sed} reads in one line from the input stream and places it in the pattern space. This pattern space is where text manipulations occur. The hold space is initially empty, but there are commands *************** *** 738,749 **** @node Common Commands ! @section Often used commands ! If you use @code{sed} at all, you will quite likely want to know these commands. ! @table @samp @item # [No addresses allowed.] --- 749,760 ---- @node Common Commands ! @section Often-Used Commands ! If you use @command{sed} at all, you will quite likely want to know these commands. ! 
@table @code @item # [No addresses allowed.] *************** *** 754,772 **** @cindex Portability, comments If you are concerned about portability, be aware that ! some implementations of @code{sed} (which are not @sc{posix.2} conformant) may only support a single one-line comment, and then only when the very first character of the script is a @code{#}. @findex -n, forcing from within a script @cindex Caveat --- #n on first line ! Warning: if the first two characters of the @code{sed} script ! are @code{#n}, then the @samp{-n} (no-autoprint) option is forced. If you want to put a comment in the first line of your script ! and that comment begins with the letter `n' and you do not want this behavior, ! then be sure to either use a capital `N', ! or place at least one space before the `n'. @item q [@var{exit-code}] [At most one address allowed.] --- 765,783 ---- @cindex Portability, comments If you are concerned about portability, be aware that ! some implementations of @command{sed} (which are not @sc{posix} conformant) may only support a single one-line comment, and then only when the very first character of the script is a @code{#}. @findex -n, forcing from within a script @cindex Caveat --- #n on first line ! Warning: if the first two characters of the @command{sed} script ! are @code{#n}, then the @option{-n} (no-autoprint) option is forced. If you want to put a comment in the first line of your script ! and that comment begins with the letter @samp{n} and you do not want this behavior, ! then be sure to either use a capital @samp{N}, ! or place at least one space before the @samp{n}. @item q [@var{exit-code}] [At most one address allowed.] *************** *** 774,783 **** @findex q (quit) command @cindex @value{SSEDEXT}, returning an exit code @cindex Quitting ! Exit @code{sed} without processing any more commands or input. Note that the current pattern space is printed if auto-print is ! not disabled with the @code{-n} switch. The ability to return ! 
an exit code from the @code{sed} script is a @value{SSED} extension. @item d @findex d (delete) command --- 785,794 ---- @findex q (quit) command @cindex @value{SSEDEXT}, returning an exit code @cindex Quitting ! Exit @command{sed} without processing any more commands or input. Note that the current pattern space is printed if auto-print is ! not disabled with the @option{-n} option. The ability to return ! an exit code from the @command{sed} script is a @value{SSED} extension. @item d @findex d (delete) command *************** *** 789,808 **** @findex p (print) command @cindex Text, printing Print out the pattern space (to the standard output). ! This command is usually only used in conjunction with the @samp{-n} command-line option. ! Note: some implementations of @code{sed}, such as this one, will double-print lines when auto-print is not disabled and the @code{p} command is given. Other implementations will only print the line once. ! Both ways conform with the @sc{posix.2} standard, and so neither way can be considered to be in error. ! @cindex Non-bugs, @code{p} command and @samp{-n} flag ! @cindex Portability, @code{p} command and @samp{-n} flag ! Portable @code{sed} scripts should thus avoid relying on either behavior; ! either use the @samp{-n} option and explicitly print what you want, or avoid use of the @code{p} command (and also the @code{p} flag to the @code{s} command). --- 800,820 ---- @findex p (print) command @cindex Text, printing Print out the pattern space (to the standard output). ! This command is usually only used in conjunction with the @option{-n} command-line option. ! Note: some implementations of @command{sed}, such as this one, will double-print lines when auto-print is not disabled and the @code{p} command is given. Other implementations will only print the line once. ! @c CHECK CURRENT STANDARD. I THINK NOT DOUBLE-PRINTING IS HOW IT WORKS. ADR. ! 
Both ways conform with the @sc{posix} standard, and so neither way can be considered to be in error. ! @cindex Non-bugs, @code{p} command and @option{-n} flag ! @cindex Portability, @code{p} command and @option{-n} flag ! Portable @command{sed} scripts should thus avoid relying on either behavior; ! either use the @option{-n} option and explicitly print what you want, or avoid use of the @code{p} command (and also the @code{p} flag to the @code{s} command). *************** *** 812,818 **** @cindex Read next input line If auto-print is not disabled, print the pattern space, then, regardless, replace the pattern space with the next line of input. ! If there is no more input then @code{sed} exits without processing any more commands. @item @{ @var{commands} @} --- 824,830 ---- @cindex Read next input line If auto-print is not disabled, print the pattern space, then, regardless, replace the pattern space with the next line of input. ! If there is no more input then @command{sed} exits without processing any more commands. @item @{ @var{commands} @} *************** *** 830,843 **** @section The @code{s} Command The syntax of the @code{s} (as in substitute) command is ! s/@var{regexp}/@var{replacement}/@var{flags}. The @code{/} characters may be uniformly replaced by any other single character within any given @code{s} command. The @code{/} character (or whatever other character is used in its stead) can appear in the @var{regexp} or @var{replacement} only if it is preceded by a @code{\} character. ! The @code{s} command is probably the most important in @code{sed} and has a lot of different options. Its basic concept is simple: the @code{s} command attempts to match the pattern space against the supplied @var{regexp}; if the match is --- 842,855 ---- @section The @code{s} Command The syntax of the @code{s} (as in substitute) command is ! @samp{s/@var{regexp}/@var{replacement}/@var{flags}}. 
The @code{/} characters may be uniformly replaced by any other single character within any given @code{s} command. The @code{/} character (or whatever other character is used in its stead) can appear in the @var{regexp} or @var{replacement} only if it is preceded by a @code{\} character. ! The @code{s} command is probably the most important in @command{sed} and has a lot of different options. Its basic concept is simple: the @code{s} command attempts to match the pattern space against the supplied @var{regexp}; if the match is *************** *** 851,877 **** the portion of the match which is contained between the @var{n}th @code{\(} and its matching @code{\)}. Also, the @var{replacement} can contain unescaped @code{&} ! characters which will reference the whole matched portion of the pattern space. ! @cindex @value{SSEDEXT}, case modifiers in `s' commands Finally (this is a @value{SSED} extension) you can include a special sequence made of a backslash and one of the letters ! @code{LlUuE}. The meaning is, respectively: turn the replacement ! to lowercase until a @code{\U} or @code{\E} is found, turn the ! next character to lowercase, turn the replacement to uppercase ! until a @code{\L} or @code{\E} is found, turn the next character ! to uppercase, and stop case conversion started by @code{\L} or ! @code{\U}. To include a literal @code{\}, @code{&}, or newline in the final replacement, be sure to precede the desired @code{\}, @code{&}, or newline in the @var{replacement} with a @code{\}. @findex s command, option flags @cindex Substitution of text, options ! The @code{s} command can be followed with zero or more of the following @var{flags}: ! @table @samp @item g @cindex Global substitution @cindex Replacing all text matching regexp in a line --- 863,907 ---- the portion of the match which is contained between the @var{n}th @code{\(} and its matching @code{\)}. Also, the @var{replacement} can contain unescaped @code{&} ! 
characters which reference the whole matched portion of the pattern space. ! @cindex @value{SSEDEXT}, case modifiers in @code{s} commands Finally (this is a @value{SSED} extension) you can include a special sequence made of a backslash and one of the letters ! @code{L}, @code{l}, @code{U}, @code{u}, or @code{E}. ! The meaning is as follows: ! ! @table @code ! @item \L ! Turn the replacement ! to lowercase until a @code{\U} or @code{\E} is found, ! ! @item \l ! Turn the ! next character to lowercase, ! ! @item \U ! Turn the replacement to uppercase ! until a @code{\L} or @code{\E} is found, ! ! @item \u ! Turn the next character ! to uppercase, ! ! @item \E ! Stop case conversion started by @code{\L} or @code{\U}. ! @end table ! To include a literal @code{\}, @code{&}, or newline in the final replacement, be sure to precede the desired @code{\}, @code{&}, or newline in the @var{replacement} with a @code{\}. @findex s command, option flags @cindex Substitution of text, options ! The @code{s} command can be followed by zero or more of the following @var{flags}: ! @table @code @item g @cindex Global substitution @cindex Replacing all text matching regexp in a line *************** *** 884,893 **** @cindex @acronym{GNU} extensions, @code{g} and @var{number} modifier interaction in @code{s} command @cindex Mixing @code{g} and @var{number} modifiers in the @code{s} command ! Note: the @sc{posix.2} standard does not specify what should happen when you mix the @code{g} and @var{number} modifiers, and currently there is no widely agreed upon meaning ! across @code{sed} implementations. For @value{SSED}, the interaction is defined to be: ignore matches before the @var{number}th, and then match and replace all matches from --- 914,923 ---- @cindex @acronym{GNU} extensions, @code{g} and @var{number} modifier interaction in @code{s} command @cindex Mixing @code{g} and @var{number} modifiers in the @code{s} command ! 
Note: the @sc{posix} standard does not specify what should happen when you mix the @code{g} and @var{number} modifiers, and currently there is no widely agreed upon meaning ! across @command{sed} implementations. For @value{SSED}, the interaction is defined to be: ignore matches before the @var{number}th, and then match and replace all matches from *************** *** 899,909 **** Note: when both the @code{p} and @code{e} options are specified, the relative ordering of the two produces very different results. ! In general, @code{ep} (evaluate then print) will be what you want, but operating the other way round can be useful for debugging. ! For this reason, the current versions of @value{SSED} interprets specially the presence of @code{p} options both before and after ! @code{e}, printing pattern space before and after evaluation, while in general flags for the @code{s} command show their effect just once. This behavior, although documented, might change in future versions. --- 929,939 ---- Note: when both the @code{p} and @code{e} options are specified, the relative ordering of the two produces very different results. ! In general, @code{ep} (evaluate then print) is what you want, but operating the other way round can be useful for debugging. ! For this reason, the current version of @value{SSED} interprets specially the presence of @code{p} options both before and after ! @code{e}, printing the pattern space before and after evaluation, while in general flags for the @code{s} command show their effect just once. This behavior, although documented, might change in future versions. *************** *** 916,923 **** As a @value{SSED} extension, two special values of @var{file-name} are supported: @file{/dev/stderr}, which writes the result to the standard error, and @file{/dev/stdout}, which writes to the standard ! output.@footnote{This is equivalent to @code{p} unless the @code{-i} ! 
switch is being used.} @item e @cindex Evaluate Bourne-shell commands, after substitution --- 946,953 ---- As a @value{SSED} extension, two special values of @var{file-name} are supported: @file{/dev/stderr}, which writes the result to the standard error, and @file{/dev/stdout}, which writes to the standard ! output.@footnote{This is equivalent to @code{p} unless the @option{-i} ! option is being used.} @item e @cindex Evaluate Bourne-shell commands, after substitution *************** *** 929,935 **** that is found in pattern space is executed and pattern space is replaced with its output. A trailing new-line is suppressed; results are undefined if the command to be executed contains ! a @code{nul} character. This is a @value{SSED} extension. @item I @itemx i --- 959,965 ---- that is found in pattern space is executed and pattern space is replaced with its output. A trailing new-line is suppressed; results are undefined if the command to be executed contains ! a @sc{nul} character. This is a @value{SSED} extension. @item I @itemx i *************** *** 938,945 **** @ifset PERL @cindex Perl-style regular expressions, case-insensitive @end ifset ! The @code{I} modifier to regular-expression matching is a @sc{gnu} ! extension which makes @code{sed} match @var{regexp} in a case-insensitive manner. @item M --- 968,975 ---- @ifset PERL @cindex Perl-style regular expressions, case-insensitive @end ifset ! The @code{I} modifier to regular-expression matching is a @acronym{GNU} ! extension which makes @command{sed} match @var{regexp} in a case-insensitive manner. @item M *************** *** 952,958 **** extension which causes @code{^} and @code{$} to match respectively (in addition to the normal behavior) the empty string after a new-line, and the empty string before a new-line. There are special character ! 
sequences (@code{\A} and @code{\Z} in Perl mode, @code{\`} and @code{\'} in basic or extended regular expression modes) which always match the beginning or the end of the buffer. @code{M} stands for @cite{multi-line}. --- 982,994 ---- extension which causes @code{^} and @code{$} to match respectively (in addition to the normal behavior) the empty string after a new-line, and the empty string before a new-line. There are special character ! sequences ! @ifset PERL ! (@code{\A} and @code{\Z} in Perl mode, @code{\`} and @code{\'} ! @end ifset ! @ifclear PERL ! (@code{\`} and @code{\'} ! @end ifclear in basic or extended regular expression modes) which always match the beginning or the end of the buffer. @code{M} stands for @cite{multi-line}. *************** *** 984,1027 **** @node Other Commands ! @section Less frequently used commands Though perhaps less frequently used than those in the previous ! section, some very small yet useful @code{sed} scripts can be built with these commands. ! @table @samp ! @item v ! @findex v (version) command ! @cindex @value{SSEDEXT}, checking for their presence ! @cindex Requiring @value{SSED} ! This command does nothing, but will make @code{sed} fail if ! @value{SSED} extensions are not supported, simply because other ! implementations of @code{sed} do not implement it. ! ! @item Q [@var{exit-code}] ! @findex Q (silent Quit) command ! @cindex @value{SSEDEXT}, quitting silently ! @cindex @value{SSEDEXT}, returning an exit code ! @cindex Quitting ! This command is the same as @code{q}, but will not print the ! contents of pattern space. Like @code{q}, it provides the ! ability to return an exit code to the caller. ! ! This command can be useful because the only alternative ways ! to accomplish this apparently trivial function are to use ! the @code{-n} option (which can unnecessarily complicate ! your script) or resorting to the following snippet, which ! wastes time by reading the whole file without any visible effect: ! ! @example ! 
:eat ! $d # @r{Quit silently on the last line} ! N # @r{Read another line, silently} ! g # @r{Overwrite pattern space each time to save memory} ! b eat ! @end example ! @item y/@var{source-chars}/@var{dest-chars}/ (The @code{/} characters may be uniformly replaced by any other single character within any given @code{y} command.) --- 1020,1032 ---- @node Other Commands ! @section Less Frequently-Used Commands Though perhaps less frequently used than those in the previous ! section, some very small yet useful @command{sed} scripts can be built with these commands. ! @table @code @item y/@var{source-chars}/@var{dest-chars}/ (The @code{/} characters may be uniformly replaced by any other single character within any given @code{y} command.) *************** *** 1047,1057 **** @cindex Text, appending Queue the lines of text which follow this command (each but the last ending with a @code{\}, ! which will be removed from the output) to be output at the end of the current cycle, or when the next input line is read. ! As a @sc{gnu} extension, if between the @code{a} and the newline there is other than a whitespace-@code{\} sequence, then the text of this line, starting at the first non-whitespace character after the @code{a}, is taken as the first line of the @var{text} block. --- 1052,1062 ---- @cindex Text, appending Queue the lines of text which follow this command (each but the last ending with a @code{\}, ! which are removed from the output) to be output at the end of the current cycle, or when the next input line is read. ! As a @acronym{GNU} extension, if between the @code{a} and the newline there is other than a whitespace-@code{\} sequence, then the text of this line, starting at the first non-whitespace character after the @code{a}, is taken as the first line of the @var{text} block. *************** *** 1067,1073 **** @cindex Text, insertion Immediately output the lines of text which follow this command (each but the last ending with a @code{\}, ! 
which will be removed from the output). @item c\ @itemx @var{text} --- 1072,1078 ---- @cindex Text, insertion Immediately output the lines of text which follow this command (each but the last ending with a @code{\}, ! which are removed from the output). @item c\ @itemx @var{text} *************** *** 1076,1082 **** Delete the lines matching the address or address-range, and output the lines of text which follow this command (each but the last ending with a @code{\}, ! which will be removed from the output) in place of the last line (or in place of each line, if no addresses were specified). A new cycle is started after this command is done, --- 1081,1087 ---- Delete the lines matching the address or address-range, and output the lines of text which follow this command (each but the last ending with a @code{\}, ! which are removed from the output) in place of the last line (or in place of each line, if no addresses were specified). A new cycle is started after this command is done, *************** *** 1107,1135 **** the default as specified on the command line is used. The @var{n} parameter is a @value{SSED} extension. - @item L @var{n} - @findex L (fLow paragraphs) command - @cindex Reformat pattern space - @cindex Reformatting paragraphs - @cindex @value{SSEDEXT}, reformatting paragraphs - @cindex @value{SSEDEXT}, @code{L} command - This @value{SSED} extension fills and joins lines in pattern space - to produce output lines of (at most) @var{n} characters, like - @code{fmt} does; if @var{n} is omitted, the default as specified - on the command line is used. - - Blank lines, spaces between words, and indentation are - preserved in the output; successive input lines with different - indentation are not joined; tabs are expanded to 8 columns. - - If pattern space contains multiple lines, they are joined, but - since pattern space usually contains a single line, the behavior - of a simple @code{L;d} script is the same as @code{fmt -s} (i.e. 
- it does not join short lines to form longer ones). - - @var{n} specifies the desired line-wrap length; if omitted, - the default as specified on the command line is used. - @item r @var{filename} [At most one address allowed.] --- 1112,1117 ---- *************** *** 1146,1167 **** is supported for the file name, which reads the contents of the standard input. - @item R @var{filename} - @findex R (read line) command - @cindex Read text from a file - @cindex @value{SSEDEXT}, reading a file a line at a time - @cindex @value{SSEDEXT}, @code{R} command - @cindex @value{SSEDEXT}, @file{/dev/stdin} file - Queue a line of @var{filename} to be read and - inserted into the output stream at the end of the current cycle, - or when the next input line is read. - Note that if @var{filename} cannot be read, or if its end is - reached, no line is appended, without any error indication. - - As with the @code{r} command, the special value @file{/dev/stdin} - is supported for the file name, which reads a line from the - standard input. - @item w @var{filename} @findex w (write file) command @cindex Write to a file --- 1128,1133 ---- *************** *** 1171,1178 **** As a @value{SSED} extension, two special values of @var{file-name} are supported: @file{/dev/stderr}, which writes the result to the standard error, and @file{/dev/stdout}, which writes to the standard ! output.@footnote{This is equivalent to @code{p} unless the @code{-i} ! switch is being used.} The file will be created (or truncated) before the first input line is read; all @code{w} commands --- 1137,1144 ---- As a @value{SSED} extension, two special values of @var{file-name} are supported: @file{/dev/stderr}, which writes the result to the standard error, and @file{/dev/stdout}, which writes to the standard ! output.@footnote{This is equivalent to @code{p} unless the @option{-i} ! 
option is being used.} The file will be created (or truncated) before the first input line is read; all @code{w} commands *************** *** 1194,1200 **** @cindex Append next input line to pattern space Add a newline to the pattern space, then append the next line of input to the pattern space. ! If there is no more input then @code{sed} exits without processing any more commands. @item P --- 1160,1166 ---- @cindex Append next input line to pattern space Add a newline to the pattern space, then append the next line of input to the pattern space. ! If there is no more input then @command{sed} exits without processing any more commands. @item P *************** *** 1240,1255 **** @node Programming Commands ! @section Commands for @code{sed} gurus In most cases, use of these commands indicates that you are ! probably better off programming in something like @code{awk} or Perl. But occasionally one is committed to sticking ! with @code{sed}, and these commands can enable one to write quite convoluted scripts. @cindex Flow of control in scripts ! @table @samp @item : @var{label} [No addresses allowed.] --- 1206,1221 ---- @node Programming Commands ! @section Commands for @command{sed} gurus In most cases, use of these commands indicates that you are ! probably better off programming in something like @command{awk} or Perl. But occasionally one is committed to sticking ! with @command{sed}, and these commands can enable one to write quite convoluted scripts. @cindex Flow of control in scripts ! @table @code @item : @var{label} [No addresses allowed.] *************** *** 1273,1341 **** since the last input line was read or conditional branch was taken. The @var{label} may be omitted, in which case the next cycle is started. 
- @item T @var{label} - @findex T (test and branch if failed) command - @cindex @value{SSEDEXT}, branch if @code{s///} failed - @cindex Branch to a label, if @code{s///} failed - @cindex Conditional branch - Branch to @var{label} only if there have been no successful - @code{s}ubstitutions since the last input line was read or - conditional branch was taken. The @var{label} may be omitted, - in which case the next cycle is started. - @end table ! @node SSED-specific Commands ! @section Commands specific of @value{SSED} ! These commands are specific of @value{SSED}, so you must use them with care and only when you are sure that ! hindering portability is not so evil. They allow to check for @value{SSED} extensions or to do tasks that are required ! quite often, yet unsupported by standard @code{sed}s. @table @code - @item v - @findex v (version) command - @cindex @value{SSEDEXT}, checking for their presence - @cindex Requiring @value{SSED} - This command does nothing, but will make @code{sed} fail if - @value{SSED} extensions are not supported, simply because other - implementations of @code{sed} do not implement it. - - @item Q [@var{exit-code}] - @findex Q (silent Quit) command - @cindex @value{SSEDEXT}, quitting silently - @cindex @value{SSEDEXT}, returning an exit code - @cindex Quitting - This command is the same as @code{q}, but will not print the - contents of pattern space. Like @code{q}, it provides the - ability to return an exit code to the caller. 
- - This command can be useful because the only alternative ways - to accomplish this apparently trivial function are to use - the @code{-n} option (which can unnecessarily complicate - your script) or resorting to the following snippet, which - wastes time by reading the whole file without any visible effect: - - @example - :eat - $d # @r{Quit silently on the last line} - N # @r{Read another line, silently} - g # @r{Overwrite pattern space each time to save memory} - b eat - @end example - - @item T @var{label} - @findex T (test and branch if failed) command - @cindex @value{SSEDEXT}, branch if @code{s///} failed - @cindex Branch to a label, if @code{s///} failed - @cindex Conditional branch - Branch to @var{label} only if there have been no successful - @code{s}ubstitutions since the last input line was read or - conditional branch was taken. The @var{label} may be omitted, - in which case the next cycle is started. - @item e [@var{command}] @findex e (evaluate) command @cindex Evaluate Bourne-shell commands --- 1239,1256 ---- since the last input line was read or conditional branch was taken. The @var{label} may be omitted, in which case the next cycle is started. @end table ! @node Extended Commands ! @section Commands Specific to @value{SSED} ! These commands are specific to @value{SSED}, so you must use them with care and only when you are sure that ! hindering portability is not evil. They allow you to check for @value{SSED} extensions or to do tasks that are required ! quite often, yet are unsupported by standard @command{sed}s. @table @code @item e [@var{command}] @findex e (evaluate) command @cindex Evaluate Bourne-shell commands *************** *** 1345,1351 **** This command allows one to pipe input from a shell command into pattern space. Without parameters, the @code{e} command executes the command that is found in pattern space and ! replaces pattern space with the output; a trailing new-line is suppressed. 
If a parameter is specified, instead, the @code{e} command --- 1260,1266 ---- This command allows one to pipe input from a shell command into pattern space. Without parameters, the @code{e} command executes the command that is found in pattern space and ! replaces the pattern space with the output; a trailing new-line is suppressed. If a parameter is specified, instead, the @code{e} command *************** *** 1353,1368 **** (like @code{r} does). The command can run across multiple lines, all but the last ending with a back-slash. ! In both cases, results are undefined if the command to be ! executed contains a @code{nul} character. ! ! @item W @var{filename} ! @findex W (write first line) command ! @cindex Write first line to a file ! @cindex @value{SSEDEXT}, writing first line to a file ! Write to the given filename the portion of the pattern space up to ! the first newline. Everything said under the @code{w} command about ! file handling holds here too. @item L @var{n} @findex L (fLow paragraphs) command --- 1268,1275 ---- (like @code{r} does). The command can run across multiple lines, all but the last ending with a back-slash. ! In both cases, the results are undefined if the command to be ! executed contains a @sc{nul} character. @item L @var{n} @findex L (fLow paragraphs) command *************** *** 1379,1392 **** preserved in the output; successive input lines with different indentation are not joined; tabs are expanded to 8 columns. ! If pattern space contains multiple lines, they are joined, but ! since pattern space usually contains a single line, the behavior ! of a simple @code{L;d} script is the same as @code{fmt -s} (i.e. it does not join short lines to form longer ones). @var{n} specifies the desired line-wrap length; if omitted, the default as specified on the command line is used. 
@item R @var{filename} @findex R (read line) command @cindex Read text from a file --- 1286,1322 ---- preserved in the output; successive input lines with different indentation are not joined; tabs are expanded to 8 columns. ! If the pattern space contains multiple lines, they are joined, but ! since the pattern space usually contains a single line, the behavior ! of a simple @code{L;d} script is the same as @samp{fmt -s} (i.e., it does not join short lines to form longer ones). @var{n} specifies the desired line-wrap length; if omitted, the default as specified on the command line is used. + @item Q [@var{exit-code}] + @findex Q (silent Quit) command + @cindex @value{SSEDEXT}, quitting silently + @cindex @value{SSEDEXT}, returning an exit code + @cindex Quitting + This command is the same as @code{q}, but will not print the + contents of pattern space. Like @code{q}, it provides the + ability to return an exit code to the caller. + + This command can be useful because the only alternative ways + to accomplish this apparently trivial function are to use + the @option{-n} option (which can unnecessarily complicate + your script) or resorting to the following snippet, which + wastes time by reading the whole file without any visible effect: + + @example + :eat + $d @i{Quit silently on the last line} + N @i{Read another line, silently} + g @i{Overwrite pattern space each time to save memory} + b eat + @end example + @item R @var{filename} @findex R (read line) command @cindex Read text from a file *************** *** 1403,1438 **** is supported for the file name, which reads a line from the standard input. @end table @node Escapes ! @section @sc{gnu} extensions for escapes in regular expressions @cindex @acronym{GNU} extensions, special escapes ! Until this chapter, you have only encountered escapes of the form ! @samp{\^}, which tell @code{sed} not to interpret the caret as a special character, but rather to take it literally. 
For example, @samp{\*} matches a single asterisk rather than zero or more backslashes. @cindex @code{POSIXLY_CORRECT} behavior, escapes ! This chapter introduces another kind of escapes@footnote{All ! the escapes that are introduced in this character are@sc{gnu} extensions, with the exception of @code{\n}. In basic regular expression mode, setting @code{POSIXLY_CORRECT} disables them.}---that is, escapes that are applied to a character or sequence of characters ! that ordinarily is taken literally, and that @code{sed} replaces with a special character. This provides a way of encoding non-printable characters in patterns in a visible manner. There is no restriction on the appearance of non-printing characters ! in a @code{sed} script but when a script is being prepared in the shell or by text editing, it is usually easier to use one of the following escape sequences than the binary character it represents: The list of these escapes is: ! @table @samp @item \a Produces or matches a @sc{bel} character, that is an ``alert'' (@sc{ascii} 7). --- 1333,1393 ---- is supported for the file name, which reads a line from the standard input. + @item T @var{label} + @findex T (test and branch if failed) command + @cindex @value{SSEDEXT}, branch if @code{s///} failed + @cindex Branch to a label, if @code{s///} failed + @cindex Conditional branch + Branch to @var{label} only if there have been no successful + @code{s}ubstitutions since the last input line was read or + conditional branch was taken. The @var{label} may be omitted, + in which case the next cycle is started. + + @item v + @findex v (version) command + @cindex @value{SSEDEXT}, checking for their presence + @cindex Requiring @value{SSED} + This command does nothing, but makes @command{sed} fail if + @value{SSED} extensions are not supported, simply because other + versions of @command{sed} do not implement it. 
+ + @item W @var{filename} + @findex W (write first line) command + @cindex Write first line to a file + @cindex @value{SSEDEXT}, writing first line to a file + Write to the given filename the portion of the pattern space up to + the first newline. Everything said under the @code{w} command about + file handling holds here too. @end table @node Escapes ! @section @acronym{GNU} Extensions for Escapes in Regular Expressions @cindex @acronym{GNU} extensions, special escapes ! Until this chapter, we have only encountered escapes of the form ! @samp{\^}, which tell @command{sed} not to interpret the caret as a special character, but rather to take it literally. For example, @samp{\*} matches a single asterisk rather than zero or more backslashes. @cindex @code{POSIXLY_CORRECT} behavior, escapes ! This chapter introduces another kind of escape@footnote{All ! the escapes introduced here are @acronym{GNU} extensions, with the exception of @code{\n}. In basic regular expression mode, setting @code{POSIXLY_CORRECT} disables them.}---that is, escapes that are applied to a character or sequence of characters ! that ordinarily are taken literally, and that @command{sed} replaces with a special character. This provides a way of encoding non-printable characters in patterns in a visible manner. There is no restriction on the appearance of non-printing characters ! in a @command{sed} script but when a script is being prepared in the shell or by text editing, it is usually easier to use one of the following escape sequences than the binary character it represents: The list of these escapes is: ! @table @code @item \a Produces or matches a @sc{bel} character, that is an ``alert'' (@sc{ascii} 7). *************** *** 1455,1461 **** Produces or matches @kbd{@sc{Control}-@var{x}}, where @var{x} is any character. The precise effect of @samp{\c@var{x}} is as follows: if @var{x} is a lower case letter, it is converted to upper case. ! Then bit 6 of the character (hex 40) is inverted. 
Thus "\cz" becomes hex 1A, but @samp{\c@{} becomes hex 3B, while @samp{\c;} becomes hex 7B. @item \d@var{xxx} --- 1410,1416 ---- Produces or matches @kbd{@sc{Control}-@var{x}}, where @var{x} is any character. The precise effect of @samp{\c@var{x}} is as follows: if @var{x} is a lower case letter, it is converted to upper case. ! Then bit 6 of the character (hex 40) is inverted. Thus @samp{\cz} becomes hex 1A, but @samp{\c@{} becomes hex 3B, while @samp{\c;} becomes hex 7B. @item \d@var{xxx} *************** *** 1477,1507 **** @end table @samp{\b} (backspace) was omitted because of the conflict with ! the existing "word boundary" meaning. Other escapes match particular character class and are only valid in regular expressions: ! @table @samp @item \s ! Matches any whitespace character @item \S ! Matches any character that is not a whitespace character @item \w Matches any ``word'' character. A ``word'' character is any letter or digit or the underscore character. @item \W ! Matches any ``non-word'' character @end table @node Examples ! @chapter Some sample scripts ! Here are some @code{sed} scripts to guide you in the art of mastering ! @code{sed}... @menu Some exotic examples: --- 1432,1463 ---- @end table @samp{\b} (backspace) was omitted because of the conflict with ! the existing ``word boundary'' meaning. ! @c \b AND \B ARE NOT INTRODUCED, NOR ARE \` and \'. ADR Other escapes match particular character class and are only valid in regular expressions: ! @table @code @item \s ! Matches any whitespace character. @item \S ! Matches any character that is not a whitespace character. @item \w Matches any ``word'' character. A ``word'' character is any letter or digit or the underscore character. @item \W ! Matches any ``non-word'' character. @end table @node Examples ! @chapter Some Sample Scripts ! Here are some @command{sed} scripts to guide you in the art of mastering ! @command{sed}. 
@menu Some exotic examples: *************** *** 1527,1536 **** @end menu @node Centering lines ! @section Centering lines ! This script will center all lines of a file on a 80 columns width. ! To change that width, the number in @code{\@{\@}} must be replaced, and the number of added spaces also must be changed. Note how the buffer commands are used to separate parts in --- 1483,1492 ---- @end menu @node Centering lines ! @section Centering Lines ! This script centers all lines of a file on an 80-column width. ! To change that width, the number in @code{\@{@dots{}\@}} must be replaced, and the number of added spaces also must be changed. Note how the buffer commands are used to separate parts in *************** *** 1541,1547 **** @example #!/usr/bin/sed -f ! #@r{ Put 80 spaces in the buffer} 1 @{ x s/^$/ / --- 1497,1503 ---- @example #!/usr/bin/sed -f ! # Put 80 spaces in the buffer 1 @{ x s/^$/ / *************** *** 1549,1577 **** x @} ! #@r{ del leading and trailing spaces} y/@kbd{tab}/ / s/^ *// s/ *$// ! #@r{ add a new-line and 80 spaces to end of line} G ! #@r{ keep first 81 chars (80 + a new-line)} s/^\(.\@{81\@}\).*$/\1/ ! #@r{ \2 matches half of the spaces, which are moved to the beginning} s/^\(.*\)\n\(.*\)\2/\2\1/ @end example @c end--------------------------------------------- @node Increment a number ! @section Increment a number This script is one of a few that demonstrate how to do arithmetic ! in @code{sed}. This is indeed possible@footnote{@code{sed} guru Greg ! Ubben wrote an implementation of the @code{dc} @sc{rpn} calculator! ! It is distributed together with sed.}, but must be done manually. To increment one number you just add 1 to last digit, replacing it by the following digit. There is one exception: when the digit --- 1505,1533 ---- x @} ! # del leading and trailing spaces y/@kbd{tab}/ / s/^ *// s/ *$// ! # add a new-line and 80 spaces to end of line G ! # keep first 81 chars (80 + a new-line) s/^\(.\@{81\@}\).*$/\1/ !
# \2 matches half of the spaces, which are moved to the beginning s/^\(.*\)\n\(.*\)\2/\2\1/ @end example @c end--------------------------------------------- @node Increment a number ! @section Increment a Number This script is one of a few that demonstrate how to do arithmetic ! in @command{sed}. This is indeed possible,@footnote{@command{sed} guru Greg ! Ubben wrote an implementation of the @command{dc} @sc{rpn} calculator! ! It is distributed together with sed.} but must be done manually. To increment one number you just add 1 to last digit, replacing it by the following digit. There is one exception: when the digit *************** *** 1580,1586 **** This solution by Bruno Haible is very clever and smart because it uses a single buffer; if you don't have this limitation, the ! algorithm used in @ref{cat -n, Numbering lines} is faster. It works by replacing trailing nines with an underscore, then using multiple @code{s} commands to increment the last digit, and then again substituting underscores with zeros. --- 1536,1542 ---- This solution by Bruno Haible is very clever and smart because it uses a single buffer; if you don't have this limitation, the ! algorithm used in @ref{cat -n, Numbering lines}, is faster. It works by replacing trailing nines with an underscore, then using multiple @code{s} commands to increment the last digit, and then again substituting underscores with zeros. *************** *** 1591,1607 **** /[^0-9]/ d ! #@r{ replace all leading 9s by _ (any other char except digits, could} ! #@r{ be used)} :d s/9\(_*\)$/_\1/ td ! #@r{ incr last digit only. The first line adds a most-significant} ! #@r{ digit of 1 if we have to add a digit.} ! #@r{} ! #@r{ The @code{tn} commands are not necessary, but make the thing} ! #@r{ faster} s/^\(_*\)$/1\1/; tn s/8\(_*\)$/9\1/; tn --- 1547,1563 ---- /[^0-9]/ d ! # replace all trailing 9s by _ (any other character except digits, could ! # be used) :d s/9\(_*\)$/_\1/ td ! # incr last digit only.
The first line adds a most-significant ! # digit of 1 if we have to add a digit. ! # ! # The @code{tn} commands are not necessary, but make the thing ! # faster s/^\(_*\)$/1\1/; tn s/8\(_*\)$/9\1/; tn *************** *** 1620,1634 **** @c end--------------------------------------------- @node Rename files to lower case ! @section Rename files to lower case ! This is a pretty strange use of @code{sed}. We transform text, and transform it to be shell commands, then just feed them to shell. ! Don't worry, even worse hacks are done when using @code{sed}; I have ! seen a script converting the output of @code{date} into a @code{bc} program! ! The main body of this is the @code{sed} script, which remaps the name from lower to upper (or vice-versa) and even checks out if the remapped name is the same as the original name. Note how the script is parameterized using shell --- 1576,1590 ---- @c end--------------------------------------------- @node Rename files to lower case ! @section Rename Files to Lower Case ! This is a pretty strange use of @command{sed}. We transform text, and transform it to be shell commands, then just feed them to shell. ! Don't worry, even worse hacks are done when using @command{sed}; I have ! seen a script converting the output of @command{date} into a @command{bc} program! ! The main body of this is the @command{sed} script, which remaps the name from lower to upper (or vice-versa) and even checks out if the remapped name is the same as the original name. Note how the script is parameterized using shell *************** *** 1637,1651 **** @c start------------------------------------------- @example #! /bin/sh ! #@r{ rename files to lower/upper case... } ! #@r{} ! #@r{ usage: } ! #@r{ move-to-lower * } ! #@r{ move-to-upper * } ! #@r{ or} ! #@r{ move-to-lower -R .} ! #@r{ move-to-upper -R .} ! #@r{} help() @{ --- 1593,1607 ---- @c start------------------------------------------- @example #! /bin/sh ! # rename files to lower/upper case... ! # ! 
# usage: ! # move-to-lower * ! # move-to-upper * ! # or ! # move-to-lower -R . ! # move-to-upper -R . ! # help() @{ *************** *** 1667,1680 **** @} apply_cmd='sh' ! finder='echo $* | tr " " "\n"' files_only= while : do case "$1" in -n) apply_cmd='cat' ;; ! -R) finder='find $* -type f';; -h) help ; exit 1 ;; *) break ;; esac --- 1623,1636 ---- @} apply_cmd='sh' ! finder='echo "$@" | tr " " "\n"' files_only= while : do case "$1" in -n) apply_cmd='cat' ;; ! -R) finder='find "$@" -type f';; -h) help ; exit 1 ;; *) break ;; esac *************** *** 1682,1688 **** done if [ -z "$1" ]; then ! echo Usage: $0 [-n] [-r] files... exit 1 fi --- 1638,1644 ---- done if [ -z "$1" ]; then ! echo Usage: $0 [-h] [-n] [-r] files... exit 1 fi *************** *** 1696,1730 **** eval $finder | sed -n ' ! #@r{ remove all trailing slashes} s/\/*$// ! #@r{ add ./ if there are no path, only filename} /\//! s/^/.\// ! #@r{ save path+filename} h ! #@r{ remove path} s/.*\/// ! #@r{ do conversion only on filename} y/'$FROM'/'$TO'/ ! #@r{ now line contains original path+file, while} ! #@r{ hold space contains the new filename} x ! #@r{ add converted file name to line, which now contains} ! #@r{ @var{path}/@var{file-name}\n@var{converted-file-name}} G ! #@r{ check if converted file name is equal to original file name,} ! #@r{ if it is, do not print nothing} /^.*\/\(.*\)\n\1/b ! #@r{ now, transform @code{@var{path}/@var{fromfile}\n@var{tofile}}, into} ! #@r{ @code{mv @var{path}/@var{fromfile} @var{path}/@var{tofile}} and print it} s/^\(.*\/\)\(.*\)\n\(.*\)$/mv \1\2 \1\3/p ' | $apply_cmd --- 1652,1686 ---- eval $finder | sed -n ' ! # remove all trailing slashes s/\/*$// ! # add ./ if there is no path, only a filename /\//! s/^/.\// ! # save path+filename h ! # remove path s/.*\/// ! # do conversion only on filename y/'$FROM'/'$TO'/ ! # now line contains original path+file, while ! # hold space contains the new filename x ! # add converted file name to line, which now contains ! 
# path/file-name\nconverted-file-name G ! # check if converted file name is equal to original file name, ! # if it is, do not print anything /^.*\/\(.*\)\n\1/b ! # now, transform path/fromfile\ntofile, into ! # mv path/fromfile path/tofile and print it s/^\(.*\/\)\(.*\)\n\(.*\)$/mv \1\2 \1\3/p ' | $apply_cmd *************** *** 1732,1741 **** @c end--------------------------------------------- @node Print bash environment ! @section Print bash environment This script strips the definition of the shell functions ! from the output of the @code{set} Bourne-shell command. @c start------------------------------------------- @example --- 1688,1697 ---- @c end--------------------------------------------- @node Print bash environment ! @section Print @command{bash} Environment This script strips the definition of the shell functions ! from the output of the @command{set} Bourne-shell command. @c start------------------------------------------- @example *************** *** 1744,1772 **** set | sed -n ' :x ! #@r{ if no occurrence of @samp{=()} print and load next line} ! /=() /! @{ p; b; @} ! #@r{ possible start of functions section} ! #@r{ save the line in case this is a var like FOO="() "} h ! #@r{ if the next line has a brace, we quit because} ! #@r{ nothing comes after functions} n /^@{/ q ! #@r{ print the old line} x; p ! #@r{ work on the new line now} x; bx ' @end example @c end--------------------------------------------- @node Reverse chars of lines ! @section Reverse chars of lines This script can be used to reverse the position of characters in lines. The technique moves two characters at a time, hence --- 1700,1734 ---- set | sed -n ' :x ! @ifinfo ! # if no occurrence of "=()" print and load next line ! @end ifinfo ! @ifnotinfo ! # if no occurrence of @samp{=()} print and load next line ! @end ifnotinfo ! /=()/! @{ p; b; @} ! / () $/! @{ p; b; @} ! # possible start of functions section ! # save the line in case this is a var like FOO="() " h !
# if the next line has a brace, we quit because ! # nothing comes after functions n /^@{/ q ! # print the old line x; p ! # work on the new line now x; bx ' @end example @c end--------------------------------------------- @node Reverse chars of lines ! @section Reverse Characters of Lines This script can be used to reverse the position of characters in lines. The technique moves two characters at a time, hence *************** *** 1776,1783 **** This is often needed to reset the flag that is tested by the @code{t} command. ! Imaginative readers will find uses to this script. An example ! is reversing the output of @code{banner}@footnote{This requires another script to pad the output of banner; for example @example --- 1738,1745 ---- This is often needed to reset the flag that is tested by the @code{t} command. ! Imaginative readers will find uses for this script. An example ! is reversing the output of @command{banner}.@footnote{This requires another script to pad the output of banner; for example @example *************** *** 1787,1793 **** sed -e :a -e '/^.\@{0,'$1'\@}$/ @{ s/$/ /; ba; @}' | ~/sedscripts/reverseline.sed @end example ! }. @c start------------------------------------------- @example --- 1749,1755 ---- sed -e :a -e '/^.\@{0,'$1'\@}$/ @{ s/$/ /; ba; @}' | ~/sedscripts/reverseline.sed @end example ! } @c start------------------------------------------- @example *************** *** 1795,1850 **** /../! b ! #@r{ Reverse a line. Begin embedding the line between two new-lines} s/^.*$/\ &\ / ! #@r{ Move first character at the end. The regexp matches until} ! #@r{ there are zero or one characters between the markers} tx :x s/\(\n.\)\(.*\)\(.\n\)/\3\2\1/ tx ! #@r{ Remove the new-line markers} s/\n//g @end example @c end--------------------------------------------- @node tac ! @section Reverse lines of files This one begins a series of totally useless (yet interesting) scripts emulating various Unix commands. This, in particular, ! is a @code{tac} workalike. ! 
Note that on implementations other than @sc{gnu} @code{sed} ! and @value{SSED} this script might easily overflow internal buffers. @c start------------------------------------------- @example #!/usr/bin/sed -nf ! #@r{ reverse all lines of input, i.e. first line became last, ...} ! #@r{ from the second line, the buffer (which contains all previous lines)} ! #@r{ is *appended* to current line, so, the order will be reversed} 1! G ! #@r{ on the last line we're done -- print everything} $ p ! #@r{ store everything on the buffer again} h @end example @c end--------------------------------------------- @node cat -n ! @section Numbering lines ! This script replaces @code{cat -n}; in fact it formats its output ! exactly like @sc{gnu} @code{cat} does. Of course this is completely useless and for two reasons: first, because somebody else did it in C, second, because the following --- 1757,1815 ---- /../! b ! # Reverse a line. Begin embedding the line between two new-lines s/^.*$/\ &\ / ! # Move first character at the end. The regexp matches until ! # there are zero or one characters between the markers tx :x s/\(\n.\)\(.*\)\(.\n\)/\3\2\1/ tx ! # Remove the new-line markers s/\n//g @end example @c end--------------------------------------------- @node tac ! @section Reverse Lines of Files This one begins a series of totally useless (yet interesting) scripts emulating various Unix commands. This, in particular, ! is a @command{tac} workalike. ! Note that on implementations other than @acronym{GNU} @command{sed} ! @ifset PERL ! and @value{SSED} ! @end ifset ! this script might easily overflow internal buffers. @c start------------------------------------------- @example #!/usr/bin/sed -nf ! # reverse all lines of input, i.e. first line became last, ... ! # from the second line, the buffer (which contains all previous lines) ! # is *appended* to current line, so, the order will be reversed 1! G ! # on the last line we're done -- print everything $ p ! 
# store everything on the buffer again h @end example @c end--------------------------------------------- @node cat -n ! @section Numbering Lines ! This script replaces @samp{cat -n}; in fact it formats its output ! exactly like @acronym{GNU} @command{cat} does. Of course this is completely useless and for two reasons: first, because somebody else did it in C, second, because the following *************** *** 1862,1869 **** @end example @c end--------------------------------------------- ! It uses @code{sed} to print the line number, then groups lines two ! by two using N. Of course, this script does not teach as much as the one presented below. The algorithm used for incrementing uses both buffers, so the line --- 1827,1834 ---- @end example @c end--------------------------------------------- ! It uses @command{sed} to print the line number, then groups lines two ! by two using @code{N}. Of course, this script does not teach as much as the one presented below. The algorithm used for incrementing uses both buffers, so the line *************** *** 1871,1914 **** is split so that changing digits go in a buffer and unchanged ones go in the other; the changed digits are modified in a single step (using a @code{y} command). The line number for the next line ! is then composed and stored in hold space, to be used in the next iteration. @c start------------------------------------------- @example #!/usr/bin/sed -nf ! #@r{ Prime the pump on the first line} x /^$/ s/^.*$/1/ ! #@r{ Add the correct line number before the pattern} G h ! #@r{ Format it and print it} s/^/ / s/^ *\(......\)\n/\1 /p ! #@r{ Get the line number from hold space; add a zero} ! #@r{ if we're going to add a digit on the next line} g s/\n.*$// /^9*$/ s/^/0/ ! #@r{ separate changing/unchanged digits with an x} s/.9*$/x&/ ! #@r{ keep changing digits in hold space} h s/^.*x// y/0123456789/1234567890/ x ! #@r{ keep unchanged digits in pattern space} s/x.*$// ! 
#@r{ compose the new number, remove the new-line implicitly added by G} G s/\n// h --- 1836,1879 ---- is split so that changing digits go in a buffer and unchanged ones go in the other; the changed digits are modified in a single step (using a @code{y} command). The line number for the next line ! is then composed and stored in the hold space, to be used in the next iteration. @c start------------------------------------------- @example #!/usr/bin/sed -nf ! # Prime the pump on the first line x /^$/ s/^.*$/1/ ! # Add the correct line number before the pattern G h ! # Format it and print it s/^/ / s/^ *\(......\)\n/\1 /p ! # Get the line number from hold space; add a zero ! # if we're going to add a digit on the next line g s/\n.*$// /^9*$/ s/^/0/ ! # separate changing/unchanged digits with an x s/.9*$/x&/ ! # keep changing digits in hold space h s/^.*x// y/0123456789/1234567890/ x ! # keep unchanged digits in pattern space s/x.*$// ! # compose the new number, remove the new-line implicitly added by G G s/\n// h *************** *** 1916,1928 **** @c end--------------------------------------------- @node cat -b ! @section Numbering non-blank lines ! Emulating @code{cat -b} is almost the same as @code{cat -n}---we only have to select which lines are to be numbered and which are not. The part that is common to this script and the previous one is ! not commented to show how important it is to comment @code{sed} scripts properly... @c start------------------------------------------- --- 1881,1893 ---- @c end--------------------------------------------- @node cat -b ! @section Numbering Non-blank Lines ! Emulating @samp{cat -b} is almost the same as @samp{cat -n}---we only have to select which lines are to be numbered and which are not. The part that is common to this script and the previous one is ! not commented to show how important it is to comment @command{sed} scripts properly... 
@c start------------------------------------------- *************** *** 1934,1940 **** b @} ! #@r{ Same as cat -n from now} x /^$/ s/^.*$/1/ G --- 1899,1905 ---- b @} ! # Same as cat -n from now x /^$/ s/^.*$/1/ G *************** *** 1957,1971 **** @c end--------------------------------------------- @node wc -c ! @section Counting chars ! This script shows another way to do arithmetic with @code{sed}. In this case we have to add possibly large numbers, so implementing this by successive increments would not be feasible (and possibly ! even more complicated to contrive than this script...). The approach is to map numbers to letters, kind of an abacus ! implemented with @code{sed}. @samp{a}s are units, @samp{b}s are tenths and so on: we simply add the number of characters on the current line as units, and then propagate the carry to tenths, hundredths, and so on. --- 1922,1936 ---- @c end--------------------------------------------- @node wc -c ! @section Counting Characters ! This script shows another way to do arithmetic with @command{sed}. In this case we have to add possibly large numbers, so implementing this by successive increments would not be feasible (and possibly ! even more complicated to contrive than this script). The approach is to map numbers to letters, kind of an abacus ! implemented with @command{sed}. @samp{a}s are units, @samp{b}s are tenths and so on: we simply add the number of characters on the current line as units, and then propagate the carry to tenths, hundredths, and so on. *************** *** 1984,1997 **** @example #!/usr/bin/sed -nf ! #@r{ Add n+1 a's to hold space (+1 is for the new-line)} s/./a/g H x s/\n/a/ ! #@r{ Do the carry. The t's and b's are not necessary,} ! #@r{ but they do speed up the thing} t a : a; s/aaaaaaaaaa/b/g; t b; b done : b; s/bbbbbbbbbb/c/g; t c; b done --- 1949,1962 ---- @example #!/usr/bin/sed -nf ! # Add n+1 a's to hold space (+1 is for the new-line) s/./a/g H x s/\n/a/ ! # Do the carry. 
The t's and b's are not necessary, ! # but they do speed up the thing t a : a; s/aaaaaaaaaa/b/g; t b; b done : b; s/bbbbbbbbbb/c/g; t c; b done *************** *** 2008,2014 **** b @} ! #@r{ On the last line, convert back to decimal} : loop /a/! s/[b-h]*/&0/ --- 1973,1979 ---- b @} ! # On the last line, convert back to decimal : loop /a/! s/[b-h]*/&0/ *************** *** 2030,2066 **** @c end--------------------------------------------- @node wc -w ! @section Counting words This script is almost the same as the previous one, once each of the words on the line is converted to a single @samp{a} (in the previous script each letter was changed to an @samp{a}). ! It is interesting that real @code{wc} programs have optimized ! loops for @code{wc -c}, so they are much slower at counting ! words rather than characters. These scripts' bottleneck, instead, is arithmetic, and hence the word-counting one is faster (it has to manage smaller numbers). Again, the common parts are not commented to show the importance ! of commenting @code{sed} scripts. @c start------------------------------------------- @example #!/usr/bin/sed -nf ! #@r{ Convert words to a's} s/[ @kbd{tab}][ @kbd{tab}]*/ /g s/^/ / s/ [^ ][^ ]*/a /g s/ //g ! #@r{ Append them to hold space} H x s/\n// ! #@r{ From here on it is the same as in wc -c.} /aaaaaaaaaa/! bx; s/aaaaaaaaaa/b/g /bbbbbbbbbb/! bx; s/bbbbbbbbbb/c/g /cccccccccc/! bx; s/cccccccccc/d/g --- 1995,2031 ---- @c end--------------------------------------------- @node wc -w ! @section Counting Words This script is almost the same as the previous one, once each of the words on the line is converted to a single @samp{a} (in the previous script each letter was changed to an @samp{a}). ! It is interesting that real @command{wc} programs have optimized ! loops for @samp{wc -c}, so they are much slower at counting ! words rather than characters. 
This script's bottleneck, instead, is arithmetic, and hence the word-counting one is faster (it has to manage smaller numbers). Again, the common parts are not commented to show the importance ! of commenting @command{sed} scripts. @c start------------------------------------------- @example #!/usr/bin/sed -nf ! # Convert words to a's s/[ @kbd{tab}][ @kbd{tab}]*/ /g s/^/ / s/ [^ ][^ ]*/a /g s/ //g ! # Append them to hold space H x s/\n// ! # From here on it is the same as in wc -c. /aaaaaaaaaa/! bx; s/aaaaaaaaaa/b/g /bbbbbbbbbb/! bx; s/bbbbbbbbbb/c/g /cccccccccc/! bx; s/cccccccccc/d/g *************** *** 2089,2098 **** @c end--------------------------------------------- @node wc -l ! @section Counting lines ! No strange things are done now, because @code{sed} gives us ! @code{wc -l} functionality for free!!! Look: @c start------------------------------------------- @example --- 2054,2063 ---- @c end--------------------------------------------- @node wc -l ! @section Counting Lines ! No strange things are done now, because @command{sed} gives us ! @samp{wc -l} functionality for free!!! Look: @c start------------------------------------------- @example *************** *** 2102,2110 **** @c end--------------------------------------------- @node head ! @section Printing the first lines ! This script is probably the simplest useful @code{sed} script. It displays the first 10 lines of input; the number of displayed lines is right before the @code{q} command. --- 2067,2075 ---- @c end--------------------------------------------- @node head ! @section Printing the First Lines ! This script is probably the simplest useful @command{sed} script. It displays the first 10 lines of input; the number of displayed lines is right before the @code{q} command. *************** *** 2116,2129 **** @c end--------------------------------------------- @node tail ! @section Printing the last lines Printing the last @var{n} lines rather than the first is more complex but indeed possible. 
@var{n} is encoded in the second line, before the bang character. ! This script is similar to the @code{tac} script in that it keeps the ! final output in hold space and prints it at the end: @c start------------------------------------------- @example --- 2081,2094 ---- @c end--------------------------------------------- @node tail ! @section Printing the Last Lines Printing the last @var{n} lines rather than the first is more complex but indeed possible. @var{n} is encoded in the second line, before the bang character. ! This script is similar to the @command{tac} script in that it keeps the ! final output in the hold space and prints it at the end: @c start------------------------------------------- @example *************** *** 2142,2158 **** restart the loop). The ``sliding window'' technique is a very powerful way to write ! efficient and complex @code{sed} scripts, because commands like @code{P} would require a lot of work if implemented manually. To introduce the technique, which is fully demonstrated in the rest of this chapter and is based on the @code{N}, @code{P} ! and @code{D} commands, here is an implementation of @code{tail} ! using a simple `sliding window'. This looks complicated but in fact the working is the same as the last script: after we have kicked in the appropriate number ! of lines, however, we stop using hold space to keep inter-line state, and instead use @code{N} and @code{D} to slide pattern space by one line: --- 2107,2123 ---- restart the loop). The ``sliding window'' technique is a very powerful way to write ! efficient and complex @command{sed} scripts, because commands like @code{P} would require a lot of work if implemented manually. To introduce the technique, which is fully demonstrated in the rest of this chapter and is based on the @code{N}, @code{P} ! and @code{D} commands, here is an implementation of @command{tail} ! 
using a simple ``sliding window.'' This looks complicated but in fact the working is the same as the last script: after we have kicked in the appropriate number ! of lines, however, we stop using the hold space to keep inter-line state, and instead use @code{N} and @code{D} to slide pattern space by one line: *************** *** 2171,2177 **** @node uniq ! @section Make duplicate lines unique This is an example of the art of using the @code{N}, @code{P} and @code{D} commands, probably the most difficult to master. --- 2136,2142 ---- @node uniq ! @section Make Duplicate Lines Unique This is an example of the art of using the @code{N}, @code{P} and @code{D} commands, probably the most difficult to master. *************** *** 2182,2188 **** h :b ! @r{On the last line, print and exit} $b N /^\(.*\)\n\1$/ @{ --- 2147,2153 ---- h :b ! # On the last line, print and exit $b N /^\(.*\)\n\1$/ @{ *************** *** 2192,2214 **** bb @} ! #@r{ If the @code{N} command had added the last line, print and exit} $b ! #@r{ The lines are different; print the first and go} ! #@r{ back working on the second.} P D @end example @c end--------------------------------------------- As you can see, we mantain a 2-line window using @code{P} and @code{D}. ! This technique is often used in advanced @code{sed} scripts. @node uniq -d ! @section Print duplicated lines of input ! This script prints only duplicated lines, like @code{uniq -d}. @c start------------------------------------------- @example --- 2157,2179 ---- bb @} ! # If the @code{N} command had added the last line, print and exit $b ! # The lines are different; print the first and go ! # back working on the second. P D @end example @c end--------------------------------------------- As you can see, we mantain a 2-line window using @code{P} and @code{D}. ! This technique is often used in advanced @command{sed} scripts. @node uniq -d ! @section Print Duplicated Lines of Input ! 
This script prints only duplicated lines, like @samp{uniq -d}. @c start------------------------------------------- @example *************** *** 2217,2227 **** $b N /^\(.*\)\n\1$/ @{ ! #@r{ Print the first of the duplicated lines} s/.*\n// p ! #@r{ Loop until we get a different line} :b $b N --- 2182,2192 ---- $b N /^\(.*\)\n\1$/ @{ ! # Print the first of the duplicated lines s/.*\n// p ! # Loop until we get a different line :b $b N *************** *** 2231,2255 **** @} @} ! #@r{ The last line cannot be followed by duplicates} $b ! #@r{ Found a different one. Leave it alone in the pattern space} ! #@r{ and go back to the top, hunting its duplicates} D @end example @c end--------------------------------------------- @node uniq -u ! @section Remove all duplicated lines ! This script prints only unique lines, like @code{uniq -u}. @c start------------------------------------------- @example #!/usr/bin/sed -f ! #@r{ Search for a duplicate line --- until that, print what you find.} $b N /^\(.*\)\n\1$/ ! @{ --- 2196,2220 ---- @} @} ! # The last line cannot be followed by duplicates $b ! # Found a different one. Leave it alone in the pattern space ! # and go back to the top, hunting its duplicates D @end example @c end--------------------------------------------- @node uniq -u ! @section Remove All Duplicated Lines ! This script prints only unique lines, like @samp{uniq -u}. @c start------------------------------------------- @example #!/usr/bin/sed -f ! # Search for a duplicate line --- until that, print what you find. $b N /^\(.*\)\n\1$/ ! @{ *************** *** 2258,2286 **** @} :c ! #@r{ Got two equal lines in pattern space. At the} ! #@r{ end of the file we simply exit} $d ! #@r{ Else, we keep reading lines with @code{N} until we} ! #@r{ find a different one} s/.*\n// N /^\(.*\)\n\1$/ @{ bc @} ! #@r{ Remove the last instance of the duplicate line} ! #@r{ and go back to the top} D @end example @c end--------------------------------------------- @node cat -s ! 
@section Squeezing blank lines As a final example, here are three scripts, of increasing complexity ! and speed, that implement the same function as @code{cat -s}, that is squeezing blank lines. The first leaves a blank line at the beginning and end if there are --- 2223,2251 ---- @} :c ! # Got two equal lines in pattern space. At the ! # end of the file we simply exit $d ! # Else, we keep reading lines with @code{N} until we ! # find a different one s/.*\n// N /^\(.*\)\n\1$/ @{ bc @} ! # Remove the last instance of the duplicate line ! # and go back to the top D @end example @c end--------------------------------------------- @node cat -s ! @section Squeezing Blank Lines As a final example, here are three scripts, of increasing complexity ! and speed, that implement the same function as @samp{cat -s}, that is squeezing blank lines. The first leaves a blank line at the beginning and end if there are *************** *** 2290,2305 **** @example #!/usr/bin/sed -f ! #@r{ on empty lines, join with next} ! #@r{ Note there is a star in the regexp} :x /^\n*$/ @{ N bx @} ! #@r{ now, squeeze all '\n', this can be also done by:} ! #@r{ @code{s/^\(\n\)*/\1/}} s/\n*/\ / @end example --- 2255,2270 ---- @example #!/usr/bin/sed -f ! # on empty lines, join with next ! # Note there is a star in the regexp :x /^\n*$/ @{ N bx @} ! # now, squeeze all '\n', this can be also done by: ! # s/^\(\n\)*/\1/ s/\n*/\ / @end example *************** *** 2313,2325 **** @example #!/usr/bin/sed -f ! #@r{ delete all leading empty lines} 1,/^./@{ /./!d @} ! #@r{ on an empty line we remove it and all the following} ! #@r{ empty lines, but one} :x /./!@{ N --- 2278,2290 ---- @example #!/usr/bin/sed -f ! # delete all leading empty lines 1,/^./@{ /./!d @} ! # on an empty line we remove it and all the following ! # empty lines, but one :x /./!@{ N *************** *** 2331,2366 **** This removes leading and trailing blank lines. It is also the fastest. Note that loops are completely done with @code{n} and ! 
@code{b}, without exploting the fact that @code{sed} cycles back to the top of the script automatically at the end of a line. @c start------------------------------------------- @example #!/usr/bin/sed -nf ! #@r{ delete all (leading) blanks} /./!d ! #@r{ get here: so there is a non empty} :x ! #@r{ print it} p ! #@r{ get next} n ! #@r{ got chars? print it again, etc... } /./bx ! #@r{ no, don't have chars: got an empty line} :z ! #@r{ get next, if last line we finish here so no trailing} ! #@r{ empty lines are written} n ! #@r{ also empty? then ignore it, and get next... this will} ! #@r{ remove ALL empty lines} /./!bz ! #@r{ all empty lines were deleted/ignored, but we have a non empty. As} ! #@r{ what we want to do is to squeeze, insert a blank line artificially} i\ bx --- 2296,2331 ---- This removes leading and trailing blank lines. It is also the fastest. Note that loops are completely done with @code{n} and ! @code{b}, without exploiting the fact that @command{sed} cycles back to the top of the script automatically at the end of a line. @c start------------------------------------------- @example #!/usr/bin/sed -nf ! # delete all (leading) blanks /./!d ! # get here: so there is a non empty :x ! # print it p ! # get next n ! # got chars? print it again, etc... /./bx ! # no, don't have chars: got an empty line :z ! # get next, if last line we finish here so no trailing ! # empty lines are written n ! # also empty? then ignore it, and get next... this will ! # remove ALL empty lines /./!bz ! # all empty lines were deleted/ignored, but we have a non empty. As ! # what we want to do is to squeeze, insert a blank line artificially i\ bx *************** *** 2368,2386 **** @c end--------------------------------------------- @node Limitations ! @chapter @value{SSED}'s limitations and non-limitations @cindex @acronym{GNU} extensions, unlimited line length @cindex Portability, line length limitations ! 
For those who want to write portable @code{sed} scripts, be aware that some implementations have been known to limit line lengths (for the pattern and hold spaces) to be no more than 4000 bytes. ! The @sc{posix.2} standard specifies that conforming @code{sed} implementations shall support at least 8192 byte line lengths. @value{SSED} has no built-in limit on line length; ! as long as it can malloc() more (virtual) memory, ! you can feed or construct lines as long as you care. However, recursion is used to handle subpatterns and indefinite repetition. This means that the available stack space may limit --- 2333,2351 ---- @c end--------------------------------------------- @node Limitations ! @chapter @value{SSED}'s Limitations and Non-limitations @cindex @acronym{GNU} extensions, unlimited line length @cindex Portability, line length limitations ! For those who want to write portable @command{sed} scripts, be aware that some implementations have been known to limit line lengths (for the pattern and hold spaces) to be no more than 4000 bytes. ! The @sc{posix} standard specifies that conforming @command{sed} implementations shall support at least 8192 byte line lengths. @value{SSED} has no built-in limit on line length; ! as long as it can @code{malloc()} more (virtual) memory, ! you can feed or construct lines as long as you like. However, recursion is used to handle subpatterns and indefinite repetition. This means that the available stack space may limit *************** *** 2405,2411 **** encountered. Here are a few distinctions between the real Perl-style ! regular expressions and those that @code{-R} recognizes. @enumerate @item --- 2370,2376 ---- encountered. Here are a few distinctions between the real Perl-style ! regular expressions and those that @option{-R} recognizes. @enumerate @item *************** *** 2462,2475 **** @end ifset @node Other Resources ! @chapter Other resources for learning about @code{sed} ! @cindex Additional reading about @code{sed} ! 
In addition to several books that have been written about @code{sed} (either specifically or as chapters in books which discuss ! shell programming), one can find out more about @code{sed} (including suggestions of a few books) from the FAQ ! for the sed-users mailing list, available from any of: @display @uref{http://www.student.northpark.edu/pemente/sed/sedfaq.html} @uref{http://sed.sf.net/grabbag/tutorials/sedfaq.html} --- 2427,2440 ---- @end ifset @node Other Resources ! @chapter Other Resources for Learning About @command{sed} ! @cindex Additional reading about @command{sed} ! In addition to several books that have been written about @command{sed} (either specifically or as chapters in books which discuss ! shell programming), one can find out more about @command{sed} (including suggestions of a few books) from the FAQ ! for the @code{sed-users} mailing list, available from any of: @display @uref{http://www.student.northpark.edu/pemente/sed/sedfaq.html} @uref{http://sed.sf.net/grabbag/tutorials/sedfaq.html} *************** *** 2478,2580 **** Also of interest are @uref{http://www.student.northpark.edu/pemente/sed/index.htm} and @uref{http://sed.sf.net/grabbag}, ! which include sed tutorials and other sed-related goodies. ! There is a ``sed-users'' mailing list maintained by Sven Guckes. To subscribe, visit @uref{http://groups.yahoo.com} and search for the @code{sed-users} mailing list. @node Reporting Bugs ! @chapter Reporting bugs @cindex Bugs, reporting Email bug reports to @email{bonzini@@gnu.org}. Be sure to include the word ``sed'' somewhere in the @code{Subject:} field. ! Also, please include the output of @code{sed --version} in the body of your report if at all possible. Please do not send a bug report like this: @example ! @r{[while building frobme-1.3.4]} ! $ configure ! 
sed: file sedscr line 1: Unknown option to 's' @end example If @value{SSED} doesn't configure your favorite package, take a few extra minutes to identify the specific problem and make a stand-alone test case. Unlike other programs such as C compilers, making such test ! cases for @code{sed} is quite simple. A stand-alone test case includes all the data necessary to perform the ! test, and the specific invocation of @code{sed} that causes the problem. The smaller a stand-alone test case is, the better. A test case should ! not involve something as far removed from @code{sed} as ``try to configure frobme-1.3.4''. Yes, that is in principle enough information to look for the bug, but that is not a very practical prospect. Here are a few commonly reported bugs that are not bugs. @table @asis ! @item @code{sed -n} and @code{s/regex/replace/p} ! @cindex Portability, @code{p} command and @samp{-n} flag ! @cindex Non-bugs, @code{p} command and @samp{-n} flag ! Some versions of sed ignore the `p' (print) option of an `s' command ! unless the `-n' command switch has been specified. Other versions ! always honor the `p' option. Both approaches are allowed by @sc{posix.2} ! and @sc{gnu} @code{sed} (on which @value{SSED} is based) is the ! latter sort; I judge this approach to be better (give enough rope ! etc.) when you write complex scripts, but portable scripts should ! be written to work correctly with either behavior. ! @item regex syntax clashes @cindex @acronym{GNU} extensions, to basic regular expressions @cindex Non-bugs, regex syntax clashes ! @code{sed} uses the Posix basic regular expression syntax. According to the standard, the meaning of some escape sequences is undefined in ! this syntax; notable in the case of @code{sed} are @code{\|}, ! @code{\+}, @code{\?}, @code{\@code{}, @code{\}}, @code{\<}, @code{\>}, @code{\b}, @code{\B}, @code{\w}, and @code{\W}. ! 
As in all GNU programs that use Posix basic regular expressions, sed interprets these escape sequences as meta-characters. So, @code{x\+} ! matches one or more occurrences of @code{x}. @code{abc\|def} matches ! either @code{abc} or @code{def}. This syntax may cause problems when running scripts written for other ! @code{sed}s. Some @code{sed} programs have been written with the assumption that @code{\|} and @code{\+} match the literal characters @code{|} and @code{+}. Such scripts must be modified by removing the spurious backslashes if they are to be used with modern implementations ! of @code{sed}, like @sc{gnu} @code{sed} or @value{SSED}. @cindex @acronym{GNU} extensions, special escapes ! In addition, this version of @code{sed} supports several escape characters (some of which are multi-character) to insert non-printable characters in scripts (@code{\a}, @code{\c}, @code{\d}, @code{\o}, @code{\r}, @code{\t}, @code{\v}, @code{\x}). These can cause similar problems ! with scripts written for other @code{sed}s. ! @item @code{-i} clobbers read-only files @cindex In-place editing @cindex @value{SSEDEXT}, in-place editing @cindex Non-bugs, in-place editing ! In short, @code{sed d -i} will let one delete the contents of ! a read-only file, and in general the @code{-i} option ! (@pxref{Invoking sed, , Invocation} will let one clobber protected files. This is not a bug, but rather a consequence of how the Unix filesystem works. The permissions on a file say what can happen to the data in that file, while the permissions on a directory say what can ! happen to the list of files in that directory. @code{sed -i} ! will not ever open for writing a file that is already on disk, ! rather, it will work on a temporary file that is finally renamed to the original name: if you rename or delete files, you're actually modifying the contents of the directory, so the operation depends on ! the permissions of the directory, not of the file). For this same ! 
reason, @code{sed} will not let one use @code{-i} on a writeable file in a read-only directory (but unbelievably nobody reports that as a bug@dots{}). @end table --- 2443,2588 ---- Also of interest are @uref{http://www.student.northpark.edu/pemente/sed/index.htm} and @uref{http://sed.sf.net/grabbag}, ! which include @command{sed} tutorials and other @command{sed}-related goodies. ! The @code{sed-users} mailing list itself is maintained by Sven Guckes. To subscribe, visit @uref{http://groups.yahoo.com} and search for the @code{sed-users} mailing list. @node Reporting Bugs ! @chapter Reporting Bugs @cindex Bugs, reporting Email bug reports to @email{bonzini@@gnu.org}. Be sure to include the word ``sed'' somewhere in the @code{Subject:} field. ! Also, please include the output of @samp{sed --version} in the body of your report if at all possible. Please do not send a bug report like this: @example ! @i{while building frobme-1.3.4} ! $ configure ! @error{} sed: file sedscr line 1: Unknown option to 's' @end example If @value{SSED} doesn't configure your favorite package, take a few extra minutes to identify the specific problem and make a stand-alone test case. Unlike other programs such as C compilers, making such test ! cases for @command{sed} is quite simple. A stand-alone test case includes all the data necessary to perform the ! test, and the specific invocation of @command{sed} that causes the problem. The smaller a stand-alone test case is, the better. A test case should ! not involve something as far removed from @command{sed} as ``try to configure frobme-1.3.4''. Yes, that is in principle enough information to look for the bug, but that is not a very practical prospect. Here are a few commonly reported bugs that are not bugs. @table @asis ! @item @samp{sed -n} and @samp{s/@var{regex}/@var{replace}/p} ! @cindex Portability, @code{p} command and @option{-n} flag ! @cindex Non-bugs, @code{p} command and @option{-n} flag ! 
Some versions of @command{sed} ignore the @code{p} (print) option of an @code{s} command ! unless the @option{-n} command-line option has been specified. Other versions ! always honor the @code{p} option. ! @c CHECK THE CURRENT STANDARD. ADR. ! Both approaches are allowed by @sc{posix} ! and @acronym{GNU} @command{sed} ! @ifset PERL ! (on which @value{SSED} is based) ! @end ifset ! is the latter sort; this approach is ! better when you write complex scripts and also more intuitive, but ! portable scripts should be written to work correctly with either ! behavior. ! @item @code{N} command on the last line ! @cindex Portability, @code{N} command on the last line ! @cindex Non-bugs, @code{N} command on the last line ! ! Most versions of @command{sed} exit without printing anything when ! the @code{N} command is issued on the last line of a file. ! @value{SSED} prints pattern space before exiting unless of course ! the @option{-n} command-line option has been specified. This choice is ! by design. ! ! For example, the behavior of ! @example ! sed N foo bar ! @end example ! @noindent ! would depend on whether foo has an even or an odd number of ! lines@footnote{which is the actual ``bug'' that prompted the ! change in behavior}. Or, when writing a script to read the ! next few lines following a pattern match, traditional ! implementations of @command{sed} would force you to write ! something like ! @example ! /foo/@{ $!N; $!N; $!N; $!N; $!N; $!N; $!N; $!N; $!N @} ! @end example ! @noindent ! instead of just ! @example ! /foo/@{ N;N;N;N;N;N;N;N;N; @} ! @end example ! ! In any case, the simplest workaround is to use @code{$d;N} in ! scripts that rely on the traditional behavior. ! ! @item Regex syntax clashes @cindex @acronym{GNU} extensions, to basic regular expressions @cindex Non-bugs, regex syntax clashes ! @command{sed} uses the @sc{posix} basic regular expression syntax. According to the standard, the meaning of some escape sequences is undefined in ! 
this syntax; notable in the case of @command{sed} are @code{\|}, ! @code{\+}, @code{\?}, @code{\`}, @code{\'}, @code{\<}, @code{\>}, @code{\b}, @code{\B}, @code{\w}, and @code{\W}. ! As in all GNU programs that use @sc{posix} basic regular expressions, @command{sed} interprets these escape sequences as meta-characters. So, @code{x\+} ! matches one or more occurrences of @samp{x}. @code{abc\|def} matches ! either @samp{abc} or @samp{def}. This syntax may cause problems when running scripts written for other ! @command{sed}s. Some @command{sed} programs have been written with the assumption that @code{\|} and @code{\+} match the literal characters @code{|} and @code{+}. Such scripts must be modified by removing the spurious backslashes if they are to be used with modern implementations ! of @command{sed}, like ! @ifset PERL ! @value{SSED} or ! @end ifset ! @acronym{GNU} @command{sed}. @cindex @acronym{GNU} extensions, special escapes ! In addition, this version of @command{sed} supports several escape characters (some of which are multi-character) to insert non-printable characters in scripts (@code{\a}, @code{\c}, @code{\d}, @code{\o}, @code{\r}, @code{\t}, @code{\v}, @code{\x}). These can cause similar problems ! with scripts written for other @command{sed}s. ! @item @option{-i} clobbers read-only files @cindex In-place editing @cindex @value{SSEDEXT}, in-place editing @cindex Non-bugs, in-place editing ! In short, @samp{sed -i} will let you delete the contents of ! a read-only file, and in general the @option{-i} option ! (@pxref{Invoking sed, , Invocation}) lets you clobber protected files. This is not a bug, but rather a consequence of how the Unix filesystem works. The permissions on a file say what can happen to the data in that file, while the permissions on a directory say what can ! happen to the list of files in that directory. @samp{sed -i} ! will not ever open for writing a file that is already on disk. ! 
Rather, it will work on a temporary file that is finally renamed to the original name: if you rename or delete files, you're actually modifying the contents of the directory, so the operation depends on ! the permissions of the directory, not of the file. For this same ! reason, @command{sed} does not let you use @option{-i} on a writeable file in a read-only directory (but unbelievably nobody reports that as a bug@dots{}). @end table *************** *** 2592,2598 **** @noindent Examples: ! @table @samp @item abc? becomes @samp{abc\?} when using extended regular expressions. It matches the literal string @samp{abc?}. --- 2600,2606 ---- @noindent Examples: ! @table @code @item abc? becomes @samp{abc\?} when using extended regular expressions. It matches the literal string @samp{abc?}. *************** *** 2620,2630 **** @appendix Perl-style regular expressions @cindex Perl-style regular expressions, syntax ! @emph{This part is taken from the @code{pcre.txt} file distributed together with the free @sc{pcre} regular expression matcher; it was written by Philip Hazel.} Perl introduced several extensions to regular expressions, some of them incompatible with the syntax of regular expressions ! accepted by Emacs and other @sc{gnu} tools (whose matcher was based on the Emacs matcher). @value{SSED} implements both kinds of extensions. --- 2628,2639 ---- @appendix Perl-style regular expressions @cindex Perl-style regular expressions, syntax ! @emph{This part is taken from the @file{pcre.txt} file distributed together ! with the free @sc{pcre} regular expression matcher; it was written by Philip Hazel.} Perl introduced several extensions to regular expressions, some of them incompatible with the syntax of regular expressions ! accepted by Emacs and other @acronym{GNU} tools (whose matcher was based on the Emacs matcher). @value{SSED} implements both kinds of extensions. *************** *** 2660,2666 **** @code{\b}, but with subpatterns). @item ! 
You can often improve performance by avoiding that @code{sed} wastes time with backtracking @item --- 2669,2675 ---- @code{\b}, but with subpatterns). @item ! You can often improve performance by avoiding that @command{sed} wastes time with backtracking @item *************** *** 2719,2725 **** @cindex Perl-style regular expressions, backreferences The handling of a backslash followed by a digit other than 0 ! is complicated. Outside a character class, @code{sed} reads it and any following digits as a decimal number. If the number is less than 10, or if there have been at least that many previous capturing left parentheses in the expression, the --- 2728,2734 ---- @cindex Perl-style regular expressions, backreferences The handling of a backslash followed by a digit other than 0 ! is complicated. Outside a character class, @command{sed} reads it and any following digits as a decimal number. If the number is less than 10, or if there have been at least that many previous capturing left parentheses in the expression, the *************** *** 2729,2753 **** Inside a character class, or if the decimal number is greater than 9 and there have not been that many capturing ! subpatterns, @code{sed} re-reads up to three octal digits following the backslash, and generates a single byte from the least significant 8 bits of the value. Any subsequent digits stand for themselves. For example: @example ! \040 @r{is another way of writing a space} ! \40 @r{is the same, provided there are fewer than 40} ! @r{previous capturing subpatterns} ! \7 @r{is always a back reference} ! \011 @r{is always a tab} ! \11 @r{might be a back reference, or another way of} ! @r{writing a tab} ! \0113 @r{is a tab followed by the character @samp{3}} ! \113 @r{is the character with octal code 113 (since there} ! @r{can be no more than 99 back references)} ! \377 @r{is a byte consisting entirely of 1 bits (@sc{ascii} 255)} ! \81 @r{is either a back reference, or a binary zero} ! 
@r{followed by the two characters @samp{81}} @end example Note that octal values of 100 or greater must not be introduced --- 2738,2762 ---- Inside a character class, or if the decimal number is greater than 9 and there have not been that many capturing ! subpatterns, @command{sed} re-reads up to three octal digits following the backslash, and generates a single byte from the least significant 8 bits of the value. Any subsequent digits stand for themselves. For example: @example ! \040 @i{is another way of writing a space} ! \40 @i{is the same, provided there are fewer than 40} ! @i{previous capturing subpatterns} ! \7 @i{is always a back reference} ! \011 @i{is always a tab} ! \11 @i{might be a back reference, or another way of} ! @i{writing a tab} ! \0113 @i{is a tab followed by the character @samp{3}} ! \113 @i{is the character with octal code 113 (since there} ! @i{can be no more than 99 back references)} ! \377 @i{is a byte consisting entirely of 1 bits (@sc{ascii} 255)} ! \81 @i{is either a back reference, or a binary zero} ! @i{followed by the two characters @samp{81}} @end example Note that octal values of 100 or greater must not be introduced *************** *** 2817,2823 **** note that @code{\b} has a different meaning, namely the backspace character, inside a character class). Note that Perl mode does not support directly assertions ! for the beginning and the end of word; the @sc{gnu} extensions @code{\<} and @code{\>} achieve this purpose in @sc{posix} mode instead. --- 2826,2832 ---- note that @code{\b} has a different meaning, namely the backspace character, inside a character class). Note that Perl mode does not support directly assertions ! for the beginning and the end of word; the @acronym{GNU} extensions @code{\<} and @code{\>} achieve this purpose in @sc{posix} mode instead. 
*************** *** 2825,2831 **** from the traditional circumflex and dollar (described below) in that they only ever match at the very start and end of the subject string, whatever options are set; in particular @code{\A} ! and @code{\z} are the same as the @sc{gnu} extensions @code{\`} and @code{\'} that are active in @sc{posix} mode. @node Caret/dollar/full stop --- 2834,2840 ---- from the traditional circumflex and dollar (described below) in that they only ever match at the very start and end of the subject string, whatever options are set; in particular @code{\A} ! and @code{\z} are the same as the @acronym{GNU} extensions @code{\`} and @code{\'} that are active in @sc{posix} mode. @node Caret/dollar/full stop *************** *** 3228,3236 **** one that does not match the syntax of a quantifier, is taken as a literal character. For example, @{,6@} is not a quantifier, but a literal string of four characters.@footnote{It ! raises an error if @code{-R} is not used.} ! The quantifier @{0@} is permitted, causing the expression to behave as if the previous item and the quantifier were not present. --- 3237,3245 ---- one that does not match the syntax of a quantifier, is taken as a literal character. For example, @{,6@} is not a quantifier, but a literal string of four characters.@footnote{It ! raises an error if @option{-R} is not used.} ! The quantifier @samp{@{0@}} is permitted, causing the expression to behave as if the previous item and the quantifier were not present. *************** *** 3647,3653 **** @noindent when applied to a long string which does not match. Because ! matching proceeds from left to right, @code{sed} will look for each @samp{a} in the subject and then see if what follows matches the rest of the pattern. If the pattern is specified as --- 3656,3662 ---- @noindent when applied to a long string which does not match. Because ! 
matching proceeds from left to right, @command{sed} will look for each @samp{a} in the subject and then see if what follows matches the rest of the pattern. If the pattern is specified as *************** *** 3813,3819 **** The @code{(?p@{...@})} item interpolates Perl code at run time, and in this case refers recursively to the pattern in which it ! appears. Obviously, @code{sed} cannot support the interpolation of Perl code. Instead, the special item @code{(?R)} is provided for the specific case of recursion. This pattern solves the parentheses problem (assume the @code{X} modifier option is used --- 3822,3828 ---- The @code{(?p@{...@})} item interpolates Perl code at run time, and in this case refers recursively to the pattern in which it ! appears. Obviously, @command{sed} cannot support the interpolation of Perl code. Instead, the special item @code{(?R)} is provided for the specific case of recursion. This pattern solves the parentheses problem (assume the @code{X} modifier option is used *************** *** 3878,3884 **** @unnumbered Concept Index This is a general index of all issues discussed in this manual, with the ! exception of the @code{sed} commands and command-line options. @printindex cp --- 3887,3893 ---- @unnumbered Concept Index This is a general index of all issues discussed in this manual, with the ! exception of the @command{sed} commands and command-line options. @printindex cp *************** *** 3886,3892 **** @node Command and Option Index @unnumbered Command and Option Index ! This is an alphabetical list of all @code{sed} commands and command-line options. @printindex fn --- 3895,3901 ---- @node Command and Option Index @unnumbered Command and Option Index ! This is an alphabetical list of all @command{sed} commands and command-line options. 
@printindex fn diff -rNC3 sed-4.0.3/doc/sed.1 sed-4.0.4/doc/sed.1 *** sed-4.0.3/doc/sed.1 Thu Nov 21 12:46:05 2002 --- sed-4.0.4/doc/sed.1 Thu Dec 12 20:14:25 2002 *************** *** 1,7 **** .\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.28. ! .TH SED "1" "October 2002" "sed version 4.0.1" "User Commands" .SH NAME ! sed \- manual page for sed version 4.0.1 .SH SYNOPSIS .B sed [\fIOPTION\fR]... \fI{script-only-if-no-other-script} \fR[\fIinput-file\fR]... --- 1,7 ---- .\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.28. ! .TH SED "1" "November 2002" "sed version 4.0.3" "User Commands" .SH NAME ! sed \- manual page for sed version 4.0.3 .SH SYNOPSIS .B sed [\fIOPTION\fR]... \fI{script-only-if-no-other-script} \fR[\fIinput-file\fR]... *************** *** 340,346 **** Also, please include the output of ``sed --version'' in the body of your report if at all possible. .SH COPYRIGHT ! Copyright \(co 1999 Free Software Foundation, Inc. .br This is free software; see the source for copying conditions. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, --- 340,346 ---- Also, please include the output of ``sed --version'' in the body of your report if at all possible. .SH COPYRIGHT ! Copyright \(co 2002 Free Software Foundation, Inc. .br This is free software; see the source for copying conditions. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, diff -rNC3 sed-4.0.3/doc/sed.info sed-4.0.4/doc/sed.info *** sed-4.0.3/doc/sed.info Thu Nov 21 12:45:10 2002 --- sed-4.0.4/doc/sed.info Thu Dec 12 20:14:25 2002 *************** *** 1,5 **** --- 1,6 ---- This is sed.info, produced by makeinfo version 4.2 from sed.texi. + INFO-DIR-SECTION Text creation and manipulation START-INFO-DIR-ENTRY * sed: (sed). Stream EDitor. *************** *** 7,61 ****  Indirect: ! sed.info-1: 156 ! sed.info-2: 50025  Tag Table: (Indirect) ! Node: Top156 ! Node: Introduction3733 ! Node: Invoking sed4287 ! 
Ref: Invoking sed-Footnote-18137 ! Ref: Invoking sed-Footnote-28329 ! Node: sed Programs8436 ! Node: Addresses9597 ! Node: Regular Expressions14269 ! Node: Data Spaces19430 ! Node: Common Commands19961 ! Node: The "s" Command22487 ! Ref: The "s" Command-Footnote-126851 ! Node: Other Commands26923 ! Ref: Other Commands-Footnote-133993 ! Node: Programming Commands34065 ! Node: SSED-specific Commands35214 ! Node: Escapes38851 ! Ref: Escapes-Footnote-141356 ! Node: Examples41541 ! Node: Centering lines42638 ! Node: Increment a number43552 ! Ref: Increment a number-Footnote-145126 ! Node: Rename files to lower case45246 ! Node: Print bash environment48042 ! Node: Reverse chars of lines48795 ! Ref: Reverse chars of lines-Footnote-149803 ! Node: tac50025 ! Node: cat -n50820 ! Node: cat -b52672 ! Node: wc -c53424 ! Ref: wc -c-Footnote-155359 ! Node: wc -w55428 ! Node: wc -l56901 ! Node: head57138 ! Node: tail57462 ! Node: uniq58893 ! Node: uniq -d59687 ! Node: uniq -u60411 ! Node: cat -s61134 ! Node: Limitations63050 ! Node: Other Resources63890 ! Node: Reporting Bugs64810 ! Node: Extended regexps68764 ! Node: Concept Index69931 ! Node: Command and Option Index80637  End Tag Table --- 8,63 ----  Indirect: ! sed.info-1: 204 ! sed.info-2: 48550  Tag Table: (Indirect) ! Node: Top204 ! Node: Introduction3781 ! Node: Invoking sed4335 ! Ref: Invoking sed-Footnote-18140 ! Ref: Invoking sed-Footnote-28332 ! Node: sed Programs8439 ! Node: Addresses9595 ! Node: Regular Expressions14238 ! Node: Data Spaces19530 ! Node: Common Commands20061 ! Node: The "s" Command22584 ! Ref: The "s" Command-Footnote-126974 ! Node: Other Commands27046 ! Ref: Other Commands-Footnote-131971 ! Node: Programming Commands32043 ! Node: Extended Commands32951 ! Node: Escapes36584 ! Ref: Escapes-Footnote-139086 ! Node: Examples39250 ! Node: Centering lines40345 ! Node: Increment a number41259 ! Ref: Increment a number-Footnote-142839 ! Node: Rename files to lower case42959 ! 
Node: Print bash environment45747 ! Node: Reverse chars of lines46527 ! Ref: Reverse chars of lines-Footnote-147546 ! Node: tac47768 ! Node: cat -n48550 ! Node: cat -b50408 ! Node: wc -c51160 ! Ref: wc -c-Footnote-153102 ! Node: wc -w53171 ! Node: wc -l54643 ! Node: head54880 ! Node: tail55204 ! Node: uniq56643 ! Node: uniq -d57439 ! Node: uniq -u58163 ! Node: cat -s58887 ! Node: Limitations60800 ! Node: Other Resources61640 ! Node: Reporting Bugs62566 ! Ref: Reporting Bugs-Footnote-167318 ! Node: Extended regexps67389 ! Node: Concept Index68556 ! Node: Command and Option Index78474  End Tag Table diff -rNC3 sed-4.0.3/doc/sed.info-1 sed-4.0.4/doc/sed.info-1 *** sed-4.0.3/doc/sed.info-1 Thu Nov 21 12:45:10 2002 --- sed-4.0.4/doc/sed.info-1 Thu Dec 12 20:14:25 2002 *************** *** 1,11 **** This is sed.info, produced by makeinfo version 4.2 from sed.texi. START-INFO-DIR-ENTRY * sed: (sed). Stream EDitor. END-INFO-DIR-ENTRY ! This file documents version 4.0.3 of GNU `sed', a stream editor. Copyright (C) 1998, 1999, 2001, 2002 Free Software Foundation, Inc. --- 1,12 ---- This is sed.info, produced by makeinfo version 4.2 from sed.texi. + INFO-DIR-SECTION Text creation and manipulation START-INFO-DIR-ENTRY * sed: (sed). Stream EDitor. END-INFO-DIR-ENTRY ! This file documents version 4.0.4 of GNU `sed', a stream editor. Copyright (C) 1998, 1999, 2001, 2002 Free Software Foundation, Inc. *************** *** 25,31 **** ! This file documents version 4.0.3 of GNU `sed', a stream editor. Copyright (C) 1998, 1999, 2001, 2002 Free Software Foundation, Inc. --- 26,32 ---- ! This file documents version 4.0.4 of GNU `sed', a stream editor. Copyright (C) 1998, 1999, 2001, 2002 Free Software Foundation, Inc. *************** *** 66,72 **** * The "s" Command:: `sed''s Swiss Army Knife * Other Commands:: Less frequently used commands * Programming Commands:: Commands for `sed' gurus ! 
* SSED-specific Commands:: Commands specific of GNU `sed' * Escapes:: Specifying special characters Examples: --- 67,73 ---- * The "s" Command:: `sed''s Swiss Army Knife * Other Commands:: Less frequently used commands * Programming Commands:: Commands for `sed' gurus ! * Extended Commands:: Commands specific of GNU `sed' * Escapes:: Specifying special characters Examples: *************** *** 110,138 **** `sed' may be invoked with the following command-line options: ! ``-V'' ! ``--version'' Print out the version of `sed' that is being run and a copyright notice, then exit. ! ``-h'' ! ``--help'' Print a usage message briefly summarizing these command-line options and the bug-reporting address, then exit. ! ``-n'' ! ``--quiet'' ! ``--silent'' ! By default, `sed' will print out the pattern space at the end of ! each cycle through the script. These options disable this ! automatic printing, and `sed' will only produce output when ! explicitly told to via the `p' command. ! ``-i'[SUFFIX]' ! ``--in-place[=SUFFIX]'' This option specifies that files are to be edited in-place. GNU `sed' does this by creating a temporary file and sending output to ! this file rather than to the standard output(1). When the end of the file is reached, the temporary file is renamed to the output file's original name. --- 111,139 ---- `sed' may be invoked with the following command-line options: ! `-V' ! `--version' Print out the version of `sed' that is being run and a copyright notice, then exit. ! `-h' ! `--help' Print a usage message briefly summarizing these command-line options and the bug-reporting address, then exit. ! `-n' ! `--quiet' ! `--silent' ! By default, `sed' prints out the pattern space at the end of each ! cycle through the script. These options disable this automatic ! printing, and `sed' only produces output when explicitly told to ! via the `p' command. ! `-i[SUFFIX]' ! `--in-place[=SUFFIX]' This option specifies that files are to be edited in-place. 
GNU `sed' does this by creating a temporary file and sending output to ! this file rather than to the standard output.(1) When the end of the file is reached, the temporary file is renamed to the output file's original name. *************** *** 150,193 **** This option implies `-s'. ! ``-l' N' ! ``--line-length=N'' ! Specify the default line-wrap length for the 'l' command. A length of 0 (zero) means to never wrap long lines. If not specified, it is taken to be 70. ! ``-r'' ! ``--regexp-extended'' Use extended regular expressions rather than basic regular expressions. Extended regexps are those that `egrep' accepts; they can be clearer because they usually have less backslashes, ! but are a GNU extension and hence scripts that use it are not portable. *Note Extended regular expressions: Extended regexps. ! ``-s'' ! ``--separate'' By default, `sed' will consider the files specified on the command line as a single continuous long stream. This GNU `sed' extension ! allows the user to consider them separate files: range addresses ! (such as `/abc/,/def/') are not allowed to span several files, ! line numbers are relative to the start of each file, `$' refers to ! the last line of each file, and files invoked from the `R' ! commands are rewound at the start of each file. ! ``-u'' ! ``--unbuffered'' Buffer both input and output as minimally as practical. (This is particularly useful if the input is coming from the likes of `tail -f', and you wish to see the transformed output as soon as possible.) ! ``-e' SCRIPT' ! ``--expression=SCRIPT'' Add the commands in SCRIPT to the set of commands to be run while processing the input. ! ``-f' SCRIPT-FILE' ! ``--file=SCRIPT-FILE'' Add the commands contained in the file SCRIPT-FILE to the set of commands to be run while processing the input. --- 151,194 ---- This option implies `-s'. ! `-l N' ! `--line-length=N' ! Specify the default line-wrap length for the `l' command. A length of 0 (zero) means to never wrap long lines. 
If not specified, it is taken to be 70. ! `-r' ! `--regexp-extended' Use extended regular expressions rather than basic regular expressions. Extended regexps are those that `egrep' accepts; they can be clearer because they usually have less backslashes, ! but are a GNU extension and hence scripts that use them are not portable. *Note Extended regular expressions: Extended regexps. ! `-s' ! `--separate' By default, `sed' will consider the files specified on the command line as a single continuous long stream. This GNU `sed' extension ! allows the user to consider them as separate files: range ! addresses (such as `/abc/,/def/') are not allowed to span several ! files, line numbers are relative to the start of each file, `$' ! refers to the last line of each file, and files invoked from the ! `R' commands are rewound at the start of each file. ! `-u' ! `--unbuffered' Buffer both input and output as minimally as practical. (This is particularly useful if the input is coming from the likes of `tail -f', and you wish to see the transformed output as soon as possible.) ! `-e SCRIPT' ! `--expression=SCRIPT' Add the commands in SCRIPT to the set of commands to be run while processing the input. ! `-f SCRIPT-FILE' ! `--file=SCRIPT-FILE' Add the commands contained in the file SCRIPT-FILE to the set of commands to be run while processing the input. *************** *** 218,226 **** A `sed' program consists of one or more `sed' commands, passed in by one or more of the `-e', `-f', `--expression', and `--file' options, or the first non-option argument if zero of these options are used. This ! document will refer to "the" `sed' script; this will be understood to ! mean the in-order catenation of all of the SCRIPTs and SCRIPT-FILEs ! passed in. 
Each `sed' command consists of an optional address or address range, followed by a one-character command name and any additional --- 219,227 ---- A `sed' program consists of one or more `sed' commands, passed in by one or more of the `-e', `-f', `--expression', and `--file' options, or the first non-option argument if zero of these options are used. This ! document will refer to "the" `sed' script; this is understood to mean ! the in-order catenation of all of the SCRIPTs and SCRIPT-FILEs passed ! in. Each `sed' command consists of an optional address or address range, followed by a one-character command name and any additional *************** *** 235,241 **** * The "s" Command:: `sed''s Swiss Army Knife * Other Commands:: Less frequently used commands * Programming Commands:: Commands for `sed' gurus ! * SSED-specific Commands:: Commands specific of GNU `sed' * Escapes:: Specifying special characters  --- 236,242 ---- * The "s" Command:: `sed''s Swiss Army Knife * Other Commands:: Less frequently used commands * Programming Commands:: Commands for `sed' gurus ! * Extended Commands:: Commands specific of GNU `sed' * Escapes:: Specifying special characters  *************** *** 276,282 **** expression is compiled, thus it is illegal to specify them together with the empty regular expression. If `POSIXLY_CORRECT' is set, instead, `//' is the null match: this behavior is mandated ! by POSIX, but it would break too many legacy sed scripts to blithely change GNU `sed''s default behavior. `\%REGEXP%' --- 277,283 ---- expression is compiled, thus it is illegal to specify them together with the empty regular expression. If `POSIXLY_CORRECT' is set, instead, `//' is the null match: this behavior is mandated ! by POSIX, but it would break too many legacy `sed' scripts to blithely change GNU `sed''s default behavior. `\%REGEXP%' *************** *** 284,291 **** This also matches the regular expression REGEXP, but allows one to use a different delimiter than `/'. 
This is particularly useful ! if the REGEXP itself contains a lot of `/'s, since it avoids the ! tedious escaping of every `/'. If REGEXP itself includes any delimiter characters, each must be escaped by a backslash (`\'). `/REGEXP/I' --- 285,292 ---- This also matches the regular expression REGEXP, but allows one to use a different delimiter than `/'. This is particularly useful ! if the REGEXP itself contains a lot of slashes, since it avoids ! the tedious escaping of every `/'. If REGEXP itself includes any delimiter characters, each must be escaped by a backslash (`\'). `/REGEXP/I' *************** *** 299,307 **** extension which causes `^' and `$' to match respectively (in addition to the normal behavior) the empty string after a new-line, and the empty string before a new-line. There are special ! character sequences (`\A' and `\Z' in Perl mode, `\`' and `\'' in ! basic or extended regular expression modes) which always match the ! beginning or the end of the buffer. `M' stands for `multi-line'. If no addresses are given, then all lines are matched; if one address is given, then only lines matching that address are matched. --- 300,308 ---- extension which causes `^' and `$' to match respectively (in addition to the normal behavior) the empty string after a new-line, and the empty string before a new-line. There are special ! character sequences (`\`' and `\'' in basic or extended regular ! expression modes) which always match the beginning or the end of ! the buffer. `M' stands for `multi-line'. If no addresses are given, then all lines are matched; if one address is given, then only lines matching that address are matched. *************** *** 318,348 **** If the second address is a NUMBER less than (or equal to) the line matching the first address, then only the one line is matched. ! GNU `sed' also supports some special 2-address forms: `0,ADDR2' Start out in "matched first address" state, until ADDR2 is found. ! 
This is similar to 1,ADDR2, except that if ADDR2 matches the very ! first line of input the 0,ADDR2 form will be at the end of its ! range, whereas the 1,ADDR2 form will still be at the beginning of ! its range. `ADDR1,+N' ! Will match ADDR1 and the N lines following ADDR1. `ADDR1,~N' ! Will match ADDR1 and the lines following ADDR1 until the next line whose input line number is a multiple of N. Appending the `!' character to the end of an address specification ! will negate the sense of the match. That is, if the `!' character ! follows an address range, then only lines which do _not_ match the ! address range will be selected. This also works for singleton ! addresses, and, perhaps perversely, for the null address.  File: sed.info, Node: Regular Expressions, Next: Data Spaces, Prev: Addresses, Up: sed Programs ! Overview of regular expression syntax ===================================== To know how to use `sed', people should understand regular --- 319,349 ---- If the second address is a NUMBER less than (or equal to) the line matching the first address, then only the one line is matched. ! GNU `sed' also supports some special two-address forms: `0,ADDR2' Start out in "matched first address" state, until ADDR2 is found. ! This is similar to `1,ADDR2', except that if ADDR2 matches the ! very first line of input the 0,ADDR2 form will be at the end of ! its range, whereas the 1,ADDR2 form will still be at the beginning ! of its range. `ADDR1,+N' ! Matches ADDR1 and the N lines following ADDR1. `ADDR1,~N' ! Matches ADDR1 and the lines following ADDR1 until the next line whose input line number is a multiple of N. Appending the `!' character to the end of an address specification ! negates the sense of the match. That is, if the `!' character follows ! an address range, then only lines which do _not_ match the address range ! will be selected. This also works for singleton addresses, and, ! perhaps perversely, for the null address.  
File: sed.info, Node: Regular Expressions, Next: Data Spaces, Prev: Addresses, Up: sed Programs ! Overview of Regular Expression Syntax ===================================== To know how to use `sed', people should understand regular *************** *** 362,372 **** description of regular expression syntax as used in `sed'. `CHAR' ! A single char, if not special, is matched against text. `*' ! Matches a sequence of zero or more repetitions of previous char, ! grouped regexp (see below), or class. `\+' As *, but matches one or more. It is a GNU extension. --- 363,373 ---- description of regular expression syntax as used in `sed'. `CHAR' ! A single character, if not special, is matched against text. `*' ! Matches a sequence of zero or more repetitions of previous ! character, grouped regexp (see below), or class. `\+' As *, but matches one or more. It is a GNU extension. *************** *** 387,422 **** `\(REGEXP\)' Groups the inner REGEXP as a whole, this is used to: ! * apply postfix operators, like `\(abcd\)*': this will search for zero or more whole sequences of `abcd', while `abcd*' would search for `abc' followed by zero or more occurrences ! of `d' ! * use back references (see below) `.' Matches any character `^' ! Match the null string at beginning of line, i.e. what what appears after the caret must appear at the beginning of line. `^#include' ! will match only lines where "#include" is the first thing on ! line--if there are one or two spaces before, the match fails. `$' It is the same as `^', but refers to end of line `[LIST]' `[^LIST]' ! Matches any single char in LIST: for example, `[aeiou]' matches ! all vowels. A list may include sequences like `CHAR1-CHAR2', which ! matches any character between (inclusive) CHAR1 and CHAR2. The caret reverses the meaning of the regexp, so that it matches ! any single char NOT in list. To include `]' in the list, make it ! the first character (after the caret if needed), to include `-' in ! 
the list, make it the first or last; to include `^' put it after ! the first character. `REGEXP1\|REGEXP2' Matches either REGEXP1 or REGEXP2. Use parentheses to use complex --- 388,425 ---- `\(REGEXP\)' Groups the inner REGEXP as a whole, this is used to: ! * Apply postfix operators, like `\(abcd\)*': this will search for zero or more whole sequences of `abcd', while `abcd*' would search for `abc' followed by zero or more occurrences ! of `d'. Note that this is not in the POSIX standard and ! hence is not portable. ! * Use back references (see below) `.' Matches any character `^' ! Matches the null string at beginning of line, i.e. what appears after the caret must appear at the beginning of line. `^#include' ! will match only lines where `#include' is the first thing on ! line--if there are spaces before, for example, the match fails. `$' It is the same as `^', but refers to end of line `[LIST]' `[^LIST]' ! Matches any single character in LIST: for example, `[aeiou]' ! matches all vowels. A list may include sequences like ! `CHAR1-CHAR2', which matches any character between (inclusive) ! CHAR1 and CHAR2. The caret reverses the meaning of the regexp, so that it matches ! any single character NOT in list. To include `]' in the list, ! make it the first character (after the caret if needed), to ! include `-' in the list, make it the first or last; to include `^' ! put it after the first character. `REGEXP1\|REGEXP2' Matches either REGEXP1 or REGEXP2. Use parentheses to use complex *************** *** 425,453 **** succeeds is used. It is a GNU extension. `\DIGIT' ! Matches the DIGIT-th `\(\)' reference in the regular expression. `\CHAR' ! Matches character CHAR; this is to be used to match special chars, ! referred above. Note that the only C-like backslash sequence that ! you can portably assume to be interpreted is `\n' for a new-line; ! in particular `\t' matches a `t' under most implementations of ! `sed', rather than a tabulation character. ! 
Note that the regular expression matcher is greedy, i.e. if two or ! more matches are detected, it selects the longest, if there are two or more selected with the same size, it selects the first in text. Examples: `abcdef' ! Matches `abcdef' `a*b' Matches zero or more `a's followed by a single `b'. For example, `b' or `aaaaab'. `a\?b' ! Matches `b' or `ab' `a\+b\+' Matches one or more `a's followed by one or more `b's: `ab' is the --- 428,456 ---- succeeds is used. It is a GNU extension. `\DIGIT' ! Matches the DIGIT-th `\(...\)' reference in the regular expression. `\CHAR' ! Matches character CHAR; this is to be used to match special ! characters, referred above. Note that the only C-like backslash ! sequence that you can portably assume to be interpreted is `\n' ! for a new-line; in particular `\t' matches a `t' under most ! implementations of `sed', rather than a tabulation character. ! Note that the regular expression matcher is greedy, i.e., if two or ! more matches are detected, it selects the longest; if there are two or more selected with the same size, it selects the first in text. Examples: `abcdef' ! Matches `abcdef'. `a*b' Matches zero or more `a's followed by a single `b'. For example, `b' or `aaaaab'. `a\?b' ! Matches `b' or `ab'. `a\+b\+' Matches one or more `a's followed by one or more `b's: `ab' is the *************** *** 456,502 **** `.*' `.\+' ! These two will both match all the characters on a line; however, ! the first will match every line (including empty ones), while the ! second will only match lines containing at least one char. `^main.*(.*)' ! This will search for a line containing "main" as the first thing ! on the line, followed by an opening and closing parenthesis. The ! `n', `(' and `)' need not be adjacent `^#' ! This will match lines beginning with a hash (or sharp) character. `\\$' ! This will match lines ending with a single backslash. The regexp contains two backslashes for escaping. `\$' ! 
Instead, this will match lines containing a single dollar, because ! it is escaped. ! `[a-zA-Z_]' ! This will match any letters or digits `[^ tab]\+' ! This will match one or more sequences of any char that isn't a ! space or tab. Usually this means a word `^\(.*\)\n\1$' ! This will match two equal lines without a trailing new-line ! `A.\{9\}$' ! This will match an "A" that is exactly the last tenth character on ! line ! `^.\{,15\}A' ! Match the last "A" on the first 16 chars of the line  File: sed.info, Node: Data Spaces, Next: Common Commands, Prev: Regular Expressions, Up: sed Programs ! Where `sed' buffers data ======================== `sed' maintains two data buffers: the active _pattern_ space, and --- 459,505 ---- `.*' `.\+' ! These two both match all the characters on a line; however, the ! first matches every line (including empty ones), while the second ! only matches lines containing at least one character. `^main.*(.*)' ! This searches for a line containing `main' as the first thing on ! the line, followed by an opening and closing parenthesis. The ! `n', `(' and `)' need not be adjacent. `^#' ! This matches lines beginning with a hash (or sharp) character. `\\$' ! This matches lines ending with a single backslash. The regexp contains two backslashes for escaping. `\$' ! Instead, this matches lines containing a single dollar, because it ! is escaped. ! `[a-zA-Z0-9]' ! This matches any letters or digits. `[^ tab]\+' ! This matches one or more sequences of any character that isn't a ! space or tab. Usually this means a word. `^\(.*\)\n\1$' ! This matches two equal lines without a trailing new-line. ! `.\{9\}A$' ! This matches an `A' that is the last character on line, with at ! least nine preceding characters. ! `^.\{15\}A' ! This matches an `A' that is the 16th character on a line.  File: sed.info, Node: Data Spaces, Next: Common Commands, Prev: Regular Expressions, Up: sed Programs ! 
Where `sed' Buffers Data ======================== `sed' maintains two data buffers: the active _pattern_ space, and *************** *** 509,515 ****  File: sed.info, Node: Common Commands, Next: The "s" Command, Prev: Data Spaces, Up: sed Programs ! Often used commands =================== If you use `sed' at all, you will quite likely want to know these --- 512,518 ----  File: sed.info, Node: Common Commands, Next: The "s" Command, Prev: Data Spaces, Up: sed Programs ! Often-Used Commands =================== If you use `sed' at all, you will quite likely want to know these *************** *** 522,530 **** the next newline. If you are concerned about portability, be aware that some ! implementations of `sed' (which are not POSIX.2 conformant) may ! only support a single one-line comment, and then only when the ! very first character of the script is a `#'. Warning: if the first two characters of the `sed' script are `#n', then the `-n' (no-autoprint) option is forced. If you want to put --- 525,533 ---- the next newline. If you are concerned about portability, be aware that some ! implementations of `sed' (which are not POSIX conformant) may only ! support a single one-line comment, and then only when the very ! first character of the script is a `#'. Warning: if the first two characters of the `sed' script are `#n', then the `-n' (no-autoprint) option is forced. If you want to put *************** *** 538,545 **** Exit `sed' without processing any more commands or input. Note that the current pattern space is printed if auto-print is not ! disabled with the `-n' switch. The ability to return an exit code ! from the `sed' script is a GNU `sed' extension. `d' Delete the pattern space; immediately start next cycle. --- 541,548 ---- Exit `sed' without processing any more commands or input. Note that the current pattern space is printed if auto-print is not ! disabled with the `-n' options. The ability to return an exit ! 
code from the `sed' script is a GNU `sed' extension. `d' Delete the pattern space; immediately start next cycle. *************** *** 552,558 **** Note: some implementations of `sed', such as this one, will double-print lines when auto-print is not disabled and the `p' command is given. Other implementations will only print the line ! once. Both ways conform with the POSIX.2 standard, and so neither way can be considered to be in error. Portable `sed' scripts should thus avoid relying on either --- 555,561 ---- Note: some implementations of `sed', such as this one, will double-print lines when auto-print is not disabled and the `p' command is given. Other implementations will only print the line ! once. Both ways conform with the POSIX standard, and so neither way can be considered to be in error. Portable `sed' scripts should thus avoid relying on either *************** *** 578,584 **** =============== The syntax of the `s' (as in substitute) command is ! s/REGEXP/REPLACEMENT/FLAGS. The `/' characters may be uniformly replaced by any other single character within any given `s' command. The `/' character (or whatever other character is used in its stead) can appear in the REGEXP or REPLACEMENT only if it is preceded by a `\' --- 581,587 ---- =============== The syntax of the `s' (as in substitute) command is ! `s/REGEXP/REPLACEMENT/FLAGS'. The `/' characters may be uniformly replaced by any other single character within any given `s' command. The `/' character (or whatever other character is used in its stead) can appear in the REGEXP or REPLACEMENT only if it is preceded by a `\' *************** *** 593,610 **** The REPLACEMENT can contain `\N' (N being a number from 1 to 9, inclusive) references, which refer to the portion of the match which is contained between the Nth `\(' and its matching `\)'. Also, the ! REPLACEMENT can contain unescaped `&' characters which will reference ! the whole matched portion of the pattern space. 
Finally (this is a GNU `sed' extension) you can include a special sequence made of a backslash ! and one of the letters `LlUuE'. The meaning is, respectively: turn the ! replacement to lowercase until a `\U' or `\E' is found, turn the next ! character to lowercase, turn the replacement to uppercase until a `\L' ! or `\E' is found, turn the next character to uppercase, and stop case ! conversion started by `\L' or `\U'. To include a literal `\', `&', or ! newline in the final replacement, be sure to precede the desired `\', ! `&', or newline in the REPLACEMENT with a `\'. ! The `s' command can be followed with zero or more of the following FLAGS: `g' --- 596,627 ---- The REPLACEMENT can contain `\N' (N being a number from 1 to 9, inclusive) references, which refer to the portion of the match which is contained between the Nth `\(' and its matching `\)'. Also, the ! REPLACEMENT can contain unescaped `&' characters which reference the ! whole matched portion of the pattern space. Finally (this is a GNU `sed' extension) you can include a special sequence made of a backslash ! and one of the letters `L', `l', `U', `u', or `E'. The meaning is as ! follows: ! `\L' ! Turn the replacement to lowercase until a `\U' or `\E' is found, ! ! `\l' ! Turn the next character to lowercase, ! ! `\U' ! Turn the replacement to uppercase until a `\L' or `\E' is found, ! ! `\u' ! Turn the next character to uppercase, ! ! `\E' ! Stop case conversion started by `\L' or `\U'. ! ! To include a literal `\', `&', or newline in the final replacement, ! be sure to precede the desired `\', `&', or newline in the REPLACEMENT ! with a `\'. ! ! The `s' command can be followed by zero or more of the following FLAGS: `g' *************** *** 614,639 **** `NUMBER' Only replace the NUMBERth match of the REGEXP. ! Note: the POSIX.2 standard does not specify what should happen ! when you mix the `g' and NUMBER modifiers, and currently there is ! no widely agreed upon meaning across `sed' implementations. 
For ! GNU `sed', the interaction is defined to be: ignore matches before ! the NUMBERth, and then match and replace all matches from the ! NUMBERth on. `p' If the substitution was made, then print the new pattern space. Note: when both the `p' and `e' options are specified, the relative ordering of the two produces very different results. In ! general, `ep' (evaluate then print) will be what you want, but operating the other way round can be useful for debugging. For ! this reason, the current versions of GNU `sed' interprets ! specially the presence of `p' options both before and after `e', ! printing pattern space before and after evaluation, while in ! general flags for the `s' command show their effect just once. ! This behavior, although documented, might change in future ! versions. `w FILE-NAME' If the substitution was made, then write out the result to the --- 631,655 ---- `NUMBER' Only replace the NUMBERth match of the REGEXP. ! Note: the POSIX standard does not specify what should happen when ! you mix the `g' and NUMBER modifiers, and currently there is no ! widely agreed upon meaning across `sed' implementations. For GNU ! `sed', the interaction is defined to be: ignore matches before the ! NUMBERth, and then match and replace all matches from the NUMBERth ! on. `p' If the substitution was made, then print the new pattern space. Note: when both the `p' and `e' options are specified, the relative ordering of the two produces very different results. In ! general, `ep' (evaluate then print) is what you want, but operating the other way round can be useful for debugging. For ! this reason, the current version of GNU `sed' interprets specially ! the presence of `p' options both before and after `e', printing ! the pattern space before and after evaluation, while in general ! flags for the `s' command show their effect just once. This ! behavior, although documented, might change in future versions. 
`w FILE-NAME' If the substitution was made, then write out the result to the *************** *** 647,654 **** pattern space. If a substitution was made, the command that is found in pattern space is executed and pattern space is replaced with its output. A trailing new-line is suppressed; results are ! undefined if the command to be executed contains a `nul' ! character. This is a GNU `sed' extension. `I' `i' --- 663,670 ---- pattern space. If a substitution was made, the command that is found in pattern space is executed and pattern space is replaced with its output. A trailing new-line is suppressed; results are ! undefined if the command to be executed contains a NUL character. ! This is a GNU `sed' extension. `I' `i' *************** *** 661,706 **** extension which causes `^' and `$' to match respectively (in addition to the normal behavior) the empty string after a new-line, and the empty string before a new-line. There are special ! character sequences (`\A' and `\Z' in Perl mode, `\`' and `\'' in ! basic or extended regular expression modes) which always match the ! beginning or the end of the buffer. `M' stands for `multi-line'. ---------- Footnotes ---------- ! (1) This is equivalent to `p' unless the `-i' switch is being used.  File: sed.info, Node: Other Commands, Next: Programming Commands, Prev: The "s" Command, Up: sed Programs ! Less frequently used commands ============================= Though perhaps less frequently used than those in the previous section, some very small yet useful `sed' scripts can be built with these commands. - `v' - This command does nothing, but will make `sed' fail if GNU `sed' - extensions are not supported, simply because other implementations - of `sed' do not implement it. - - `Q [EXIT-CODE]' - This command is the same as `q', but will not print the contents - of pattern space. Like `q', it provides the ability to return an - exit code to the caller. 
- - This command can be useful because the only alternative ways to - accomplish this apparently trivial function are to use the `-n' - option (which can unnecessarily complicate your script) or - resorting to the following snippet, which wastes time by reading - the whole file without any visible effect: - - :eat - $d # Quit silently on the last line - N # Read another line, silently - g # Overwrite pattern space each time to save memory - b eat - `y/SOURCE-CHARS/DEST-CHARS/' (The `/' characters may be uniformly replaced by any other single character within any given `y' command.) --- 677,700 ---- extension which causes `^' and `$' to match respectively (in addition to the normal behavior) the empty string after a new-line, and the empty string before a new-line. There are special ! character sequences (`\`' and `\'' in basic or extended regular ! expression modes) which always match the beginning or the end of ! the buffer. `M' stands for `multi-line'. ---------- Footnotes ---------- ! (1) This is equivalent to `p' unless the `-i' option is being used.  File: sed.info, Node: Other Commands, Next: Programming Commands, Prev: The "s" Command, Up: sed Programs ! Less Frequently-Used Commands ============================= Though perhaps less frequently used than those in the previous section, some very small yet useful `sed' scripts can be built with these commands. `y/SOURCE-CHARS/DEST-CHARS/' (The `/' characters may be uniformly replaced by any other single character within any given `y' command.) *************** *** 719,726 **** [At most one address allowed.] Queue the lines of text which follow this command (each but the ! last ending with a `\', which will be removed from the output) to ! be output at the end of the current cycle, or when the next input line is read. As a GNU extension, if between the `a' and the newline there is --- 713,720 ---- [At most one address allowed.] Queue the lines of text which follow this command (each but the ! 
last ending with a `\', which are removed from the output) to be ! output at the end of the current cycle, or when the next input line is read. As a GNU extension, if between the `a' and the newline there is *************** *** 735,749 **** [At most one address allowed.] Immediately output the lines of text which follow this command ! (each but the last ending with a `\', which will be removed from ! the output). `c\' `TEXT' Delete the lines matching the address or address-range, and output the lines of text which follow this command (each but the last ! ending with a `\', which will be removed from the output) in place ! of the last line (or in place of each line, if no addresses were specified). A new cycle is started after this command is done, since the pattern space will have been deleted. --- 729,743 ---- [At most one address allowed.] Immediately output the lines of text which follow this command ! (each but the last ending with a `\', which are removed from the ! output). `c\' `TEXT' Delete the lines matching the address or address-range, and output the lines of text which follow this command (each but the last ! ending with a `\', which are removed from the output) in place of ! the last line (or in place of each line, if no addresses were specified). A new cycle is started after this command is done, since the pattern space will have been deleted. *************** *** 763,786 **** specified on the command line is used. The N parameter is a GNU `sed' extension. - `L N' - This GNU `sed' extension fills and joins lines in pattern space to - produce output lines of (at most) N characters, like `fmt' does; - if N is omitted, the default as specified on the command line is - used. - - Blank lines, spaces between words, and indentation are preserved - in the output; successive input lines with different indentation - are not joined; tabs are expanded to 8 columns. 
- - If pattern space contains multiple lines, they are joined, but - since pattern space usually contains a single line, the behavior - of a simple `L;d' script is the same as `fmt -s' (i.e. it does - not join short lines to form longer ones). - - N specifies the desired line-wrap length; if omitted, the default - as specified on the command line is used. - `r FILENAME' [At most one address allowed.] --- 757,762 ---- *************** *** 793,808 **** supported for the file name, which reads the contents of the standard input. - `R FILENAME' - Queue a line of FILENAME to be read and inserted into the output - stream at the end of the current cycle, or when the next input - line is read. Note that if FILENAME cannot be read, or if its end - is reached, no line is appended, without any error indication. - - As with the `r' command, the special value `/dev/stdin' is - supported for the file name, which reads a line from the standard - input. - `w FILENAME' Write the pattern space to FILENAME. As a GNU `sed' extension, two special values of FILE-NAME are supported: `/dev/stderr', --- 769,774 ---- *************** *** 849,858 **** ---------- Footnotes ---------- ! (1) This is equivalent to `p' unless the `-i' switch is being used.  ! File: sed.info, Node: Programming Commands, Next: SSED-specific Commands, Prev: Other Commands, Up: sed Programs Commands for `sed' gurus ======================== --- 815,824 ---- ---------- Footnotes ---------- ! (1) This is equivalent to `p' unless the `-i' option is being used.  ! File: sed.info, Node: Programming Commands, Next: Extended Commands, Prev: Other Commands, Up: sed Programs Commands for `sed' gurus ======================== *************** *** 877,945 **** since the last input line was read or conditional branch was taken. The LABEL may be omitted, in which case the next cycle is started. 
- `T LABEL' - Branch to LABEL only if there have been no successful - `s'ubstitutions since the last input line was read or conditional - branch was taken. The LABEL may be omitted, in which case the next - cycle is started. -  ! File: sed.info, Node: SSED-specific Commands, Next: Escapes, Prev: Programming Commands, Up: sed Programs ! Commands specific of GNU `sed' ============================== ! These commands are specific of GNU `sed', so you must use them with ! care and only when you are sure that hindering portability is not so ! evil. They allow to check for GNU `sed' extensions or to do tasks that ! are required quite often, yet unsupported by standard `sed's. ! ! `v' ! This command does nothing, but will make `sed' fail if GNU `sed' ! extensions are not supported, simply because other implementations ! of `sed' do not implement it. ! ! `Q [EXIT-CODE]' ! This command is the same as `q', but will not print the contents ! of pattern space. Like `q', it provides the ability to return an ! exit code to the caller. ! ! This command can be useful because the only alternative ways to ! accomplish this apparently trivial function are to use the `-n' ! option (which can unnecessarily complicate your script) or ! resorting to the following snippet, which wastes time by reading ! the whole file without any visible effect: ! ! :eat ! $d # Quit silently on the last line ! N # Read another line, silently ! g # Overwrite pattern space each time to save memory ! b eat ! ! `T LABEL' ! Branch to LABEL only if there have been no successful ! `s'ubstitutions since the last input line was read or conditional ! branch was taken. The LABEL may be omitted, in which case the next ! cycle is started. `e [COMMAND]' This command allows one to pipe input from a shell command into pattern space. Without parameters, the `e' command executes the ! command that is found in pattern space and replaces pattern space ! with the output; a trailing new-line is suppressed. 
If a parameter is specified, instead, the `e' command interprets it as a command and sends it to the output stream (like `r' does). The command can run across multiple lines, all but the last ending with a back-slash. ! In both cases, results are undefined if the command to be executed ! contains a `nul' character. ! ! `W FILENAME' ! Write to the given filename the portion of the pattern space up to ! the first newline. Everything said under the `w' command about ! file handling holds here too. `L N' This GNU `sed' extension fills and joins lines in pattern space to --- 843,872 ---- since the last input line was read or conditional branch was taken. The LABEL may be omitted, in which case the next cycle is started.  ! File: sed.info, Node: Extended Commands, Next: Escapes, Prev: Programming Commands, Up: sed Programs ! Commands Specific to GNU `sed' ============================== ! These commands are specific to GNU `sed', so you must use them with ! care and only when you are sure that hindering portability is not evil. ! They allow you to check for GNU `sed' extensions or to do tasks that ! are required quite often, yet are unsupported by standard `sed's. `e [COMMAND]' This command allows one to pipe input from a shell command into pattern space. Without parameters, the `e' command executes the ! command that is found in pattern space and replaces the pattern ! space with the output; a trailing new-line is suppressed. If a parameter is specified, instead, the `e' command interprets it as a command and sends it to the output stream (like `r' does). The command can run across multiple lines, all but the last ending with a back-slash. ! In both cases, the results are undefined if the command to be ! executed contains a NUL character. `L N' This GNU `sed' extension fills and joins lines in pattern space to *************** *** 951,964 **** in the output; successive input lines with different indentation are not joined; tabs are expanded to 8 columns. ! 
If pattern space contains multiple lines, they are joined, but ! since pattern space usually contains a single line, the behavior ! of a simple `L;d' script is the same as `fmt -s' (i.e. it does ! not join short lines to form longer ones). N specifies the desired line-wrap length; if omitted, the default as specified on the command line is used. `R FILENAME' Queue a line of FILENAME to be read and inserted into the output stream at the end of the current cycle, or when the next input --- 878,908 ---- in the output; successive input lines with different indentation are not joined; tabs are expanded to 8 columns. ! If the pattern space contains multiple lines, they are joined, but ! since the pattern space usually contains a single line, the ! behavior of a simple `L;d' script is the same as `fmt -s' (i.e., ! it does not join short lines to form longer ones). N specifies the desired line-wrap length; if omitted, the default as specified on the command line is used. + `Q [EXIT-CODE]' + This command is the same as `q', but will not print the contents + of pattern space. Like `q', it provides the ability to return an + exit code to the caller. + + This command can be useful because the only alternative ways to + accomplish this apparently trivial function are to use the `-n' + option (which can unnecessarily complicate your script) or + resorting to the following snippet, which wastes time by reading + the whole file without any visible effect: + + :eat + $d Quit silently on the last line + N Read another line, silently + g Overwrite pattern space each time to save memory + b eat + `R FILENAME' Queue a line of FILENAME to be read and inserted into the output stream at the end of the current cycle, or when the next input *************** *** 969,988 **** supported for the file name, which reads a line from the standard input.  ! File: sed.info, Node: Escapes, Prev: SSED-specific Commands, Up: sed Programs ! 
GNU extensions for escapes in regular expressions ================================================= ! Until this chapter, you have only encountered escapes of the form `\^', which tell `sed' not to interpret the caret as a special character, but rather to take it literally. For example, `\*' matches a single asterisk rather than zero or more backslashes. ! This chapter introduces another kind of escapes(1)--that is, escapes that are applied to a character or sequence of characters that ! ordinarily is taken literally, and that `sed' replaces with a special character. This provides a way of encoding non-printable characters in patterns in a visible manner. There is no restriction on the appearance of non-printing characters in a `sed' script but when a --- 913,948 ---- supported for the file name, which reads a line from the standard input. + `T LABEL' + Branch to LABEL only if there have been no successful + `s'ubstitutions since the last input line was read or conditional + branch was taken. The LABEL may be omitted, in which case the next + cycle is started. + + `v' + This command does nothing, but makes `sed' fail if GNU `sed' + extensions are not supported, simply because other versions of + `sed' do not implement it. + + `W FILENAME' + Write to the given filename the portion of the pattern space up to + the first newline. Everything said under the `w' command about + file handling holds here too. +  ! File: sed.info, Node: Escapes, Prev: Extended Commands, Up: sed Programs ! GNU Extensions for Escapes in Regular Expressions ================================================= ! Until this chapter, we have only encountered escapes of the form `\^', which tell `sed' not to interpret the caret as a special character, but rather to take it literally. For example, `\*' matches a single asterisk rather than zero or more backslashes. ! This chapter introduces another kind of escape(1)--that is, escapes that are applied to a character or sequence of characters that ! 
ordinarily are taken literally, and that `sed' replaces with a special character. This provides a way of encoding non-printable characters in patterns in a visible manner. There is no restriction on the appearance of non-printing characters in a `sed' script but when a *************** *** 1014,1020 **** Produces or matches `CONTROL-X', where X is any character. The precise effect of `\cX' is as follows: if X is a lower case letter, it is converted to upper case. Then bit 6 of the ! character (hex 40) is inverted. Thus "\cz" becomes hex 1A, but `\c{' becomes hex 3B, while `\c;' becomes hex 7B. `\dXXX' --- 974,980 ---- Produces or matches `CONTROL-X', where X is any character. The precise effect of `\cX' is as follows: if X is a lower case letter, it is converted to upper case. Then bit 6 of the ! character (hex 40) is inverted. Thus `\cz' becomes hex 1A, but `\c{' becomes hex 3B, while `\c;' becomes hex 7B. `\dXXX' *************** *** 1034,1065 **** regular expressions: `\s' ! Matches any whitespace character `\S' ! Matches any character that is not a whitespace character `\w' Matches any "word" character. A "word" character is any letter or digit or the underscore character. `\W' ! Matches any "non-word" character ---------- Footnotes ---------- ! (1) All the escapes that are introduced in this character areGNU ! extensions, with the exception of `\n'. In basic regular expression ! mode, setting `POSIXLY_CORRECT' disables them.  File: sed.info, Node: Examples, Next: Limitations, Prev: sed Programs, Up: Top ! Some sample scripts ******************* Here are some `sed' scripts to guide you in the art of mastering ! `sed'... * Menu: --- 994,1025 ---- regular expressions: `\s' ! Matches any whitespace character. `\S' ! Matches any character that is not a whitespace character. `\w' Matches any "word" character. A "word" character is any letter or digit or the underscore character. `\W' ! Matches any "non-word" character. ---------- Footnotes ---------- ! 
(1) All the escapes introduced here are GNU extensions, with the ! exception of `\n'. In basic regular expression mode, setting ! `POSIXLY_CORRECT' disables them.  File: sed.info, Node: Examples, Next: Limitations, Prev: sed Programs, Up: Top ! Some Sample Scripts ******************* Here are some `sed' scripts to guide you in the art of mastering ! `sed'. * Menu: *************** *** 1087,1097 ****  File: sed.info, Node: Centering lines, Next: Increment a number, Up: Examples ! Centering lines =============== ! This script will center all lines of a file on a 80 columns width. ! To change that width, the number in `\{\}' must be replaced, and the number of added spaces also must be changed. Note how the buffer commands are used to separate parts in the --- 1047,1057 ----  File: sed.info, Node: Centering lines, Next: Increment a number, Up: Examples ! Centering Lines =============== ! This script centers all lines of a file on a 80 columns width. To ! change that width, the number in `\{...\}' must be replaced, and the number of added spaces also must be changed. Note how the buffer commands are used to separate parts in the *************** *** 1124,1134 ****  File: sed.info, Node: Increment a number, Next: Rename files to lower case, Prev: Centering lines, Up: Examples ! Increment a number ================== This script is one of a few that demonstrate how to do arithmetic in ! `sed'. This is indeed possible(1), but must be done manually. To increment one number you just add 1 to last digit, replacing it by the following digit. There is one exception: when the digit is a --- 1084,1094 ----  File: sed.info, Node: Increment a number, Next: Rename files to lower case, Prev: Centering lines, Up: Examples ! Increment a Number ================== This script is one of a few that demonstrate how to do arithmetic in ! `sed'. This is indeed possible,(1) but must be done manually. To increment one number you just add 1 to last digit, replacing it by the following digit. 
There is one exception: when the digit is a *************** *** 1137,1152 **** This solution by Bruno Haible is very clever and smart because it uses a single buffer; if you don't have this limitation, the algorithm ! used in *Note Numbering lines: cat -n is faster. It works by replacing ! trailing nines with an underscore, then using multiple `s' commands to ! increment the last digit, and then again substituting underscores with ! zeros. #!/usr/bin/sed -f /[^0-9]/ d ! # replace all leading 9s by _ (any other char except digits, could # be used) :d s/9\(_*\)$/_\1/ --- 1097,1112 ---- This solution by Bruno Haible is very clever and smart because it uses a single buffer; if you don't have this limitation, the algorithm ! used in *Note Numbering lines: cat -n, is faster. It works by ! replacing trailing nines with an underscore, then using multiple `s' ! commands to increment the last digit, and then again substituting ! underscores with zeros. #!/usr/bin/sed -f /[^0-9]/ d ! # replace all leading 9s by _ (any other character except digits, could # be used) :d s/9\(_*\)$/_\1/ *************** *** 1180,1186 ****  File: sed.info, Node: Rename files to lower case, Next: Print bash environment, Prev: Increment a number, Up: Examples ! Rename files to lower case ========================== This is a pretty strange use of `sed'. We transform text, and --- 1140,1146 ----  File: sed.info, Node: Rename files to lower case, Next: Print bash environment, Prev: Increment a number, Up: Examples ! Rename Files to Lower Case ========================== This is a pretty strange use of `sed'. We transform text, and *************** *** 1194,1204 **** parameterized using shell variables and proper quoting. #! /bin/sh ! # rename files to lower/upper case... # ! # usage: ! # move-to-lower * ! # move-to-upper * # or # move-to-lower -R . # move-to-upper -R . --- 1154,1164 ---- parameterized using shell variables and proper quoting. #! /bin/sh ! # rename files to lower/upper case... # ! 
# usage: ! # move-to-lower * ! # move-to-upper * # or # move-to-lower -R . # move-to-upper -R . *************** *** 1224,1237 **** } apply_cmd='sh' ! finder='echo $* | tr " " "\n"' files_only= while : do case "$1" in -n) apply_cmd='cat' ;; ! -R) finder='find $* -type f';; -h) help ; exit 1 ;; *) break ;; esac --- 1184,1197 ---- } apply_cmd='sh' ! finder='echo "$@" | tr " " "\n"' files_only= while : do case "$1" in -n) apply_cmd='cat' ;; ! -R) finder='find "$@" -type f';; -h) help ; exit 1 ;; *) break ;; esac *************** *** 1239,1245 **** done if [ -z "$1" ]; then ! echo Usage: $0 [-n] [-r] files... exit 1 fi --- 1199,1205 ---- done if [ -z "$1" ]; then ! echo Usage: $0 [-h] [-n] [-r] files... exit 1 fi *************** *** 1256,1262 **** # remove all trailing slashes s/\/*$// ! # add ./ if there are no path, only filename /\//! s/^/.\// # save path+filename --- 1216,1222 ---- # remove all trailing slashes s/\/*$// ! # add ./ if there is no path, only a filename /\//! s/^/.\// # save path+filename *************** *** 1273,1287 **** x # add converted file name to line, which now contains ! # PATH/FILE-NAME\nCONVERTED-FILE-NAME G # check if converted file name is equal to original file name, # if it is, do not print nothing /^.*\/\(.*\)\n\1/b ! # now, transform `PATH/FROMFILE\nTOFILE', into ! # `mv PATH/FROMFILE PATH/TOFILE' and print it s/^\(.*\/\)\(.*\)\n\(.*\)$/mv \1\2 \1\3/p ' | $apply_cmd --- 1233,1247 ---- x # add converted file name to line, which now contains ! # path/file-name\nconverted-file-name G # check if converted file name is equal to original file name, # if it is, do not print nothing /^.*\/\(.*\)\n\1/b ! # now, transform path/fromfile\ntofile, into ! # mv path/fromfile path/tofile and print it s/^\(.*\/\)\(.*\)\n\(.*\)$/mv \1\2 \1\3/p ' | $apply_cmd *************** *** 1289,1296 ****  File: sed.info, Node: Print bash environment, Next: Reverse chars of lines, Prev: Rename files to lower case, Up: Examples ! Print bash environment ! 
====================== This script strips the definition of the shell functions from the output of the `set' Bourne-shell command. --- 1249,1256 ----  File: sed.info, Node: Print bash environment, Next: Reverse chars of lines, Prev: Rename files to lower case, Up: Examples ! Print `bash' Environment ! ======================== This script strips the definition of the shell functions from the output of the `set' Bourne-shell command. *************** *** 1300,1307 **** set | sed -n ' :x ! # if no occurrence of `=()' print and load next line ! /=() /! { p; b; } # possible start of functions section # save the line in case this is a var like FOO="() " --- 1260,1268 ---- set | sed -n ' :x ! # if no occurrence of "=()" print and load next line ! /=()/! { p; b; } ! / () $/! { p; b; } # possible start of functions section # save the line in case this is a var like FOO="() " *************** *** 1322,1329 ****  File: sed.info, Node: Reverse chars of lines, Next: tac, Prev: Print bash environment, Up: Examples ! Reverse chars of lines ! ====================== This script can be used to reverse the position of characters in lines. The technique moves two characters at a time, hence it is --- 1283,1290 ----  File: sed.info, Node: Reverse chars of lines, Next: tac, Prev: Print bash environment, Up: Examples ! Reverse Characters of Lines ! =========================== This script can be used to reverse the position of characters in lines. The technique moves two characters at a time, hence it is *************** *** 1332,1339 **** Note the `tx' command before the definition of the label. This is often needed to reset the flag that is tested by the `t' command. ! Imaginative readers will find uses to this script. An example is ! reversing the output of `banner'(1). #!/usr/bin/sed -f --- 1293,1300 ---- Note the `tx' command before the definition of the label. This is often needed to reset the flag that is tested by the `t' command. ! Imaginative readers will find uses for this script. 
An example is ! reversing the output of `banner'.(1) #!/usr/bin/sed -f *************** *** 1364,1367 **** --- 1325,1355 ---- banner -w $1 $2 $3 $4 | sed -e :a -e '/^.\{0,'$1'\}$/ { s/$/ /; ba; }' | ~/sedscripts/reverseline.sed + +  + File: sed.info, Node: tac, Next: cat -n, Prev: Reverse chars of lines, Up: Examples + + Reverse Lines of Files + ====================== + + This one begins a series of totally useless (yet interesting) + scripts emulating various Unix commands. This, in particular, is a + `tac' workalike. + + Note that on implementations other than GNU `sed' this script might + easily overflow internal buffers. + + #!/usr/bin/sed -nf + + # reverse all lines of input, i.e. first line became last, ... + + # from the second line, the buffer (which contains all previous lines) + # is *appended* to current line, so, the order will be reversed + 1! G + + # on the last line we're done -- print everything + $ p + + # store everything on the buffer again + h diff -rNC3 sed-4.0.3/doc/sed.info-2 sed-4.0.4/doc/sed.info-2 *** sed-4.0.3/doc/sed.info-2 Thu Nov 21 12:45:10 2002 --- sed-4.0.4/doc/sed.info-2 Thu Dec 12 20:14:25 2002 *************** *** 1,41 **** This is sed.info, produced by makeinfo version 4.2 from sed.texi. START-INFO-DIR-ENTRY * sed: (sed). Stream EDitor. END-INFO-DIR-ENTRY  - File: sed.info, Node: tac, Next: cat -n, Prev: Reverse chars of lines, Up: Examples - - Reverse lines of files - ====================== - - This one begins a series of totally useless (yet interesting) - scripts emulating various Unix commands. This, in particular, is a - `tac' workalike. - - Note that on implementations other than GNU `sed' and GNU `sed' this - script might easily overflow internal buffers. - - #!/usr/bin/sed -nf - - # reverse all lines of input, i.e. first line became last, ... - - # from the second line, the buffer (which contains all previous lines) - # is *appended* to current line, so, the order will be reversed - 1! 
G - - # on the last line we're done - print everything - $ p - - # store everything on the buffer again - h - -  File: sed.info, Node: cat -n, Next: cat -b, Prev: tac, Up: Examples ! Numbering lines =============== This script replaces `cat -n'; in fact it formats its output exactly --- 1,15 ---- This is sed.info, produced by makeinfo version 4.2 from sed.texi. + INFO-DIR-SECTION Text creation and manipulation START-INFO-DIR-ENTRY * sed: (sed). Stream EDitor. END-INFO-DIR-ENTRY  File: sed.info, Node: cat -n, Next: cat -b, Prev: tac, Up: Examples ! Numbering Lines =============== This script replaces `cat -n'; in fact it formats its output exactly *************** *** 54,60 **** ' It uses `sed' to print the line number, then groups lines two by two ! using N. Of course, this script does not teach as much as the one presented below. The algorithm used for incrementing uses both buffers, so the line --- 28,34 ---- ' It uses `sed' to print the line number, then groups lines two by two ! using `N'. Of course, this script does not teach as much as the one presented below. The algorithm used for incrementing uses both buffers, so the line *************** *** 62,68 **** so that changing digits go in a buffer and unchanged ones go in the other; the changed digits are modified in a single step (using a `y' command). The line number for the next line is then composed and ! stored in hold space, to be used in the next iteration. #!/usr/bin/sed -nf --- 36,42 ---- so that changing digits go in a buffer and unchanged ones go in the other; the changed digits are modified in a single step (using a `y' command). The line number for the next line is then composed and ! stored in the hold space, to be used in the next iteration. #!/usr/bin/sed -nf *************** *** 104,110 ****  File: sed.info, Node: cat -b, Next: wc -c, Prev: cat -n, Up: Examples ! 
Numbering non-blank lines ========================= Emulating `cat -b' is almost the same as `cat -n'--we only have to --- 78,84 ----  File: sed.info, Node: cat -b, Next: wc -c, Prev: cat -n, Up: Examples ! Numbering Non-blank Lines ========================= Emulating `cat -b' is almost the same as `cat -n'--we only have to *************** *** 144,156 ****  File: sed.info, Node: wc -c, Next: wc -w, Prev: cat -b, Up: Examples ! Counting chars ! ============== This script shows another way to do arithmetic with `sed'. In this case we have to add possibly large numbers, so implementing this by successive increments would not be feasible (and possibly even more ! complicated to contrive than this script...). The approach is to map numbers to letters, kind of an abacus implemented with `sed'. `a's are units, `b's are tenths and so on: we --- 118,130 ----  File: sed.info, Node: wc -c, Next: wc -w, Prev: cat -b, Up: Examples ! Counting Characters ! =================== This script shows another way to do arithmetic with `sed'. In this case we have to add possibly large numbers, so implementing this by successive increments would not be feasible (and possibly even more ! complicated to contrive than this script). The approach is to map numbers to letters, kind of an abacus implemented with `sed'. `a's are units, `b's are tenths and so on: we *************** *** 217,223 ****  File: sed.info, Node: wc -w, Next: wc -l, Prev: wc -c, Up: Examples ! Counting words ============== This script is almost the same as the previous one, once each of the --- 191,197 ----  File: sed.info, Node: wc -w, Next: wc -l, Prev: wc -c, Up: Examples ! Counting Words ============== This script is almost the same as the previous one, once each of the *************** *** 226,232 **** It is interesting that real `wc' programs have optimized loops for `wc -c', so they are much slower at counting words rather than ! characters. 
These scripts' bottleneck, instead, is arithmetic, and hence the word-counting one is faster (it has to manage smaller numbers). --- 200,206 ---- It is interesting that real `wc' programs have optimized loops for `wc -c', so they are much slower at counting words rather than ! characters. This script's bottleneck, instead, is arithmetic, and hence the word-counting one is faster (it has to manage smaller numbers). *************** *** 275,281 ****  File: sed.info, Node: wc -l, Next: head, Prev: wc -w, Up: Examples ! Counting lines ============== No strange things are done now, because `sed' gives us `wc -l' --- 249,255 ----  File: sed.info, Node: wc -l, Next: head, Prev: wc -w, Up: Examples ! Counting Lines ============== No strange things are done now, because `sed' gives us `wc -l' *************** *** 287,293 ****  File: sed.info, Node: head, Next: tail, Prev: wc -l, Up: Examples ! Printing the first lines ======================== This script is probably the simplest useful `sed' script. It --- 261,267 ----  File: sed.info, Node: head, Next: tail, Prev: wc -l, Up: Examples ! Printing the First Lines ======================== This script is probably the simplest useful `sed' script. It *************** *** 300,306 ****  File: sed.info, Node: tail, Next: uniq, Prev: head, Up: Examples ! Printing the last lines ======================= Printing the last N lines rather than the first is more complex but --- 274,280 ----  File: sed.info, Node: tail, Next: uniq, Prev: head, Up: Examples ! Printing the Last Lines ======================= Printing the last N lines rather than the first is more complex but *************** *** 308,314 **** character. This script is similar to the `tac' script in that it keeps the ! final output in hold space and prints it at the end: #!/usr/bin/sed -nf --- 282,288 ---- character. This script is similar to the `tac' script in that it keeps the ! 
final output in the hold space and prints it at the end: #!/usr/bin/sed -nf *************** *** 327,338 **** To introduce the technique, which is fully demonstrated in the rest of this chapter and is based on the `N', `P' and `D' commands, here is ! an implementation of `tail' using a simple `sliding window'. This looks complicated but in fact the working is the same as the last script: after we have kicked in the appropriate number of lines, ! however, we stop using hold space to keep inter-line state, and instead ! use `N' and `D' to slide pattern space by one line: #!/usr/bin/sed -f --- 301,312 ---- To introduce the technique, which is fully demonstrated in the rest of this chapter and is based on the `N', `P' and `D' commands, here is ! an implementation of `tail' using a simple "sliding window." This looks complicated but in fact the working is the same as the last script: after we have kicked in the appropriate number of lines, ! however, we stop using the hold space to keep inter-line state, and ! instead use `N' and `D' to slide pattern space by one line: #!/usr/bin/sed -f *************** *** 346,352 ****  File: sed.info, Node: uniq, Next: uniq -d, Prev: tail, Up: Examples ! Make duplicate lines unique =========================== This is an example of the art of using the `N', `P' and `D' --- 320,326 ----  File: sed.info, Node: uniq, Next: uniq -d, Prev: tail, Up: Examples ! Make Duplicate Lines Unique =========================== This is an example of the art of using the `N', `P' and `D' *************** *** 356,362 **** h :b ! On the last line, print and exit $b N /^\(.*\)\n\1$/ { --- 330,336 ---- h :b ! # On the last line, print and exit $b N /^\(.*\)\n\1$/ { *************** *** 380,386 ****  File: sed.info, Node: uniq -d, Next: uniq -u, Prev: uniq, Up: Examples ! Print duplicated lines of input =============================== This script prints only duplicated lines, like `uniq -d'. 
--- 354,360 ----  File: sed.info, Node: uniq -d, Next: uniq -u, Prev: uniq, Up: Examples ! Print Duplicated Lines of Input =============================== This script prints only duplicated lines, like `uniq -d'. *************** *** 414,427 ****  File: sed.info, Node: uniq -u, Next: cat -s, Prev: uniq -d, Up: Examples ! Remove all duplicated lines =========================== This script prints only unique lines, like `uniq -u'. #!/usr/bin/sed -f ! # Search for a duplicate line -- until that, print what you find. $b N /^\(.*\)\n\1$/ ! { --- 388,401 ----  File: sed.info, Node: uniq -u, Next: cat -s, Prev: uniq -d, Up: Examples ! Remove All Duplicated Lines =========================== This script prints only unique lines, like `uniq -u'. #!/usr/bin/sed -f ! # Search for a duplicate line --- until that, print what you find. $b N /^\(.*\)\n\1$/ ! { *************** *** 449,455 ****  File: sed.info, Node: cat -s, Prev: uniq -u, Up: Examples ! Squeezing blank lines ===================== As a final example, here are three scripts, of increasing complexity --- 423,429 ----  File: sed.info, Node: cat -s, Prev: uniq -u, Up: Examples ! Squeezing Blank Lines ===================== As a final example, here are three scripts, of increasing complexity *************** *** 470,476 **** } # now, squeeze all '\n', this can be also done by: ! # `s/^\(\n\)*/\1/' s/\n*/\ / --- 444,450 ---- } # now, squeeze all '\n', this can be also done by: ! # s/^\(\n\)*/\1/ s/\n*/\ / *************** *** 509,515 **** p # get next n ! # got chars? print it again, etc... /./bx # no, don't have chars: got an empty line --- 483,489 ---- p # get next n ! # got chars? print it again, etc... /./bx # no, don't have chars: got an empty line *************** *** 530,545 ****  File: sed.info, Node: Limitations, Next: Other Resources, Prev: Examples, Up: Top ! 
GNU `sed''s limitations and non-limitations ******************************************* For those who want to write portable `sed' scripts, be aware that some implementations have been known to limit line lengths (for the ! pattern and hold spaces) to be no more than 4000 bytes. The POSIX.2 standard specifies that conforming `sed' implementations shall support at least 8192 byte line lengths. GNU `sed' has no built-in limit on ! line length; as long as it can malloc() more (virtual) memory, you can ! feed or construct lines as long as you care. However, recursion is used to handle subpatterns and indefinite repetition. This means that the available stack space may limit the --- 504,519 ----  File: sed.info, Node: Limitations, Next: Other Resources, Prev: Examples, Up: Top ! GNU `sed''s Limitations and Non-limitations ******************************************* For those who want to write portable `sed' scripts, be aware that some implementations have been known to limit line lengths (for the ! pattern and hold spaces) to be no more than 4000 bytes. The POSIX standard specifies that conforming `sed' implementations shall support at least 8192 byte line lengths. GNU `sed' has no built-in limit on ! line length; as long as it can `malloc()' more (virtual) memory, you ! can feed or construct lines as long as you like. However, recursion is used to handle subpatterns and indefinite repetition. This means that the available stack space may limit the *************** *** 548,577 ****  File: sed.info, Node: Other Resources, Next: Reporting Bugs, Prev: Limitations, Up: Top ! Other resources for learning about `sed' **************************************** In addition to several books that have been written about `sed' (either specifically or as chapters in books which discuss shell programming), one can find out more about `sed' (including suggestions ! of a few books) from the FAQ for the sed-users mailing list, available ! 
from any of: `http://www.student.northpark.edu/pemente/sed/sedfaq.html' `http://sed.sf.net/grabbag/tutorials/sedfaq.html' Also of interest are `http://www.student.northpark.edu/pemente/sed/index.htm' and ! `http://sed.sf.net/grabbag', which include sed tutorials and other ! sed-related goodies. ! There is a "sed-users" mailing list maintained by Sven Guckes. To subscribe, visit `http://groups.yahoo.com' and search for the `sed-users' mailing list.  File: sed.info, Node: Reporting Bugs, Next: Extended regexps, Prev: Other Resources, Up: Top ! Reporting bugs ************** Email bug reports to <bonzini@gnu.org>. Be sure to include the word --- 522,551 ----  File: sed.info, Node: Other Resources, Next: Reporting Bugs, Prev: Limitations, Up: Top ! Other Resources for Learning About `sed' **************************************** In addition to several books that have been written about `sed' (either specifically or as chapters in books which discuss shell programming), one can find out more about `sed' (including suggestions ! of a few books) from the FAQ for the `sed-users' mailing list, ! available from any of: `http://www.student.northpark.edu/pemente/sed/sedfaq.html' `http://sed.sf.net/grabbag/tutorials/sedfaq.html' Also of interest are `http://www.student.northpark.edu/pemente/sed/index.htm' and ! `http://sed.sf.net/grabbag', which include `sed' tutorials and other ! `sed'-related goodies. ! The `sed-users' mailing list is itself maintained by Sven Guckes. To subscribe, visit `http://groups.yahoo.com' and search for the `sed-users' mailing list.  File: sed.info, Node: Reporting Bugs, Next: Extended regexps, Prev: Other Resources, Up: Top ! Reporting Bugs ************** Email bug reports to <bonzini@gnu.org>. Be sure to include the word *************** *** 580,588 **** Please do not send a bug report like this: ! [while building frobme-1.3.4] ! $ configure ! 
sed: file sedscr line 1: Unknown option to 's' If GNU `sed' doesn't configure your favorite package, take a few extra minutes to identify the specific problem and make a stand-alone --- 554,562 ---- Please do not send a bug report like this: ! while building frobme-1.3.4 ! $ configure ! error--> sed: file sedscr line 1: Unknown option to 's' If GNU `sed' doesn't configure your favorite package, take a few extra minutes to identify the specific problem and make a stand-alone *************** *** 598,620 **** Here are a few commonly reported bugs that are not bugs. ! `sed -n' and `s/regex/replace/p' ! Some versions of sed ignore the `p' (print) option of an `s' ! command unless the `-n' command switch has been specified. Other ! versions always honor the `p' option. Both approaches are allowed ! by POSIX.2 and GNU `sed' (on which GNU `sed' is based) is the ! latter sort; I judge this approach to be better (give enough rope ! etc.) when you write complex scripts, but portable scripts should ! be written to work correctly with either behavior. ! regex syntax clashes ! `sed' uses the Posix basic regular expression syntax. According to the standard, the meaning of some escape sequences is undefined in this syntax; notable in the case of `sed' are `\|', `\+', `\?', `\`', `\'', `\<', `\>', `\b', `\B', `\w', and `\W'. ! As in all GNU programs that use Posix basic regular expressions, ! sed interprets these escape sequences as meta-characters. So, `x\+' matches one or more occurrences of `x'. `abc\|def' matches either `abc' or `def'. --- 572,614 ---- Here are a few commonly reported bugs that are not bugs. ! `sed -n' and `s/REGEX/REPLACE/p' ! Some versions of `sed' ignore the `p' (print) option of an `s' ! command unless the `-n' command-line option has been specified. ! Other versions always honor the `p' option. Both approaches are ! allowed by POSIX; GNU `sed' is of the latter sort, which is better for ! complex scripts and also more intuitive, but portable scripts ! 
should be written to work correctly with either behavior. ! ! `N' command on the last line ! Most versions of `sed' exit without printing anything when the `N' ! command is issued on the last line of a file. GNU `sed' prints ! pattern space before exiting unless of course the `-n' command ! switch has been specified. This choice is by design. ! ! For example, the behavior of ! sed N foo bar ! ! would depend on whether foo has an even or an odd number of ! lines(1). Or, when writing a script to read the next few lines ! following a pattern match, traditional implementations of `sed' ! would force you to write something like ! /foo/{ $!N; $!N; $!N; $!N; $!N; $!N; $!N; $!N; $!N } ! ! instead of just ! /foo/{ N;N;N;N;N;N;N;N;N; } ! In any case, the simplest workaround is to use `$d;N' in scripts ! that rely on the traditional behavior. ! ! Regex syntax clashes ! `sed' uses the POSIX basic regular expression syntax. According to the standard, the meaning of some escape sequences is undefined in this syntax; notable in the case of `sed' are `\|', `\+', `\?', `\`', `\'', `\<', `\>', `\b', `\B', `\w', and `\W'. ! As in all GNU programs that use POSIX basic regular expressions, ! `sed' interprets these escape sequences as meta-characters. So, `x\+' matches one or more occurrences of `x'. `abc\|def' matches either `abc' or `def'. *************** *** 623,629 **** assumption that `\|' and `\+' match the literal characters `|' and `+'. Such scripts must be modified by removing the spurious backslashes if they are to be used with modern implementations of ! `sed', like GNU `sed' or GNU `sed'. In addition, this version of `sed' supports several escape characters (some of which are multi-character) to insert --- 617,623 ---- assumption that `\|' and `\+' match the literal characters `|' and `+'. Such scripts must be modified by removing the spurious backslashes if they are to be used with modern implementations of ! `sed', like GNU `sed'. 
In addition, this version of `sed' supports several escape characters (some of which are multi-character) to insert *************** *** 632,654 **** written for other `sed's. `-i' clobbers read-only files ! In short, `sed d -i' will let one delete the contents of a ! read-only file, and in general the `-i' option (*note Invocation: ! Invoking sed. will let one clobber protected files. This is not a ! bug, but rather a consequence of how the Unix filesystem works. The permissions on a file say what can happen to the data in that file, while the permissions on a directory say what can happen to the list of files in that directory. `sed -i' will not ever open ! for writing a file that is already on disk, rather, it will work on a temporary file that is finally renamed to the original name: if you rename or delete files, you're actually modifying the contents of the directory, so the operation depends on the ! permissions of the directory, not of the file). For this same ! reason, `sed' will not let one use `-i' on a writeable file in a read-only directory (but unbelievably nobody reports that as a bug...).  File: sed.info, Node: Extended regexps, Next: Concept Index, Prev: Reporting Bugs, Up: Top --- 626,652 ---- written for other `sed's. `-i' clobbers read-only files ! In short, `sed -i' will let you delete the contents of a read-only ! file, and in general the `-i' option (*note Invocation: Invoking ! sed.) lets you clobber protected files. This is not a bug, but ! rather a consequence of how the Unix filesystem works. The permissions on a file say what can happen to the data in that file, while the permissions on a directory say what can happen to the list of files in that directory. `sed -i' will not ever open ! for writing a file that is already on disk. Rather, it will work on a temporary file that is finally renamed to the original name: if you rename or delete files, you're actually modifying the contents of the directory, so the operation depends on the ! 
permissions of the directory, not of the file. For this same ! reason, `sed' does not let you use `-i' on a writeable file in a read-only directory (but unbelievably nobody reports that as a bug...). + ---------- Footnotes ---------- + + (1) which is the actual "bug" that prompted the change in behavior +  File: sed.info, Node: Extended regexps, Next: Concept Index, Prev: Reporting Bugs, Up: Top *************** *** 707,714 **** * Append pattern space to hold space: Other Commands. * Appending text after a line: Other Commands. * Backreferences, in regular expressions: The "s" Command. ! * Branch to a label, if s/// failed <1>: SSED-specific Commands. ! * Branch to a label, if s/// failed: Programming Commands. * Branch to a label, if s/// succeeded: Programming Commands. * Branch to a label, unconditionally: Programming Commands. * Buffer spaces, pattern and hold: Data Spaces. --- 705,711 ---- * Append pattern space to hold space: Other Commands. * Appending text after a line: Other Commands. * Backreferences, in regular expressions: The "s" Command. ! * Branch to a label, if s/// failed: Extended Commands. * Branch to a label, if s/// succeeded: Programming Commands. * Branch to a label, unconditionally: Programming Commands. * Buffer spaces, pattern and hold: Data Spaces. *************** *** 717,730 **** * Caveat -- #n on first line: Common Commands. * Command groups: Common Commands. * Comments, in scripts: Common Commands. ! * Conditional branch <1>: SSED-specific Commands. * Conditional branch: Programming Commands. * Copy hold space into pattern space: Other Commands. * Copy pattern space into hold space: Other Commands. * Delete first line from pattern space: Other Commands. * Disabling autoprint, from command line: Invoking sed. * empty regular expression: Addresses. ! * Evaluate Bourne-shell commands: SSED-specific Commands. * Evaluate Bourne-shell commands, after substitution: The "s" Command. * Exchange hold space with pattern space: Other Commands. 
* Excluding lines: Addresses. --- 714,727 ---- * Caveat -- #n on first line: Common Commands. * Command groups: Common Commands. * Comments, in scripts: Common Commands. ! * Conditional branch <1>: Extended Commands. * Conditional branch: Programming Commands. * Copy hold space into pattern space: Other Commands. * Copy pattern space into hold space: Other Commands. * Delete first line from pattern space: Other Commands. * Disabling autoprint, from command line: Invoking sed. * empty regular expression: Addresses. ! * Evaluate Bourne-shell commands: Extended Commands. * Evaluate Bourne-shell commands, after substitution: The "s" Command. * Exchange hold space with pattern space: Other Commands. * Excluding lines: Addresses. *************** *** 735,741 **** * Global substitution: The "s" Command. * GNU extensions, /dev/stderr file <1>: Other Commands. * GNU extensions, /dev/stderr file: The "s" Command. ! * GNU extensions, /dev/stdin file <1>: SSED-specific Commands. * GNU extensions, /dev/stdin file: Other Commands. * GNU extensions, /dev/stdout file <1>: Other Commands. * GNU extensions, /dev/stdout file <2>: The "s" Command. --- 732,738 ---- * Global substitution: The "s" Command. * GNU extensions, /dev/stderr file <1>: Other Commands. * GNU extensions, /dev/stderr file: The "s" Command. ! * GNU extensions, /dev/stdin file <1>: Extended Commands. * GNU extensions, /dev/stdin file: Other Commands. * GNU extensions, /dev/stdout file <1>: Other Commands. * GNU extensions, /dev/stdout file <2>: The "s" Command. *************** *** 744,755 **** * GNU extensions, 0,ADDR2 addressing: Addresses. * GNU extensions, ADDR1,+N addressing: Addresses. * GNU extensions, ADDR1,~N addressing: Addresses. ! * GNU extensions, branch if s/// failed <1>: SSED-specific Commands. ! * GNU extensions, branch if s/// failed: Programming Commands. ! * GNU extensions, case modifiers in `s' commands: The "s" Command. ! * GNU extensions, checking for their presence <1>: SSED-specific Commands. 
! * GNU extensions, checking for their presence: Other Commands. ! * GNU extensions, evaluating Bourne-shell commands <1>: SSED-specific Commands. * GNU extensions, evaluating Bourne-shell commands: The "s" Command. * GNU extensions, extended regular expressions: Invoking sed. * GNU extensions, g and NUMBER modifier interaction in s command: The "s" Command. --- 741,750 ---- * GNU extensions, 0,ADDR2 addressing: Addresses. * GNU extensions, ADDR1,+N addressing: Addresses. * GNU extensions, ADDR1,~N addressing: Addresses. ! * GNU extensions, branch if s/// failed: Extended Commands. ! * GNU extensions, case modifiers in s commands: The "s" Command. ! * GNU extensions, checking for their presence: Extended Commands. ! * GNU extensions, evaluating Bourne-shell commands <1>: Extended Commands. * GNU extensions, evaluating Bourne-shell commands: The "s" Command. * GNU extensions, extended regular expressions: Invoking sed. * GNU extensions, g and NUMBER modifier interaction in s command: The "s" Command. *************** *** 757,788 **** * GNU extensions, I modifier: Addresses. * GNU extensions, in-place editing <1>: Reporting Bugs. * GNU extensions, in-place editing: Invoking sed. ! * GNU extensions, L command <1>: SSED-specific Commands. ! * GNU extensions, L command: Other Commands. * GNU extensions, M modifier: The "s" Command. * GNU extensions, modifiers and the empty regular expression: Addresses. * GNU extensions, N~M addresses: Addresses. ! * GNU extensions, quitting silently <1>: SSED-specific Commands. ! * GNU extensions, quitting silently: Other Commands. ! * GNU extensions, R command <1>: SSED-specific Commands. ! * GNU extensions, R command: Other Commands. ! * GNU extensions, reading a file a line at a time <1>: SSED-specific Commands. ! * GNU extensions, reading a file a line at a time: Other Commands. ! * GNU extensions, reformatting paragraphs <1>: SSED-specific Commands. ! * GNU extensions, reformatting paragraphs: Other Commands. ! 
* GNU extensions, returning an exit code <1>: SSED-specific Commands. ! * GNU extensions, returning an exit code <2>: Other Commands. * GNU extensions, returning an exit code: Common Commands. * GNU extensions, setting line length: Other Commands. * GNU extensions, special escapes <1>: Reporting Bugs. * GNU extensions, special escapes: Escapes. * GNU extensions, special two-address forms: Addresses. ! * GNU extensions, subprocesses <1>: SSED-specific Commands. * GNU extensions, subprocesses: The "s" Command. * GNU extensions, to basic regular expressions <1>: Reporting Bugs. * GNU extensions, to basic regular expressions: Regular Expressions. * GNU extensions, unlimited line length: Limitations. ! * GNU extensions, writing first line to a file: SSED-specific Commands. * Goto, in scripts: Programming Commands. * Greedy regular expression matching: Regular Expressions. * Grouping commands: Common Commands. --- 752,777 ---- * GNU extensions, I modifier: Addresses. * GNU extensions, in-place editing <1>: Reporting Bugs. * GNU extensions, in-place editing: Invoking sed. ! * GNU extensions, L command: Extended Commands. * GNU extensions, M modifier: The "s" Command. * GNU extensions, modifiers and the empty regular expression: Addresses. * GNU extensions, N~M addresses: Addresses. ! * GNU extensions, quitting silently: Extended Commands. ! * GNU extensions, R command: Extended Commands. ! * GNU extensions, reading a file a line at a time: Extended Commands. ! * GNU extensions, reformatting paragraphs: Extended Commands. ! * GNU extensions, returning an exit code <1>: Extended Commands. * GNU extensions, returning an exit code: Common Commands. * GNU extensions, setting line length: Other Commands. * GNU extensions, special escapes <1>: Reporting Bugs. * GNU extensions, special escapes: Escapes. * GNU extensions, special two-address forms: Addresses. ! * GNU extensions, subprocesses <1>: Extended Commands. * GNU extensions, subprocesses: The "s" Command. 
* GNU extensions, to basic regular expressions <1>: Reporting Bugs. * GNU extensions, to basic regular expressions: Regular Expressions. * GNU extensions, unlimited line length: Limitations. ! * GNU extensions, writing first line to a file: Extended Commands. * Goto, in scripts: Programming Commands. * Greedy regular expression matching: Regular Expressions. * Grouping commands: Common Commands. *************** *** 809,814 **** --- 798,804 ---- * Next input line, append to pattern space: Other Commands. * Next input line, replace pattern space with: Common Commands. * Non-bugs, in-place editing: Reporting Bugs. + * Non-bugs, N command on the last line: Reporting Bugs. * Non-bugs, p command and -n flag <1>: Reporting Bugs. * Non-bugs, p command and -n flag: Common Commands. * Non-bugs, regex syntax clashes: Reporting Bugs. *************** *** 817,822 **** --- 807,813 ---- * Perl-style regular expressions, multiline: Addresses. * Portability, comments: Common Commands. * Portability, line length limitations: Limitations. + * Portability, N command on the last line: Reporting Bugs. * Portability, p command and -n flag <1>: Reporting Bugs. * Portability, p command and -n flag: Common Commands. * POSIXLY_CORRECT behavior, empty regular expression: Addresses. *************** *** 824,848 **** * Print first line from pattern space: Other Commands. * Printing line number: Other Commands. * Printing text unambiguously: Other Commands. ! * Quitting <1>: SSED-specific Commands. ! * Quitting <2>: Other Commands. * Quitting: Common Commands. * Range of lines: Addresses. * Range with start address of zero: Addresses. * Read next input line: Common Commands. ! * Read text from a file <1>: SSED-specific Commands. * Read text from a file: Other Commands. ! * Reformat pattern space <1>: SSED-specific Commands. ! * Reformat pattern space: Other Commands. ! * Reformatting paragraphs <1>: SSED-specific Commands. ! * Reformatting paragraphs: Other Commands. 
* Replace hold space with copy of pattern space: Other Commands. * Replace pattern space with copy of hold space: Other Commands. * Replacing all text matching regexp in a line: The "s" Command. * Replacing only Nth match of regexp in a line: The "s" Command. * Replacing selected lines with other text: Other Commands. ! * Requiring GNU sed <1>: SSED-specific Commands. ! * Requiring GNU sed: Other Commands. * Script structure: sed Programs. * Script, from a file: Invoking sed. * Script, from command line: Invoking sed. --- 815,835 ---- * Print first line from pattern space: Other Commands. * Printing line number: Other Commands. * Printing text unambiguously: Other Commands. ! * Quitting <1>: Extended Commands. * Quitting: Common Commands. * Range of lines: Addresses. * Range with start address of zero: Addresses. * Read next input line: Common Commands. ! * Read text from a file <1>: Extended Commands. * Read text from a file: Other Commands. ! * Reformat pattern space: Extended Commands. ! * Reformatting paragraphs: Extended Commands. * Replace hold space with copy of pattern space: Other Commands. * Replace pattern space with copy of hold space: Other Commands. * Replacing all text matching regexp in a line: The "s" Command. * Replacing only Nth match of regexp in a line: The "s" Command. * Replacing selected lines with other text: Other Commands. ! * Requiring GNU sed: Extended Commands. * Script structure: sed Programs. * Script, from a file: Invoking sed. * Script, from command line: Invoking sed. *************** *** 855,861 **** * Special addressing forms: Addresses. * Standard input, processing as input: Invoking sed. * Stream editor: Introduction. ! * Subprocesses <1>: SSED-specific Commands. * Subprocesses: The "s" Command. * Substitution of text, options: The "s" Command. * Text, appending: Other Commands. --- 842,848 ---- * Special addressing forms: Addresses. * Standard input, processing as input: Invoking sed. * Stream editor: Introduction. ! 
* Subprocesses <1>: Extended Commands. * Subprocesses: The "s" Command. * Substitution of text, options: The "s" Command. * Text, appending: Other Commands. *************** *** 869,875 **** * Usage summary, printing: Invoking sed. * Version, printing: Invoking sed. * Working on separate files: Invoking sed. ! * Write first line to a file: SSED-specific Commands. * Write to a file: Other Commands. * Zero, as range start address: Addresses. --- 856,862 ---- * Usage summary, printing: Invoking sed. * Version, printing: Invoking sed. * Working on separate files: Invoking sed. ! * Write first line to a file: Extended Commands. * Write to a file: Other Commands. * Zero, as range start address: Addresses. *************** *** 912,944 **** * c (change to text lines) command: Other Commands. * D (delete first line) command: Other Commands. * d (delete) command: Common Commands. ! * e (evaluate) command: SSED-specific Commands. * G (appending Get) command: Other Commands. * g (get) command: Other Commands. * H (append Hold) command: Other Commands. * h (hold) command: Other Commands. * i (insert text lines) command: Other Commands. ! * L (fLow paragraphs) command <1>: SSED-specific Commands. ! * L (fLow paragraphs) command: Other Commands. * l (list unambiguously) command: Other Commands. * N (append Next line) command: Other Commands. * n (next-line) command: Common Commands. * P (print first line) command: Other Commands. * p (print) command: Common Commands. * q (quit) command: Common Commands. ! * Q (silent Quit) command <1>: SSED-specific Commands. ! * Q (silent Quit) command: Other Commands. * r (read file) command: Other Commands. ! * R (read line) command <1>: SSED-specific Commands. ! * R (read line) command: Other Commands. * s command, option flags: The "s" Command. ! * T (test and branch if failed) command <1>: SSED-specific Commands. ! * T (test and branch if failed) command: Programming Commands. * t (test and branch if successful) command: Programming Commands. 
! * v (version) command <1>: SSED-specific Commands. ! * v (version) command: Other Commands. * w (write file) command: Other Commands. ! * W (write first line) command: SSED-specific Commands. * x (eXchange) command: Other Commands. * y (transliterate) command: Other Commands. * {} command grouping: Common Commands. --- 899,926 ---- * c (change to text lines) command: Other Commands. * D (delete first line) command: Other Commands. * d (delete) command: Common Commands. ! * e (evaluate) command: Extended Commands. * G (appending Get) command: Other Commands. * g (get) command: Other Commands. * H (append Hold) command: Other Commands. * h (hold) command: Other Commands. * i (insert text lines) command: Other Commands. ! * L (fLow paragraphs) command: Extended Commands. * l (list unambiguously) command: Other Commands. * N (append Next line) command: Other Commands. * n (next-line) command: Common Commands. * P (print first line) command: Other Commands. * p (print) command: Common Commands. * q (quit) command: Common Commands. ! * Q (silent Quit) command: Extended Commands. * r (read file) command: Other Commands. ! * R (read line) command: Extended Commands. * s command, option flags: The "s" Command. ! * T (test and branch if failed) command: Extended Commands. * t (test and branch if successful) command: Programming Commands. ! * v (version) command: Extended Commands. * w (write file) command: Other Commands. ! * W (write first line) command: Extended Commands. * x (eXchange) command: Other Commands. * y (transliterate) command: Other Commands. * {} command grouping: Common Commands. 
diff -rNC3 sed-4.0.3/doc/sed.texi sed-4.0.4/doc/sed.texi *** sed-4.0.3/doc/sed.texi Tue Nov 19 20:54:46 2002 --- sed-4.0.4/doc/sed.texi Thu Dec 12 20:06:38 2002 *************** *** 4,9 **** --- 4,16 ---- @c -- Stuff that needs adding: ---------------------------------------------- @c (document the `;' command-separator) @c -------------------------------------------------------------------------- + @c Check for consistency: regexps in @code, text that they match in @samp. + @c + @c Tips: + @c @command for command + @c @samp for command fragments: @samp{cat -s} + @c @code for sed commands and flags + @c Use ``quote'' not `quote' or "quote". @c @c %**start of header @setfilename sed.info *************** *** 22,46 **** @defcodeindex op @syncodeindex op fn ! @direntry ! * sed: (sed). Stream EDitor. ! ! @end direntry ! ! @clear PERL ! @set SSEDEXT @acronym{GNU} extensions ! @set SSED @acronym{GNU} @code{sed} @copying This file documents version @value{VERSION} of @value{SSED}, a stream editor. - @ignore - Published by the Free Software Foundation, @* - 59 Temple Place - Suite 330 @* - Boston, MA 02111-1307, USA - @end ignore - Copyright (C) 1998, 1999, 2001, 2002 Free Software Foundation, Inc. This document is released under the terms of the GNU Free Documentation --- 29,40 ---- @defcodeindex op @syncodeindex op fn ! @include config.texi @copying This file documents version @value{VERSION} of @value{SSED}, a stream editor. Copyright (C) 1998, 1999, 2001, 2002 Free Software Foundation, Inc. This document is released under the terms of the GNU Free Documentation *************** *** 58,64 **** @setchapternewpage off @titlepage ! @title sed, a stream editor @subtitle version @value{VERSION}, @value{UPDATED} @author by Ken Pizzini, Paolo Bonzini --- 52,58 ---- @setchapternewpage off @titlepage ! 
@title @command{sed}, a stream editor @subtitle version @value{VERSION}, @value{UPDATED} @author by Ken Pizzini, Paolo Bonzini *************** *** 67,72 **** --- 61,70 ---- Copyright @copyright{} 1998, 1999 Free Software Foundation, Inc. @insertcopying + + Published by the Free Software Foundation, @* + 59 Temple Place - Suite 330 @* + Boston, MA 02111-1307, USA @end titlepage *************** *** 80,112 **** @menu * Introduction:: Introduction * Invoking sed:: Invocation ! * sed Programs:: @code{sed} programs * Examples:: Some sample scripts * Limitations:: Limitations and (non-)limitations of @value{SSED} ! * Other Resources:: Other resources for learning about @code{sed} * Reporting Bugs:: Reporting bugs ! * Extended regexps:: @code{egrep}-style regular expressions @ifset PERL * Perl regexps:: Perl-style regular expressions @end ifset * Concept Index:: A menu with all the topics in this manual. ! * Command and Option Index:: A menu with all @code{sed} commands and command-line options. @detailmenu --- The detailed node listing --- sed Programs: ! * Addresses:: Selecting lines with @code{sed} * Regular Expressions:: Overview of regular expression syntax ! * Data Spaces:: Where @code{sed} buffers data * Common Commands:: Often used commands ! * The "s" Command:: @code{sed}'s Swiss Army Knife * Other Commands:: Less frequently used commands ! * Programming Commands:: Commands for @code{sed} gurus ! * SSED-specific Commands:: Commands specific of @value{SSED} * Escapes:: Specifying special characters Examples: --- 78,110 ---- @menu * Introduction:: Introduction * Invoking sed:: Invocation ! * sed Programs:: @command{sed} programs * Examples:: Some sample scripts * Limitations:: Limitations and (non-)limitations of @value{SSED} ! * Other Resources:: Other resources for learning about @command{sed} * Reporting Bugs:: Reporting bugs ! 
* Extended regexps:: @command{egrep}-style regular expressions @ifset PERL * Perl regexps:: Perl-style regular expressions @end ifset * Concept Index:: A menu with all the topics in this manual. ! * Command and Option Index:: A menu with all @command{sed} commands and command-line options. @detailmenu --- The detailed node listing --- sed Programs: ! * Addresses:: Selecting lines with @command{sed} * Regular Expressions:: Overview of regular expression syntax ! * Data Spaces:: Where @command{sed} buffers data * Common Commands:: Often used commands ! * The "s" Command:: @command{sed}'s Swiss Army Knife * Other Commands:: Less frequently used commands ! * Programming Commands:: Commands for @command{sed} gurus ! * Extended Commands:: Commands specific of @value{SSED} * Escapes:: Specifying special characters Examples: *************** *** 152,166 **** @chapter Introduction @cindex Stream editor ! @code{sed} is a stream editor. A stream editor is used to perform basic text transformations on an input stream (a file or input from a pipeline). While in some ways similar to an editor which ! permits scripted edits (such as @code{ed}), ! @code{sed} works by making only one pass over the input(s), and is consequently more efficient. ! But it is @code{sed}'s ability to filter text in a pipeline which particularly distinguishes it from other types of editors. --- 150,164 ---- @chapter Introduction @cindex Stream editor ! @command{sed} is a stream editor. A stream editor is used to perform basic text transformations on an input stream (a file or input from a pipeline). While in some ways similar to an editor which ! permits scripted edits (such as @command{ed}), ! @command{sed} works by making only one pass over the input(s), and is consequently more efficient. ! But it is @command{sed}'s ability to filter text in a pipeline which particularly distinguishes it from other types of editors. *************** *** 168,186 **** @node Invoking sed @chapter Invocation ! 
@code{sed} may be invoked with the following command-line options: ! @table @samp ! @item @code{-V} ! @itemx @code{--version} @opindex -V @opindex --version @cindex Version, printing ! Print out the version of @code{sed} that is being run and a copyright notice, then exit. ! @item @code{-h} ! @itemx @code{--help} @opindex -h @opindex --help @cindex Usage summary, printing --- 166,184 ---- @node Invoking sed @chapter Invocation ! @command{sed} may be invoked with the following command-line options: ! @table @code ! @item -V ! @itemx --version @opindex -V @opindex --version @cindex Version, printing ! Print out the version of @command{sed} that is being run and a copyright notice, then exit. ! @item -h ! @itemx --help @opindex -h @opindex --help @cindex Usage summary, printing *************** *** 188,208 **** and the bug-reporting address, then exit. ! @item @code{-n} ! @itemx @code{--quiet} ! @itemx @code{--silent} @opindex -n @opindex --quiet @opindex --silent @cindex Disabling autoprint, from command line ! By default, @code{sed} will print out the pattern space at the end of each cycle through the script. These options disable this automatic printing, ! and @code{sed} will only produce output when explicitly told to via the @code{p} command. ! @item @code{-i}[@var{SUFFIX}] ! @itemx @code{--in-place[=@var{SUFFIX}]} @opindex -i @opindex --in-place @cindex In-place editing --- 186,206 ---- and the bug-reporting address, then exit. ! @item -n ! @itemx --quiet ! @itemx --silent @opindex -n @opindex --quiet @opindex --silent @cindex Disabling autoprint, from command line ! By default, @command{sed} prints out the pattern space at the end of each cycle through the script. These options disable this automatic printing, ! and @command{sed} only produces output when explicitly told to via the @code{p} command. ! @item -i[@var{SUFFIX}] ! 
@itemx --in-place[=@var{SUFFIX}] @opindex -i @opindex --in-place @cindex In-place editing *************** *** 210,221 **** This option specifies that files are to be edited in-place. @value{SSED} does this by creating a temporary file and sending output to this file rather than to the standard ! output@footnote{This applies to commands such as @code{=}, @code{a}, @code{c}, @code{i}, @code{l}, @code{p}. You can still write to the standard output by using the @code{w} @cindex @value{SSEDEXT}, @file{/dev/stdout} file or @code{W} commands together with the @file{/dev/stdout} ! special file}. When the end of the file is reached, the temporary file is renamed to the output file's original name. --- 208,219 ---- This option specifies that files are to be edited in-place. @value{SSED} does this by creating a temporary file and sending output to this file rather than to the standard ! output.@footnote{This applies to commands such as @code{=}, @code{a}, @code{c}, @code{i}, @code{l}, @code{p}. You can still write to the standard output by using the @code{w} @cindex @value{SSEDEXT}, @file{/dev/stdout} file or @code{W} commands together with the @file{/dev/stdout} ! special file} When the end of the file is reached, the temporary file is renamed to the output file's original name. *************** *** 235,267 **** the original files into another directory (provided the directory already exists). ! This option implies @code{-s}. ! @item @code{-l} @var{N} ! @itemx @code{--line-length=@var{N}} @opindex -l @opindex --line-length @cindex Line length, setting ! Specify the default line-wrap length for the 'l' command. A length of 0 (zero) means to never wrap long lines. If not specified, it is taken to be 70. ! @item @code{-r} ! @itemx @code{--regexp-extended} @opindex -r @opindex --regexp-extended @cindex Extended regular expressions, choosing @cindex @acronym{GNU} extensions, extended regular expressions Use extended regular expressions rather than basic regular expressions. 
Extended regexps are those that ! @code{egrep} accepts; they can be clearer because they ! usually have less backslashes, but are a @sc{gnu} extension ! and hence scripts that use it are not portable. @xref{Extended regexps, , Extended regular expressions}. @ifset PERL ! @item @code{-R} ! @itemx @code{--regexp-perl} @opindex -R @opindex --regexp-perl @cindex Perl-style regular expressions, choosing --- 233,265 ---- the original files into another directory (provided the directory already exists). ! This option implies @option{-s}. ! @item -l @var{N} ! @itemx --line-length=@var{N} @opindex -l @opindex --line-length @cindex Line length, setting ! Specify the default line-wrap length for the @code{l} command. A length of 0 (zero) means to never wrap long lines. If not specified, it is taken to be 70. ! @item -r ! @itemx --regexp-extended @opindex -r @opindex --regexp-extended @cindex Extended regular expressions, choosing @cindex @acronym{GNU} extensions, extended regular expressions Use extended regular expressions rather than basic regular expressions. Extended regexps are those that ! @command{egrep} accepts; they can be clearer because they ! usually have less backslashes, but are a @acronym{GNU} extension ! and hence scripts that use them are not portable. @xref{Extended regexps, , Extended regular expressions}. @ifset PERL ! @item -R ! @itemx --regexp-perl @opindex -R @opindex --regexp-perl @cindex Perl-style regular expressions, choosing *************** *** 273,310 **** Perl-style regular expressions}. @end ifset ! @item @code{-s} ! @itemx @code{--separate} @cindex Working on separate files ! By default, @code{sed} will consider the files specified on the command line as a single continuous long stream. This @value{SSED} ! extension allows the user to consider them separate files: ! 
range addresses (such as @code{/abc/,/def/}) are not allowed to span several files, line numbers are relative to the start of each file, @code{$} refers to the last line of each file, and files invoked from the @code{R} commands are rewound at the start of each file. ! @item @code{-u} ! @itemx @code{--unbuffered} @opindex -u @opindex --unbuffered @cindex Unbuffered I/O, choosing Buffer both input and output as minimally as practical. (This is particularly useful if the input is coming from ! the likes of @code{tail -f}, and you wish to see the transformed output as soon as possible.) ! @item @code{-e} @var{script} ! @itemx @code{--expression=@var{script}} @opindex -e @opindex --expression @cindex Script, from command line Add the commands in @var{script} to the set of commands to be run while processing the input. ! @item @code{-f} @var{script-file} ! @itemx @code{--file=@var{script-file}} @opindex -f @opindex --file @cindex Script, from a file --- 271,308 ---- Perl-style regular expressions}. @end ifset ! @item -s ! @itemx --separate @cindex Working on separate files ! By default, @command{sed} will consider the files specified on the command line as a single continuous long stream. This @value{SSED} ! extension allows the user to consider them as separate files: ! range addresses (such as @samp{/abc/,/def/}) are not allowed to span several files, line numbers are relative to the start of each file, @code{$} refers to the last line of each file, and files invoked from the @code{R} commands are rewound at the start of each file. ! @item -u ! @itemx --unbuffered @opindex -u @opindex --unbuffered @cindex Unbuffered I/O, choosing Buffer both input and output as minimally as practical. (This is particularly useful if the input is coming from ! the likes of @samp{tail -f}, and you wish to see the transformed output as soon as possible.) ! @item -e @var{script} ! 
@itemx --expression=@var{script} @opindex -e @opindex --expression @cindex Script, from command line Add the commands in @var{script} to the set of commands to be run while processing the input. ! @item -f @var{script-file} ! @itemx --file=@var{script-file} @opindex -f @opindex --file @cindex Script, from a file *************** *** 313,319 **** @end table ! If no @samp{-e}, @samp{-f}, @samp{--expression}, or @samp{--file} options are given on the command-line, then the first non-option argument on the command line is taken to be the @var{script} to be executed. --- 311,317 ---- @end table ! If no @option{-e}, @option{-f}, @option{--expression}, or @option{--file} options are given on the command-line, then the first non-option argument on the command line is taken to be the @var{script} to be executed. *************** *** 328,344 **** @node sed Programs ! @chapter @code{sed} Programs ! @cindex @code{sed} program structure @cindex Script structure ! A @code{sed} program consists of one or more @code{sed} commands, passed in by one or more of the ! @samp{-e}, @samp{-f}, @samp{--expression}, and @samp{--file} options, or the first non-option argument if zero of these options are used. ! This document will refer to ``the'' @code{sed} script; ! this will be understood to mean the in-order catenation of all of the @var{script}s and @var{script-file}s passed in. Each @code{sed} command consists of an optional address or --- 326,342 ---- @node sed Programs ! @chapter @command{sed} Programs ! @cindex @command{sed} program structure @cindex Script structure ! A @command{sed} program consists of one or more @command{sed} commands, passed in by one or more of the ! @option{-e}, @option{-f}, @option{--expression}, and @option{--file} options, or the first non-option argument if zero of these options are used. ! This document will refer to ``the'' @command{sed} script; ! 
this is understood to mean the in-order catenation of all of the @var{script}s and @var{script-file}s passed in. Each @code{sed} command consists of an optional address or *************** *** 346,397 **** and any additional command-specific code. @menu ! * Addresses:: Selecting lines with @code{sed} * Regular Expressions:: Overview of regular expression syntax ! * Data Spaces:: Where @code{sed} buffers data * Common Commands:: Often used commands ! * The "s" Command:: @code{sed}'s Swiss Army Knife * Other Commands:: Less frequently used commands ! * Programming Commands:: Commands for @code{sed} gurus ! * SSED-specific Commands:: Commands specific of @value{SSED} * Escapes:: Specifying special characters @end menu @node Addresses ! @section Selecting lines with @code{sed} ! @cindex Addresses, in @code{sed} scripts @cindex Line selection @cindex Selecting lines to process ! Addresses in a @code{sed} script can be in any of the following forms: ! @table @samp @item @var{number} @cindex Address, numeric @cindex Line, selecting by number Specifying a line number will match only that line in the input. ! (Note that @code{sed} counts lines continuously across all input files ! unless @code{-i} or @code{-s} options are specified.) @item @var{first}~@var{step} ! @cindex @acronym{GNU} extensions, @code{@var{n}~@var{m}} addresses ! This @sc{gnu} extension matches every @var{step}th line starting with line @var{first}. In particular, lines will be selected when there exists a non-negative @var{n} such that the current line-number equals @var{first} + (@var{n} * @var{step}). Thus, to select the odd-numbered lines, one would use @code{1~2}; ! to pick every third line starting with the second, @code{2~3} would be used; ! to pick every fifth line starting with the tenth, use @code{10~5}; ! and @code{50~0} is just an obscure way of saying @code{50}. 
@item $ @cindex Address, last line @cindex Last line, selecting @cindex Line, selecting last This address matches the last line of the last file of input, or ! the last line of each file when the @code{-i} or @code{-s} options are specified. @item /@var{regexp}/ --- 344,395 ---- and any additional command-specific code. @menu ! * Addresses:: Selecting lines with @command{sed} * Regular Expressions:: Overview of regular expression syntax ! * Data Spaces:: Where @command{sed} buffers data * Common Commands:: Often used commands ! * The "s" Command:: @command{sed}'s Swiss Army Knife * Other Commands:: Less frequently used commands ! * Programming Commands:: Commands for @command{sed} gurus ! * Extended Commands:: Commands specific of @value{SSED} * Escapes:: Specifying special characters @end menu @node Addresses ! @section Selecting lines with @command{sed} ! @cindex Addresses, in @command{sed} scripts @cindex Line selection @cindex Selecting lines to process ! Addresses in a @command{sed} script can be in any of the following forms: ! @table @code @item @var{number} @cindex Address, numeric @cindex Line, selecting by number Specifying a line number will match only that line in the input. ! (Note that @command{sed} counts lines continuously across all input files ! unless @option{-i} or @option{-s} options are specified.) @item @var{first}~@var{step} ! @cindex @acronym{GNU} extensions, @samp{@var{n}~@var{m}} addresses ! This @acronym{GNU} extension matches every @var{step}th line starting with line @var{first}. In particular, lines will be selected when there exists a non-negative @var{n} such that the current line-number equals @var{first} + (@var{n} * @var{step}). Thus, to select the odd-numbered lines, one would use @code{1~2}; ! to pick every third line starting with the second, @samp{2~3} would be used; ! to pick every fifth line starting with the tenth, use @samp{10~5}; ! and @samp{50~0} is just an obscure way of saying @code{50}. 
@item $ @cindex Address, last line @cindex Last line, selecting @cindex Line, selecting last This address matches the last line of the last file of input, or ! the last line of each file when the @option{-i} or @option{-s} options are specified. @item /@var{regexp}/ *************** *** 406,419 **** @cindex @acronym{GNU} extensions, modifiers and the empty regular expression @cindex @value{SSEDEXT}, modifiers and the empty regular expression Unless @code{POSIXLY_CORRECT} is set, the empty regular expression ! @code{//} repeats the last regular expression match (the same holds if the empty regular expression is passed to the @code{s} command). Note that modifiers to regular expressions are evaluated when the regular expression is compiled, thus it is illegal to specify them together with the empty regular expression. ! If @code{POSIXLY_CORRECT} is set, instead, @code{//} is the null match: this behavior is mandated by @sc{posix}, but it would break too many legacy ! sed scripts to blithely change @value{SSED}'s default behavior. @item \%@var{regexp}% (The @code{%} may be replaced by any other single character.) --- 404,418 ---- @cindex @acronym{GNU} extensions, modifiers and the empty regular expression @cindex @value{SSEDEXT}, modifiers and the empty regular expression Unless @code{POSIXLY_CORRECT} is set, the empty regular expression ! @samp{//} repeats the last regular expression match (the same holds if the empty regular expression is passed to the @code{s} command). Note that modifiers to regular expressions are evaluated when the regular expression is compiled, thus it is illegal to specify them together with the empty regular expression. ! @c *** CHECK CURRENT POSIX, I'M NOT SURE THIS IS STILL TRUE. ADR. ! If @code{POSIXLY_CORRECT} is set, instead, @samp{//} is the null match: this behavior is mandated by @sc{posix}, but it would break too many legacy ! @command{sed} scripts to blithely change @value{SSED}'s default behavior. 
@item \%@var{regexp}% (The @code{%} may be replaced by any other single character.) *************** *** 422,428 **** This also matches the regular expression @var{regexp}, but allows one to use a different delimiter than @code{/}. This is particularly useful if the @var{regexp} itself contains ! a lot of @code{/}s, since it avoids the tedious escaping of every @code{/}. If @var{regexp} itself includes any delimiter characters, each must be escaped by a backslash (@code{\}). --- 421,427 ---- This also matches the regular expression @var{regexp}, but allows one to use a different delimiter than @code{/}. This is particularly useful if the @var{regexp} itself contains ! a lot of slashes, since it avoids the tedious escaping of every @code{/}. If @var{regexp} itself includes any delimiter characters, each must be escaped by a backslash (@code{\}). *************** *** 432,438 **** @ifset PERL @cindex Perl-style regular expressions, case-insensitive @end ifset ! The @code{I} modifier to regular-expression matching is a @sc{gnu} extension which causes the @var{regexp} to be matched in a case-insensitive manner. --- 431,437 ---- @ifset PERL @cindex Perl-style regular expressions, case-insensitive @end ifset ! The @code{I} modifier to regular-expression matching is a @acronym{GNU} extension which causes the @var{regexp} to be matched in a case-insensitive manner. *************** *** 446,453 **** extension which causes @code{^} and @code{$} to match respectively (in addition to the normal behavior) the empty string after a new-line, and the empty string before a new-line. There are special character ! sequences (@code{\A} and @code{\Z} in Perl mode, @code{\`} and @code{\'} ! in basic or extended regular expression modes) which always match the beginning or the end of the buffer. @code{M} stands for @cite{multi-line}. 
--- 445,460 ---- extension which causes @code{^} and @code{$} to match respectively (in addition to the normal behavior) the empty string after a new-line, and the empty string before a new-line. There are special character ! sequences ! @ifset PERL ! (@code{\A} and @code{\Z} in Perl mode, ! @code{\`} and @code{\'} ! @end ifset ! @ifclear PERL ! (@code{\`} and @code{\'} ! @end ifclear ! in basic or extended regular expression modes) ! which always match the beginning or the end of the buffer. @code{M} stands for @cite{multi-line}. *************** *** 490,498 **** (inclusively). If the second address is a @var{regexp}, then checking for the ending match will start with the line @emph{following} the ! line which matched the first address. As a @sc{gnu} extension, a line number of @code{0} can be used in an address specification ! like @code{0,/@var{regexp}/} so that @var{regexp} will be matched in the first input line too. If the second address is a @var{number} less than (or equal to) --- 497,505 ---- (inclusively). If the second address is a @var{regexp}, then checking for the ending match will start with the line @emph{following} the ! line which matched the first address. As a @acronym{GNU} extension, a line number of @code{0} can be used in an address specification ! like @samp{0,/@var{regexp}/} so that @var{regexp} will be matched in the first input line too. If the second address is a @var{number} less than (or equal to) *************** *** 508,533 **** @cindex @acronym{GNU} extensions, 0,@var{addr2} addressing @cindex @acronym{GNU} extensions, @var{addr1},+@var{N} addressing @cindex @acronym{GNU} extensions, @var{addr1},~@var{N} addressing ! @value{SSED} also supports some special 2-address forms: ! @table @samp @item 0,@var{addr2} ! Start out in "matched first address" state, until @var{addr2} is found. ! 
This is similar to 1,@var{addr2}, except that if @var{addr2} matches the very first line of input the 0,@var{addr2} form will be at the end of its range, whereas the 1,@var{addr2} form will still be at the beginning of its range. @item @var{addr1},+@var{N} ! Will match @var{addr1} and the @var{N} lines following @var{addr1}. @item @var{addr1},~@var{N} ! Will match @var{addr1} and the lines following @var{addr1} until the next line whose input line number is a multiple of @var{N}. @end table @cindex Excluding lines @cindex Selecting non-matching lines Appending the @code{!} character to the end of an address ! specification will negate the sense of the match. That is, if the @code{!} character follows an address range, then only lines which do @emph{not} match the address range will be selected. --- 515,542 ---- @cindex @acronym{GNU} extensions, 0,@var{addr2} addressing @cindex @acronym{GNU} extensions, @var{addr1},+@var{N} addressing @cindex @acronym{GNU} extensions, @var{addr1},~@var{N} addressing ! @value{SSED} also supports some special two-address forms: ! @table @code @item 0,@var{addr2} ! Start out in ``matched first address'' state, until @var{addr2} is found. ! This is similar to @samp{1,@var{addr2}}, except that if @var{addr2} matches the very first line of input the 0,@var{addr2} form will be at the end of its range, whereas the 1,@var{addr2} form will still be at the beginning of its range. + @item @var{addr1},+@var{N} ! Matches @var{addr1} and the @var{N} lines following @var{addr1}. ! @item @var{addr1},~@var{N} ! Matches @var{addr1} and the lines following @var{addr1} until the next line whose input line number is a multiple of @var{N}. @end table @cindex Excluding lines @cindex Selecting non-matching lines Appending the @code{!} character to the end of an address ! specification negates the sense of the match. That is, if the @code{!} character follows an address range, then only lines which do @emph{not} match the address range will be selected. 
*************** *** 536,544 **** @node Regular Expressions ! @section Overview of regular expression syntax ! To know how to use @code{sed}, people should understand regular expressions (@dfn{regexp} for short). A regular expression is a pattern that is matched against a subject string from left to right. Most characters stand for --- 545,553 ---- @node Regular Expressions ! @section Overview of Regular Expression Syntax ! To know how to use @command{sed}, people should understand regular expressions (@dfn{regexp} for short). A regular expression is a pattern that is matched against a subject string from left to right. Most characters stand for *************** *** 556,578 **** These are encoded in the pattern by the use of metacharacters, which do not stand for themselves but instead are interpreted in some special way. Here is a brief description ! of regular expression syntax as used in @code{sed}. @table @code @item @var{char} ! A single char, if not special, is matched against text. @item * ! Matches a sequence of zero or more repetitions of previous char, grouped regexp (see below), or class. @item \+ @cindex @acronym{GNU} extensions, to basic regular expressions ! As *, but matches one or more. It is a @sc{gnu} extension. @item \? @cindex @acronym{GNU} extensions, to basic regular expressions ! As *, but only matches zero or one. It is a @sc{gnu} extension. @item \@{@var{i}\@} As *, but matches exactly @var{i} sequences (@var{i} is a --- 565,587 ---- These are encoded in the pattern by the use of metacharacters, which do not stand for themselves but instead are interpreted in some special way. Here is a brief description ! of regular expression syntax as used in @command{sed}. @table @code @item @var{char} ! A single character, if not special, is matched against text. @item * ! Matches a sequence of zero or more repetitions of previous character, grouped regexp (see below), or class. @item \+ @cindex @acronym{GNU} extensions, to basic regular expressions ! 
As *, but matches one or more. It is a @acronym{GNU} extension. @item \? @cindex @acronym{GNU} extensions, to basic regular expressions ! As *, but only matches zero or one. It is a @acronym{GNU} extension. @item \@{@var{i}\@} As *, but matches exactly @var{i} sequences (@var{i} is a *************** *** 589,627 **** @itemize @bullet @item ! apply postfix operators, like @code{\(abcd\)*}: this will search for zero or more whole sequences of @samp{abcd}, while @code{abcd*} would search for @samp{abc} followed by zero or more occurrences ! of @samp{d} @item ! use back references (see below) @end itemize @item . Matches any character @item ^ ! Match the null string at beginning of line, i.e. what ! what appears after the caret must appear at the beginning of line. @code{^#include} will match only ! lines where "#include" is the first thing on line---if ! there are one or two spaces before, the match fails. @item $ It is the same as @code{^}, but refers to end of line @item [@var{list}] @itemx [^@var{list}] ! Matches any single char in @var{list}: for example, @samp{[aeiou]} matches all vowels. A list may include sequences like @samp{@var{char1}-@var{char2}}, which matches any character between (inclusive) @var{char1} and @var{char2}. The caret reverses the meaning of the regexp, so that ! it matches any single char NOT in list. To include @samp{]} in the list, make it the first character (after the caret if needed), to include @samp{-} in the list, make it the first or last; to include @samp{^} put --- 598,638 ---- @itemize @bullet @item ! @cindex @acronym{GNU} extensions, to basic regular expressions ! Apply postfix operators, like @code{\(abcd\)*}: this will search for zero or more whole sequences of @samp{abcd}, while @code{abcd*} would search for @samp{abc} followed by zero or more occurrences ! of @samp{d}. Note that this is not in the @sc{posix} ! standard and hence is not portable. @item ! Use back references (see below) @end itemize @item . 
Matches any character @item ^ ! Matches the null string at beginning of line, i.e. what ! appears after the caret must appear at the beginning of line. @code{^#include} will match only ! lines where @samp{#include} is the first thing on line---if ! there are spaces before, for example, the match fails. @item $ It is the same as @code{^}, but refers to end of line @item [@var{list}] @itemx [^@var{list}] ! Matches any single character in @var{list}: for example, @samp{[aeiou]} matches all vowels. A list may include sequences like @samp{@var{char1}-@var{char2}}, which matches any character between (inclusive) @var{char1} and @var{char2}. The caret reverses the meaning of the regexp, so that ! it matches any single character NOT in list. To include @samp{]} in the list, make it the first character (after the caret if needed), to include @samp{-} in the list, make it the first or last; to include @samp{^} put *************** *** 633,657 **** parentheses to use complex alternative regular expressions. The matching process tries each alternative in turn, from left to right, and the first one that succeeds is used. ! It is a @sc{gnu} extension. @item \@var{digit} ! Matches the @var{digit}-th @code{\(\)} reference in the regular expression. @item \@var{char} Matches character @var{char}; this is to be used to match ! special chars, referred above. Note that the only C-like backslash sequence that you can portably assume to be interpreted is @code{\n} for a new-line; in particular @code{\t} matches a @samp{t} under most implementations ! of @code{sed}, rather than a tabulation character. @end table @cindex Greedy regular expression matching ! Note that the regular expression matcher is greedy, i.e. if ! two or more matches are detected, it selects the longest, if there are two or more selected with the same size, it selects the first in text. --- 644,668 ---- parentheses to use complex alternative regular expressions. 
The matching process tries each alternative in turn, from left to right, and the first one that succeeds is used. ! It is a @acronym{GNU} extension. @item \@var{digit} ! Matches the @var{digit}-th @code{\(@dots{}\)} reference in the regular expression. @item \@var{char} Matches character @var{char}; this is to be used to match ! special characters, referred above. Note that the only C-like backslash sequence that you can portably assume to be interpreted is @code{\n} for a new-line; in particular @code{\t} matches a @samp{t} under most implementations ! of @command{sed}, rather than a tabulation character. @end table @cindex Greedy regular expression matching ! Note that the regular expression matcher is greedy, i.e., if ! two or more matches are detected, it selects the longest; if there are two or more selected with the same size, it selects the first in text. *************** *** 659,672 **** Examples: @table @samp @item abcdef ! Matches @samp{abcdef} @item a*b Matches zero or more @samp{a}s followed by a single @samp{b}. For example, @samp{b} or @samp{aaaaab}. @item a\?b ! Matches @samp{b} or @samp{ab} @item a\+b\+ Matches one or more @samp{a}s followed by one or more --- 670,683 ---- Examples: @table @samp @item abcdef ! Matches @samp{abcdef}. @item a*b Matches zero or more @samp{a}s followed by a single @samp{b}. For example, @samp{b} or @samp{aaaaab}. @item a\?b ! Matches @samp{b} or @samp{ab}. @item a\+b\+ Matches one or more @samp{a}s followed by one or more *************** *** 676,736 **** @item .* @itemx .\+ ! These two will both match all the characters on a line; ! however, the first will match every line (including empty ! ones), while the second will only match lines containing ! at least one char. @item ^main.*(.*) ! This will search for a line containing "main" as the first thing on the line, followed by an opening and closing parenthesis. The @samp{n}, @samp{(} and @samp{)} need not ! be adjacent @item ^# ! 
This will match lines beginning with a hash (or sharp) character. @item \\$ ! This will match lines ending with a single backslash. The regexp contains two backslashes for escaping. @item \$ ! Instead, this will match lines containing a single dollar, because it is escaped. ! @item [a-zA-Z_] ! This will match any letters or digits @item [^ @kbd{tab}]\+ ! This will match one or more sequences ! of any char that isn't a space or tab. ! Usually this means a word @item ^\(.*\)\n\1$ ! This will match two equal lines without a trailing new-line ! @item A.\@{9\@}$ ! This will match an "A" that is exactly the last tenth ! character on line ! @item ^.\@{,15\@}A ! Match the last "A" on the first 16 chars of the line @end table @node Data Spaces ! @section Where @code{sed} buffers data @cindex Buffer spaces, pattern and hold @cindex Spaces, pattern and hold @cindex Pattern space, definition @cindex Hold space, definition ! @code{sed} maintains two data buffers: the active @emph{pattern} space, and the auxiliary @emph{hold} space. ! In ``normal'' operation, @code{sed} reads in one line from the input stream and places it in the pattern space. This pattern space is where text manipulations occur. The hold space is initially empty, but there are commands --- 687,747 ---- @item .* @itemx .\+ ! These two both match all the characters on a line; ! however, the first matches every line (including empty ! ones), while the second only matches lines containing ! at least one character. @item ^main.*(.*) ! This searches for a line containing @samp{main} as the first thing on the line, followed by an opening and closing parenthesis. The @samp{n}, @samp{(} and @samp{)} need not ! be adjacent. @item ^# ! This matches lines beginning with a hash (or sharp) character. @item \\$ ! This matches lines ending with a single backslash. The regexp contains two backslashes for escaping. @item \$ ! Instead, this matches lines containing a single dollar, because it is escaped. ! @item [a-zA-Z0-9] ! 
This matches any letters or digits. @item [^ @kbd{tab}]\+ ! This matches one or more sequences ! of any character that isn't a space or tab. ! Usually this means a word. @item ^\(.*\)\n\1$ ! This matches two equal lines without a trailing new-line. ! @item .\@{9\@}A$ ! This matches an @code{A} that is the last ! character on line, with at least nine preceding characters. ! @item ^.\@{15\@}A ! This matches an @code{A} that is the 16th character on a line. @end table @node Data Spaces ! @section Where @command{sed} Buffers Data @cindex Buffer spaces, pattern and hold @cindex Spaces, pattern and hold @cindex Pattern space, definition @cindex Hold space, definition ! @command{sed} maintains two data buffers: the active @emph{pattern} space, and the auxiliary @emph{hold} space. ! In ``normal'' operation, @command{sed} reads in one line from the input stream and places it in the pattern space. This pattern space is where text manipulations occur. The hold space is initially empty, but there are commands *************** *** 739,750 **** @node Common Commands ! @section Often used commands ! If you use @code{sed} at all, you will quite likely want to know these commands. ! @table @samp @item # [No addresses allowed.] --- 750,761 ---- @node Common Commands ! @section Often-Used Commands ! If you use @command{sed} at all, you will quite likely want to know these commands. ! @table @code @item # [No addresses allowed.] *************** *** 755,773 **** @cindex Portability, comments If you are concerned about portability, be aware that ! some implementations of @code{sed} (which are not @sc{posix.2} conformant) may only support a single one-line comment, and then only when the very first character of the script is a @code{#}. @findex -n, forcing from within a script @cindex Caveat --- #n on first line ! Warning: if the first two characters of the @code{sed} script ! are @code{#n}, then the @samp{-n} (no-autoprint) option is forced. 
If you want to put a comment in the first line of your script ! and that comment begins with the letter `n' and you do not want this behavior, ! then be sure to either use a capital `N', ! or place at least one space before the `n'. @item q [@var{exit-code}] [At most one address allowed.] --- 766,784 ---- @cindex Portability, comments If you are concerned about portability, be aware that ! some implementations of @command{sed} (which are not @sc{posix} conformant) may only support a single one-line comment, and then only when the very first character of the script is a @code{#}. @findex -n, forcing from within a script @cindex Caveat --- #n on first line ! Warning: if the first two characters of the @command{sed} script ! are @code{#n}, then the @option{-n} (no-autoprint) option is forced. If you want to put a comment in the first line of your script ! and that comment begins with the letter @samp{n} and you do not want this behavior, ! then be sure to either use a capital @samp{N}, ! or place at least one space before the @samp{n}. @item q [@var{exit-code}] [At most one address allowed.] *************** *** 775,784 **** @findex q (quit) command @cindex @value{SSEDEXT}, returning an exit code @cindex Quitting ! Exit @code{sed} without processing any more commands or input. Note that the current pattern space is printed if auto-print is ! not disabled with the @code{-n} switch. The ability to return ! an exit code from the @code{sed} script is a @value{SSED} extension. @item d @findex d (delete) command --- 786,795 ---- @findex q (quit) command @cindex @value{SSEDEXT}, returning an exit code @cindex Quitting ! Exit @command{sed} without processing any more commands or input. Note that the current pattern space is printed if auto-print is ! not disabled with the @option{-n} option. The ability to return ! an exit code from the @command{sed} script is a @value{SSED} extension.
@item d @findex d (delete) command *************** *** 790,809 **** @findex p (print) command @cindex Text, printing Print out the pattern space (to the standard output). ! This command is usually only used in conjunction with the @samp{-n} command-line option. ! Note: some implementations of @code{sed}, such as this one, will double-print lines when auto-print is not disabled and the @code{p} command is given. Other implementations will only print the line once. ! Both ways conform with the @sc{posix.2} standard, and so neither way can be considered to be in error. ! @cindex Non-bugs, @code{p} command and @samp{-n} flag ! @cindex Portability, @code{p} command and @samp{-n} flag ! Portable @code{sed} scripts should thus avoid relying on either behavior; ! either use the @samp{-n} option and explicitly print what you want, or avoid use of the @code{p} command (and also the @code{p} flag to the @code{s} command). --- 801,821 ---- @findex p (print) command @cindex Text, printing Print out the pattern space (to the standard output). ! This command is usually only used in conjunction with the @option{-n} command-line option. ! Note: some implementations of @command{sed}, such as this one, will double-print lines when auto-print is not disabled and the @code{p} command is given. Other implementations will only print the line once. ! @c CHECK CURRENT STANDARD. I THINK NOT DOUBLE-PRINTING IS HOW IT WORKS. ADR. ! Both ways conform with the @sc{posix} standard, and so neither way can be considered to be in error. ! @cindex Non-bugs, @code{p} command and @option{-n} flag ! @cindex Portability, @code{p} command and @option{-n} flag ! Portable @command{sed} scripts should thus avoid relying on either behavior; ! either use the @option{-n} option and explicitly print what you want, or avoid use of the @code{p} command (and also the @code{p} flag to the @code{s} command). 
*************** *** 813,819 **** @cindex Read next input line If auto-print is not disabled, print the pattern space, then, regardless, replace the pattern space with the next line of input. ! If there is no more input then @code{sed} exits without processing any more commands. @item @{ @var{commands} @} --- 825,831 ---- @cindex Read next input line If auto-print is not disabled, print the pattern space, then, regardless, replace the pattern space with the next line of input. ! If there is no more input then @command{sed} exits without processing any more commands. @item @{ @var{commands} @} *************** *** 831,844 **** @section The @code{s} Command The syntax of the @code{s} (as in substitute) command is ! s/@var{regexp}/@var{replacement}/@var{flags}. The @code{/} characters may be uniformly replaced by any other single character within any given @code{s} command. The @code{/} character (or whatever other character is used in its stead) can appear in the @var{regexp} or @var{replacement} only if it is preceded by a @code{\} character. ! The @code{s} command is probably the most important in @code{sed} and has a lot of different options. Its basic concept is simple: the @code{s} command attempts to match the pattern space against the supplied @var{regexp}; if the match is --- 843,856 ---- @section The @code{s} Command The syntax of the @code{s} (as in substitute) command is ! @samp{s/@var{regexp}/@var{replacement}/@var{flags}}. The @code{/} characters may be uniformly replaced by any other single character within any given @code{s} command. The @code{/} character (or whatever other character is used in its stead) can appear in the @var{regexp} or @var{replacement} only if it is preceded by a @code{\} character. ! The @code{s} command is probably the most important in @command{sed} and has a lot of different options. 
Its basic concept is simple: the @code{s} command attempts to match the pattern space against the supplied @var{regexp}; if the match is *************** *** 852,878 **** the portion of the match which is contained between the @var{n}th @code{\(} and its matching @code{\)}. Also, the @var{replacement} can contain unescaped @code{&} ! characters which will reference the whole matched portion of the pattern space. ! @cindex @value{SSEDEXT}, case modifiers in `s' commands Finally (this is a @value{SSED} extension) you can include a special sequence made of a backslash and one of the letters ! @code{LlUuE}. The meaning is, respectively: turn the replacement ! to lowercase until a @code{\U} or @code{\E} is found, turn the ! next character to lowercase, turn the replacement to uppercase ! until a @code{\L} or @code{\E} is found, turn the next character ! to uppercase, and stop case conversion started by @code{\L} or ! @code{\U}. To include a literal @code{\}, @code{&}, or newline in the final replacement, be sure to precede the desired @code{\}, @code{&}, or newline in the @var{replacement} with a @code{\}. @findex s command, option flags @cindex Substitution of text, options ! The @code{s} command can be followed with zero or more of the following @var{flags}: ! @table @samp @item g @cindex Global substitution @cindex Replacing all text matching regexp in a line --- 864,908 ---- the portion of the match which is contained between the @var{n}th @code{\(} and its matching @code{\)}. Also, the @var{replacement} can contain unescaped @code{&} ! characters which reference the whole matched portion of the pattern space. ! @cindex @value{SSEDEXT}, case modifiers in @code{s} commands Finally (this is a @value{SSED} extension) you can include a special sequence made of a backslash and one of the letters ! @code{L}, @code{l}, @code{U}, @code{u}, or @code{E}. ! The meaning is as follows: ! ! @table @code ! @item \L ! Turn the replacement ! 
to lowercase until a @code{\U} or @code{\E} is found, ! ! @item \l ! Turn the ! next character to lowercase, ! ! @item \U ! Turn the replacement to uppercase ! until a @code{\L} or @code{\E} is found, ! ! @item \u ! Turn the next character ! to uppercase, ! ! @item \E ! Stop case conversion started by @code{\L} or @code{\U}. ! @end table ! To include a literal @code{\}, @code{&}, or newline in the final replacement, be sure to precede the desired @code{\}, @code{&}, or newline in the @var{replacement} with a @code{\}. @findex s command, option flags @cindex Substitution of text, options ! The @code{s} command can be followed by zero or more of the following @var{flags}: ! @table @code @item g @cindex Global substitution @cindex Replacing all text matching regexp in a line *************** *** 885,894 **** @cindex @acronym{GNU} extensions, @code{g} and @var{number} modifier interaction in @code{s} command @cindex Mixing @code{g} and @var{number} modifiers in the @code{s} command ! Note: the @sc{posix.2} standard does not specify what should happen when you mix the @code{g} and @var{number} modifiers, and currently there is no widely agreed upon meaning ! across @code{sed} implementations. For @value{SSED}, the interaction is defined to be: ignore matches before the @var{number}th, and then match and replace all matches from --- 915,924 ---- @cindex @acronym{GNU} extensions, @code{g} and @var{number} modifier interaction in @code{s} command @cindex Mixing @code{g} and @var{number} modifiers in the @code{s} command ! Note: the @sc{posix} standard does not specify what should happen when you mix the @code{g} and @var{number} modifiers, and currently there is no widely agreed upon meaning ! across @command{sed} implementations. 
For @value{SSED}, the interaction is defined to be: ignore matches before the @var{number}th, and then match and replace all matches from *************** *** 900,910 **** Note: when both the @code{p} and @code{e} options are specified, the relative ordering of the two produces very different results. ! In general, @code{ep} (evaluate then print) will be what you want, but operating the other way round can be useful for debugging. ! For this reason, the current versions of @value{SSED} interprets specially the presence of @code{p} options both before and after ! @code{e}, printing pattern space before and after evaluation, while in general flags for the @code{s} command show their effect just once. This behavior, although documented, might change in future versions. --- 930,940 ---- Note: when both the @code{p} and @code{e} options are specified, the relative ordering of the two produces very different results. ! In general, @code{ep} (evaluate then print) is what you want, but operating the other way round can be useful for debugging. ! For this reason, the current version of @value{SSED} interprets specially the presence of @code{p} options both before and after ! @code{e}, printing the pattern space before and after evaluation, while in general flags for the @code{s} command show their effect just once. This behavior, although documented, might change in future versions. *************** *** 917,924 **** As a @value{SSED} extension, two special values of @var{file-name} are supported: @file{/dev/stderr}, which writes the result to the standard error, and @file{/dev/stdout}, which writes to the standard ! output.@footnote{This is equivalent to @code{p} unless the @code{-i} ! 
switch is being used.} @item e @cindex Evaluate Bourne-shell commands, after substitution --- 947,954 ---- As a @value{SSED} extension, two special values of @var{file-name} are supported: @file{/dev/stderr}, which writes the result to the standard error, and @file{/dev/stdout}, which writes to the standard ! output.@footnote{This is equivalent to @code{p} unless the @option{-i} ! option is being used.} @item e @cindex Evaluate Bourne-shell commands, after substitution *************** *** 930,936 **** that is found in pattern space is executed and pattern space is replaced with its output. A trailing new-line is suppressed; results are undefined if the command to be executed contains ! a @code{nul} character. This is a @value{SSED} extension. @item I @itemx i --- 960,966 ---- that is found in pattern space is executed and pattern space is replaced with its output. A trailing new-line is suppressed; results are undefined if the command to be executed contains ! a @sc{nul} character. This is a @value{SSED} extension. @item I @itemx i *************** *** 939,946 **** @ifset PERL @cindex Perl-style regular expressions, case-insensitive @end ifset ! The @code{I} modifier to regular-expression matching is a @sc{gnu} ! extension which makes @code{sed} match @var{regexp} in a case-insensitive manner. @item M --- 969,976 ---- @ifset PERL @cindex Perl-style regular expressions, case-insensitive @end ifset ! The @code{I} modifier to regular-expression matching is a @acronym{GNU} ! extension which makes @command{sed} match @var{regexp} in a case-insensitive manner. @item M *************** *** 953,959 **** extension which causes @code{^} and @code{$} to match respectively (in addition to the normal behavior) the empty string after a new-line, and the empty string before a new-line. There are special character ! 
sequences (@code{\A} and @code{\Z} in Perl mode, @code{\`} and @code{\'} in basic or extended regular expression modes) which always match the beginning or the end of the buffer. @code{M} stands for @cite{multi-line}. --- 983,995 ---- extension which causes @code{^} and @code{$} to match respectively (in addition to the normal behavior) the empty string after a new-line, and the empty string before a new-line. There are special character ! sequences ! @ifset PERL ! (@code{\A} and @code{\Z} in Perl mode, @code{\`} and @code{\'} ! @end ifset ! @ifclear PERL ! (@code{\`} and @code{\'} ! @end ifclear in basic or extended regular expression modes) which always match the beginning or the end of the buffer. @code{M} stands for @cite{multi-line}. *************** *** 985,1028 **** @node Other Commands ! @section Less frequently used commands Though perhaps less frequently used than those in the previous ! section, some very small yet useful @code{sed} scripts can be built with these commands. ! @table @samp ! @item v ! @findex v (version) command ! @cindex @value{SSEDEXT}, checking for their presence ! @cindex Requiring @value{SSED} ! This command does nothing, but will make @code{sed} fail if ! @value{SSED} extensions are not supported, simply because other ! implementations of @code{sed} do not implement it. ! ! @item Q [@var{exit-code}] ! @findex Q (silent Quit) command ! @cindex @value{SSEDEXT}, quitting silently ! @cindex @value{SSEDEXT}, returning an exit code ! @cindex Quitting ! This command is the same as @code{q}, but will not print the ! contents of pattern space. Like @code{q}, it provides the ! ability to return an exit code to the caller. ! ! This command can be useful because the only alternative ways ! to accomplish this apparently trivial function are to use ! the @code{-n} option (which can unnecessarily complicate ! your script) or resorting to the following snippet, which ! wastes time by reading the whole file without any visible effect: ! ! @example ! 
:eat ! $d # @r{Quit silently on the last line} ! N # @r{Read another line, silently} ! g # @r{Overwrite pattern space each time to save memory} ! b eat ! @end example ! @item y/@var{source-chars}/@var{dest-chars}/ (The @code{/} characters may be uniformly replaced by any other single character within any given @code{y} command.) --- 1021,1033 ---- @node Other Commands ! @section Less Frequently-Used Commands Though perhaps less frequently used than those in the previous ! section, some very small yet useful @command{sed} scripts can be built with these commands. ! @table @code @item y/@var{source-chars}/@var{dest-chars}/ (The @code{/} characters may be uniformly replaced by any other single character within any given @code{y} command.) *************** *** 1048,1058 **** @cindex Text, appending Queue the lines of text which follow this command (each but the last ending with a @code{\}, ! which will be removed from the output) to be output at the end of the current cycle, or when the next input line is read. ! As a @sc{gnu} extension, if between the @code{a} and the newline there is other than a whitespace-@code{\} sequence, then the text of this line, starting at the first non-whitespace character after the @code{a}, is taken as the first line of the @var{text} block. --- 1053,1063 ---- @cindex Text, appending Queue the lines of text which follow this command (each but the last ending with a @code{\}, ! which are removed from the output) to be output at the end of the current cycle, or when the next input line is read. ! As a @acronym{GNU} extension, if between the @code{a} and the newline there is other than a whitespace-@code{\} sequence, then the text of this line, starting at the first non-whitespace character after the @code{a}, is taken as the first line of the @var{text} block. *************** *** 1068,1074 **** @cindex Text, insertion Immediately output the lines of text which follow this command (each but the last ending with a @code{\}, ! 
which will be removed from the output). @item c\ @itemx @var{text} --- 1073,1079 ---- @cindex Text, insertion Immediately output the lines of text which follow this command (each but the last ending with a @code{\}, ! which are removed from the output). @item c\ @itemx @var{text} *************** *** 1077,1083 **** Delete the lines matching the address or address-range, and output the lines of text which follow this command (each but the last ending with a @code{\}, ! which will be removed from the output) in place of the last line (or in place of each line, if no addresses were specified). A new cycle is started after this command is done, --- 1082,1088 ---- Delete the lines matching the address or address-range, and output the lines of text which follow this command (each but the last ending with a @code{\}, ! which are removed from the output) in place of the last line (or in place of each line, if no addresses were specified). A new cycle is started after this command is done, *************** *** 1108,1136 **** the default as specified on the command line is used. The @var{n} parameter is a @value{SSED} extension. - @item L @var{n} - @findex L (fLow paragraphs) command - @cindex Reformat pattern space - @cindex Reformatting paragraphs - @cindex @value{SSEDEXT}, reformatting paragraphs - @cindex @value{SSEDEXT}, @code{L} command - This @value{SSED} extension fills and joins lines in pattern space - to produce output lines of (at most) @var{n} characters, like - @code{fmt} does; if @var{n} is omitted, the default as specified - on the command line is used. - - Blank lines, spaces between words, and indentation are - preserved in the output; successive input lines with different - indentation are not joined; tabs are expanded to 8 columns. - - If pattern space contains multiple lines, they are joined, but - since pattern space usually contains a single line, the behavior - of a simple @code{L;d} script is the same as @code{fmt -s} (i.e. 
- it does not join short lines to form longer ones). - - @var{n} specifies the desired line-wrap length; if omitted, - the default as specified on the command line is used. - @item r @var{filename} [At most one address allowed.] --- 1113,1118 ---- *************** *** 1147,1168 **** is supported for the file name, which reads the contents of the standard input. - @item R @var{filename} - @findex R (read line) command - @cindex Read text from a file - @cindex @value{SSEDEXT}, reading a file a line at a time - @cindex @value{SSEDEXT}, @code{R} command - @cindex @value{SSEDEXT}, @file{/dev/stdin} file - Queue a line of @var{filename} to be read and - inserted into the output stream at the end of the current cycle, - or when the next input line is read. - Note that if @var{filename} cannot be read, or if its end is - reached, no line is appended, without any error indication. - - As with the @code{r} command, the special value @file{/dev/stdin} - is supported for the file name, which reads a line from the - standard input. - @item w @var{filename} @findex w (write file) command @cindex Write to a file --- 1129,1134 ---- *************** *** 1172,1179 **** As a @value{SSED} extension, two special values of @var{file-name} are supported: @file{/dev/stderr}, which writes the result to the standard error, and @file{/dev/stdout}, which writes to the standard ! output.@footnote{This is equivalent to @code{p} unless the @code{-i} ! switch is being used.} The file will be created (or truncated) before the first input line is read; all @code{w} commands --- 1138,1145 ---- As a @value{SSED} extension, two special values of @var{file-name} are supported: @file{/dev/stderr}, which writes the result to the standard error, and @file{/dev/stdout}, which writes to the standard ! output.@footnote{This is equivalent to @code{p} unless the @option{-i} ! 
option is being used.} The file will be created (or truncated) before the first input line is read; all @code{w} commands *************** *** 1195,1201 **** @cindex Append next input line to pattern space Add a newline to the pattern space, then append the next line of input to the pattern space. ! If there is no more input then @code{sed} exits without processing any more commands. @item P --- 1161,1167 ---- @cindex Append next input line to pattern space Add a newline to the pattern space, then append the next line of input to the pattern space. ! If there is no more input then @command{sed} exits without processing any more commands. @item P *************** *** 1241,1256 **** @node Programming Commands ! @section Commands for @code{sed} gurus In most cases, use of these commands indicates that you are ! probably better off programming in something like @code{awk} or Perl. But occasionally one is committed to sticking ! with @code{sed}, and these commands can enable one to write quite convoluted scripts. @cindex Flow of control in scripts ! @table @samp @item : @var{label} [No addresses allowed.] --- 1207,1222 ---- @node Programming Commands ! @section Commands for @command{sed} gurus In most cases, use of these commands indicates that you are ! probably better off programming in something like @command{awk} or Perl. But occasionally one is committed to sticking ! with @command{sed}, and these commands can enable one to write quite convoluted scripts. @cindex Flow of control in scripts ! @table @code @item : @var{label} [No addresses allowed.] *************** *** 1274,1342 **** since the last input line was read or conditional branch was taken. The @var{label} may be omitted, in which case the next cycle is started. 
- @item T @var{label} - @findex T (test and branch if failed) command - @cindex @value{SSEDEXT}, branch if @code{s///} failed - @cindex Branch to a label, if @code{s///} failed - @cindex Conditional branch - Branch to @var{label} only if there have been no successful - @code{s}ubstitutions since the last input line was read or - conditional branch was taken. The @var{label} may be omitted, - in which case the next cycle is started. - @end table ! @node SSED-specific Commands ! @section Commands specific of @value{SSED} ! These commands are specific of @value{SSED}, so you must use them with care and only when you are sure that ! hindering portability is not so evil. They allow to check for @value{SSED} extensions or to do tasks that are required ! quite often, yet unsupported by standard @code{sed}s. @table @code - @item v - @findex v (version) command - @cindex @value{SSEDEXT}, checking for their presence - @cindex Requiring @value{SSED} - This command does nothing, but will make @code{sed} fail if - @value{SSED} extensions are not supported, simply because other - implementations of @code{sed} do not implement it. - - @item Q [@var{exit-code}] - @findex Q (silent Quit) command - @cindex @value{SSEDEXT}, quitting silently - @cindex @value{SSEDEXT}, returning an exit code - @cindex Quitting - This command is the same as @code{q}, but will not print the - contents of pattern space. Like @code{q}, it provides the - ability to return an exit code to the caller. 
- - This command can be useful because the only alternative ways - to accomplish this apparently trivial function are to use - the @code{-n} option (which can unnecessarily complicate - your script) or resorting to the following snippet, which - wastes time by reading the whole file without any visible effect: - - @example - :eat - $d # @r{Quit silently on the last line} - N # @r{Read another line, silently} - g # @r{Overwrite pattern space each time to save memory} - b eat - @end example - - @item T @var{label} - @findex T (test and branch if failed) command - @cindex @value{SSEDEXT}, branch if @code{s///} failed - @cindex Branch to a label, if @code{s///} failed - @cindex Conditional branch - Branch to @var{label} only if there have been no successful - @code{s}ubstitutions since the last input line was read or - conditional branch was taken. The @var{label} may be omitted, - in which case the next cycle is started. - @item e [@var{command}] @findex e (evaluate) command @cindex Evaluate Bourne-shell commands --- 1240,1257 ---- since the last input line was read or conditional branch was taken. The @var{label} may be omitted, in which case the next cycle is started. @end table ! @node Extended Commands ! @section Commands Specific to @value{SSED} ! These commands are specific to @value{SSED}, so you must use them with care and only when you are sure that ! hindering portability is not evil. They allow you to check for @value{SSED} extensions or to do tasks that are required ! quite often, yet are unsupported by standard @command{sed}s. @table @code @item e [@var{command}] @findex e (evaluate) command @cindex Evaluate Bourne-shell commands *************** *** 1346,1352 **** This command allows one to pipe input from a shell command into pattern space. Without parameters, the @code{e} command executes the command that is found in pattern space and ! replaces pattern space with the output; a trailing new-line is suppressed. 
If a parameter is specified, instead, the @code{e} command --- 1261,1267 ---- This command allows one to pipe input from a shell command into pattern space. Without parameters, the @code{e} command executes the command that is found in pattern space and ! replaces the pattern space with the output; a trailing new-line is suppressed. If a parameter is specified, instead, the @code{e} command *************** *** 1354,1369 **** (like @code{r} does). The command can run across multiple lines, all but the last ending with a back-slash. ! In both cases, results are undefined if the command to be ! executed contains a @code{nul} character. ! ! @item W @var{filename} ! @findex W (write first line) command ! @cindex Write first line to a file ! @cindex @value{SSEDEXT}, writing first line to a file ! Write to the given filename the portion of the pattern space up to ! the first newline. Everything said under the @code{w} command about ! file handling holds here too. @item L @var{n} @findex L (fLow paragraphs) command --- 1269,1276 ---- (like @code{r} does). The command can run across multiple lines, all but the last ending with a back-slash. ! In both cases, the results are undefined if the command to be ! executed contains a @sc{nul} character. @item L @var{n} @findex L (fLow paragraphs) command *************** *** 1380,1393 **** preserved in the output; successive input lines with different indentation are not joined; tabs are expanded to 8 columns. ! If pattern space contains multiple lines, they are joined, but ! since pattern space usually contains a single line, the behavior ! of a simple @code{L;d} script is the same as @code{fmt -s} (i.e. it does not join short lines to form longer ones). @var{n} specifies the desired line-wrap length; if omitted, the default as specified on the command line is used. 
@item R @var{filename} @findex R (read line) command @cindex Read text from a file --- 1287,1323 ---- preserved in the output; successive input lines with different indentation are not joined; tabs are expanded to 8 columns. ! If the pattern space contains multiple lines, they are joined, but ! since the pattern space usually contains a single line, the behavior ! of a simple @code{L;d} script is the same as @samp{fmt -s} (i.e., it does not join short lines to form longer ones). @var{n} specifies the desired line-wrap length; if omitted, the default as specified on the command line is used. + @item Q [@var{exit-code}] + @findex Q (silent Quit) command + @cindex @value{SSEDEXT}, quitting silently + @cindex @value{SSEDEXT}, returning an exit code + @cindex Quitting + This command is the same as @code{q}, but will not print the + contents of pattern space. Like @code{q}, it provides the + ability to return an exit code to the caller. + + This command can be useful because the only alternative ways + to accomplish this apparently trivial function are to use + the @option{-n} option (which can unnecessarily complicate + your script) or resorting to the following snippet, which + wastes time by reading the whole file without any visible effect: + + @example + :eat + $d @i{Quit silently on the last line} + N @i{Read another line, silently} + g @i{Overwrite pattern space each time to save memory} + b eat + @end example + @item R @var{filename} @findex R (read line) command @cindex Read text from a file *************** *** 1404,1439 **** is supported for the file name, which reads a line from the standard input. @end table @node Escapes ! @section @sc{gnu} extensions for escapes in regular expressions @cindex @acronym{GNU} extensions, special escapes ! Until this chapter, you have only encountered escapes of the form ! @samp{\^}, which tell @code{sed} not to interpret the caret as a special character, but rather to take it literally. 
For example, @samp{\*} matches a single asterisk rather than zero or more backslashes. @cindex @code{POSIXLY_CORRECT} behavior, escapes ! This chapter introduces another kind of escapes@footnote{All ! the escapes that are introduced in this character are@sc{gnu} extensions, with the exception of @code{\n}. In basic regular expression mode, setting @code{POSIXLY_CORRECT} disables them.}---that is, escapes that are applied to a character or sequence of characters ! that ordinarily is taken literally, and that @code{sed} replaces with a special character. This provides a way of encoding non-printable characters in patterns in a visible manner. There is no restriction on the appearance of non-printing characters ! in a @code{sed} script but when a script is being prepared in the shell or by text editing, it is usually easier to use one of the following escape sequences than the binary character it represents: The list of these escapes is: ! @table @samp @item \a Produces or matches a @sc{bel} character, that is an ``alert'' (@sc{ascii} 7). --- 1334,1394 ---- is supported for the file name, which reads a line from the standard input. + @item T @var{label} + @findex T (test and branch if failed) command + @cindex @value{SSEDEXT}, branch if @code{s///} failed + @cindex Branch to a label, if @code{s///} failed + @cindex Conditional branch + Branch to @var{label} only if there have been no successful + @code{s}ubstitutions since the last input line was read or + conditional branch was taken. The @var{label} may be omitted, + in which case the next cycle is started. + + @item v + @findex v (version) command + @cindex @value{SSEDEXT}, checking for their presence + @cindex Requiring @value{SSED} + This command does nothing, but makes @command{sed} fail if + @value{SSED} extensions are not supported, simply because other + versions of @command{sed} do not implement it. 
+ + @item W @var{filename} + @findex W (write first line) command + @cindex Write first line to a file + @cindex @value{SSEDEXT}, writing first line to a file + Write to the given filename the portion of the pattern space up to + the first newline. Everything said under the @code{w} command about + file handling holds here too. @end table @node Escapes ! @section @acronym{GNU} Extensions for Escapes in Regular Expressions @cindex @acronym{GNU} extensions, special escapes ! Until this chapter, we have only encountered escapes of the form ! @samp{\^}, which tell @command{sed} not to interpret the caret as a special character, but rather to take it literally. For example, @samp{\*} matches a single asterisk rather than zero or more backslashes. @cindex @code{POSIXLY_CORRECT} behavior, escapes ! This chapter introduces another kind of escape@footnote{All ! the escapes introduced here are @acronym{GNU} extensions, with the exception of @code{\n}. In basic regular expression mode, setting @code{POSIXLY_CORRECT} disables them.}---that is, escapes that are applied to a character or sequence of characters ! that ordinarily are taken literally, and that @command{sed} replaces with a special character. This provides a way of encoding non-printable characters in patterns in a visible manner. There is no restriction on the appearance of non-printing characters ! in a @command{sed} script but when a script is being prepared in the shell or by text editing, it is usually easier to use one of the following escape sequences than the binary character it represents: The list of these escapes is: ! @table @code @item \a Produces or matches a @sc{bel} character, that is an ``alert'' (@sc{ascii} 7). *************** *** 1456,1462 **** Produces or matches @kbd{@sc{Control}-@var{x}}, where @var{x} is any character. The precise effect of @samp{\c@var{x}} is as follows: if @var{x} is a lower case letter, it is converted to upper case. ! Then bit 6 of the character (hex 40) is inverted. 
Thus "\cz" becomes hex 1A, but @samp{\c@{} becomes hex 3B, while @samp{\c;} becomes hex 7B. @item \d@var{xxx} --- 1411,1417 ---- Produces or matches @kbd{@sc{Control}-@var{x}}, where @var{x} is any character. The precise effect of @samp{\c@var{x}} is as follows: if @var{x} is a lower case letter, it is converted to upper case. ! Then bit 6 of the character (hex 40) is inverted. Thus @samp{\cz} becomes hex 1A, but @samp{\c@{} becomes hex 3B, while @samp{\c;} becomes hex 7B. @item \d@var{xxx} *************** *** 1478,1508 **** @end table @samp{\b} (backspace) was omitted because of the conflict with ! the existing "word boundary" meaning. Other escapes match particular character class and are only valid in regular expressions: ! @table @samp @item \s ! Matches any whitespace character @item \S ! Matches any character that is not a whitespace character @item \w Matches any ``word'' character. A ``word'' character is any letter or digit or the underscore character. @item \W ! Matches any ``non-word'' character @end table @node Examples ! @chapter Some sample scripts ! Here are some @code{sed} scripts to guide you in the art of mastering ! @code{sed}... @menu Some exotic examples: --- 1433,1464 ---- @end table @samp{\b} (backspace) was omitted because of the conflict with ! the existing ``word boundary'' meaning. ! @c \b AND \B ARE NOT INTRODUCED, NOR ARE \` and \'. ADR Other escapes match particular character class and are only valid in regular expressions: ! @table @code @item \s ! Matches any whitespace character. @item \S ! Matches any character that is not a whitespace character. @item \w Matches any ``word'' character. A ``word'' character is any letter or digit or the underscore character. @item \W ! Matches any ``non-word'' character. @end table @node Examples ! @chapter Some Sample Scripts ! Here are some @command{sed} scripts to guide you in the art of mastering ! @command{sed}. 
@menu Some exotic examples: *************** *** 1528,1537 **** @end menu @node Centering lines ! @section Centering lines ! This script will center all lines of a file on a 80 columns width. ! To change that width, the number in @code{\@{\@}} must be replaced, and the number of added spaces also must be changed. Note how the buffer commands are used to separate parts in --- 1484,1493 ---- @end menu @node Centering lines ! @section Centering Lines ! This script centers all lines of a file on a 80 columns width. ! To change that width, the number in @code{\@{@dots{}\@}} must be replaced, and the number of added spaces also must be changed. Note how the buffer commands are used to separate parts in *************** *** 1543,1549 **** #!/usr/bin/sed -f @group ! #@r{ Put 80 spaces in the buffer} 1 @{ x s/^$/ / --- 1499,1505 ---- #!/usr/bin/sed -f @group ! # Put 80 spaces in the buffer 1 @{ x s/^$/ / *************** *** 1553,1588 **** @end group @group ! #@r{ del leading and trailing spaces} y/@kbd{tab}/ / s/^ *// s/ *$// @end group @group ! #@r{ add a new-line and 80 spaces to end of line} G @end group @group ! #@r{ keep first 81 chars (80 + a new-line)} s/^\(.\@{81\@}\).*$/\1/ @end group @group ! #@r{ \2 matches half of the spaces, which are moved to the beginning} s/^\(.*\)\n\(.*\)\2/\2\1/ @end group @end example @c end--------------------------------------------- @node Increment a number ! @section Increment a number This script is one of a few that demonstrate how to do arithmetic ! in @code{sed}. This is indeed possible@footnote{@code{sed} guru Greg ! Ubben wrote an implementation of the @code{dc} @sc{rpn} calculator! ! It is distributed together with sed.}, but must be done manually. To increment one number you just add 1 to last digit, replacing it by the following digit. There is one exception: when the digit --- 1509,1544 ---- @end group @group ! # del leading and trailing spaces y/@kbd{tab}/ / s/^ *// s/ *$// @end group @group ! 
# add a new-line and 80 spaces to end of line G @end group @group ! # keep first 81 chars (80 + a new-line) s/^\(.\@{81\@}\).*$/\1/ @end group @group ! # \2 matches half of the spaces, which are moved to the beginning s/^\(.*\)\n\(.*\)\2/\2\1/ @end group @end example @c end--------------------------------------------- @node Increment a number ! @section Increment a Number This script is one of a few that demonstrate how to do arithmetic ! in @command{sed}. This is indeed possible,@footnote{@command{sed} guru Greg ! Ubben wrote an implementation of the @command{dc} @sc{rpn} calculator! ! It is distributed together with sed.} but must be done manually. To increment one number you just add 1 to last digit, replacing it by the following digit. There is one exception: when the digit *************** *** 1591,1597 **** This solution by Bruno Haible is very clever and smart because it uses a single buffer; if you don't have this limitation, the ! algorithm used in @ref{cat -n, Numbering lines} is faster. It works by replacing trailing nines with an underscore, then using multiple @code{s} commands to increment the last digit, and then again substituting underscores with zeros. --- 1547,1553 ---- This solution by Bruno Haible is very clever and smart because it uses a single buffer; if you don't have this limitation, the ! algorithm used in @ref{cat -n, Numbering lines}, is faster. It works by replacing trailing nines with an underscore, then using multiple @code{s} commands to increment the last digit, and then again substituting underscores with zeros. *************** *** 1603,1621 **** /[^0-9]/ d @group ! #@r{ replace all leading 9s by _ (any other char except digits, could} ! #@r{ be used)} :d s/9\(_*\)$/_\1/ td @end group @group ! #@r{ incr last digit only. The first line adds a most-significant} ! #@r{ digit of 1 if we have to add a digit.} ! #@r{} ! #@r{ The @code{tn} commands are not necessary, but make the thing} ! 
#@r{ faster} @end group @group --- 1559,1577 ---- /[^0-9]/ d @group ! # replace all trailing 9s by _ (any other character except digits could ! # be used) :d s/9\(_*\)$/_\1/ td @end group @group ! # incr last digit only. The first line adds a most-significant ! # digit of 1 if we have to add a digit. ! # ! # The @code{tn} commands are not necessary, but make the thing ! # faster @end group @group *************** *** 1639,1653 **** @c end--------------------------------------------- @node Rename files to lower case ! @section Rename files to lower case ! This is a pretty strange use of @code{sed}. We transform text, and transform it to be shell commands, then just feed them to shell. ! Don't worry, even worse hacks are done when using @code{sed}; I have ! seen a script converting the output of @code{date} into a @code{bc} program! ! The main body of this is the @code{sed} script, which remaps the name from lower to upper (or vice-versa) and even checks out if the remapped name is the same as the original name. Note how the script is parameterized using shell --- 1595,1609 ---- @c end--------------------------------------------- @node Rename files to lower case ! @section Rename Files to Lower Case ! This is a pretty strange use of @command{sed}. We transform text, and transform it to be shell commands, then just feed them to shell. ! Don't worry, even worse hacks are done when using @command{sed}; I have ! seen a script converting the output of @command{date} into a @command{bc} program! ! The main body of this is the @command{sed} script, which remaps the name from lower to upper (or vice-versa) and even checks out if the remapped name is the same as the original name. Note how the script is parameterized using shell *************** *** 1657,1671 **** @example @group #! /bin/sh ! #@r{ rename files to lower/upper case... } ! #@r{} ! #@r{ usage: } ! #@r{ move-to-lower * } ! #@r{ move-to-upper * } ! #@r{ or} ! #@r{ move-to-lower -R .} ! #@r{ move-to-upper -R .} !
#@r{} @end group @group --- 1613,1627 ---- @example @group #! /bin/sh ! # rename files to lower/upper case... ! # ! # usage: ! # move-to-lower * ! # move-to-upper * ! # or ! # move-to-lower -R . ! # move-to-upper -R . ! # @end group @group *************** *** 1697,1703 **** @group apply_cmd='sh' ! finder='echo $* | tr " " "\n"' files_only= @end group --- 1653,1659 ---- @group apply_cmd='sh' ! finder='echo "$@" | tr " " "\n"' files_only= @end group *************** *** 1706,1712 **** do case "$1" in -n) apply_cmd='cat' ;; ! -R) finder='find $* -type f';; -h) help ; exit 1 ;; *) break ;; esac --- 1662,1668 ---- do case "$1" in -n) apply_cmd='cat' ;; ! -R) finder='find "$@" -type f';; -h) help ; exit 1 ;; *) break ;; esac *************** *** 1716,1722 **** @group if [ -z "$1" ]; then ! echo Usage: $0 [-n] [-r] files... exit 1 fi @end group --- 1672,1678 ---- @group if [ -z "$1" ]; then ! echo Usage: $0 [-h] [-n] [-R] files... exit 1 fi @end group *************** *** 1736,1786 **** eval $finder | sed -n ' @group ! #@r{ remove all trailing slashes} s/\/*$// @end group @group ! #@r{ add ./ if there are no path, only filename} /\//! s/^/.\// @end group @group ! #@r{ save path+filename} h @end group @group ! #@r{ remove path} s/.*\/// @end group @group ! #@r{ do conversion only on filename} y/'$FROM'/'$TO'/ @end group @group ! #@r{ now line contains original path+file, while} ! #@r{ hold space contains the new filename} x @end group @group ! #@r{ add converted file name to line, which now contains} ! #@r{ @var{path}/@var{file-name}\n@var{converted-file-name}} G @end group @group ! #@r{ check if converted file name is equal to original file name,} ! #@r{ if it is, do not print nothing} /^.*\/\(.*\)\n\1/b @end group @group ! #@r{ now, transform @code{@var{path}/@var{fromfile}\n@var{tofile}}, into} !
#@r{ @code{mv @var{path}/@var{fromfile} @var{path}/@var{tofile}} and print it} s/^\(.*\/\)\(.*\)\n\(.*\)$/mv \1\2 \1\3/p @end group --- 1692,1742 ---- eval $finder | sed -n ' @group ! # remove all trailing slashes s/\/*$// @end group @group ! # add ./ if there is no path, only a filename /\//! s/^/.\// @end group @group ! # save path+filename h @end group @group ! # remove path s/.*\/// @end group @group ! # do conversion only on filename y/'$FROM'/'$TO'/ @end group @group ! # now line contains original path+file, while ! # hold space contains the new filename x @end group @group ! # add converted file name to line, which now contains ! # path/file-name\nconverted-file-name G @end group @group ! # check if converted file name is equal to original file name, ! # if it is, do not print anything /^.*\/\(.*\)\n\1/b @end group @group ! # now, transform path/fromfile\ntofile, into ! # mv path/fromfile path/tofile and print it s/^\(.*\/\)\(.*\)\n\(.*\)$/mv \1\2 \1\3/p @end group *************** *** 1789,1798 **** @c end--------------------------------------------- @node Print bash environment ! @section Print bash environment This script strips the definition of the shell functions ! from the output of the @code{set} Bourne-shell command. @c start------------------------------------------- @example --- 1745,1754 ---- @c end--------------------------------------------- @node Print bash environment ! @section Print @command{bash} Environment This script strips the definition of the shell functions ! from the output of the @command{set} Bourne-shell command. @c start------------------------------------------- @example *************** *** 1804,1833 **** @end group @group ! #@r{ if no occurrence of @samp{=()} print and load next line} ! /=() /! @{ p; b; @} @end group @group ! #@r{ possible start of functions section} ! #@r{ save the line in case this is a var like FOO="() "} h @end group @group ! #@r{ if the next line has a brace, we quit because} !
#@r{ nothing comes after functions} n /^@{/ q @end group @group ! #@r{ print the old line} x; p @end group @group ! #@r{ work on the new line now} x; bx ' @end group --- 1760,1795 ---- @end group @group ! @ifinfo ! # if no occurrence of "=()" print and load next line ! @end ifinfo ! @ifnotinfo ! # if no occurrence of @samp{=()} print and load next line ! @end ifnotinfo ! /=()/! @{ p; b; @} ! / () $/! @{ p; b; @} @end group @group ! # possible start of functions section ! # save the line in case this is a var like FOO="() " h @end group @group ! # if the next line has a brace, we quit because ! # nothing comes after functions n /^@{/ q @end group @group ! # print the old line x; p @end group @group ! # work on the new line now x; bx ' @end group *************** *** 1835,1841 **** @c end--------------------------------------------- @node Reverse chars of lines ! @section Reverse chars of lines This script can be used to reverse the position of characters in lines. The technique moves two characters at a time, hence --- 1797,1803 ---- @c end--------------------------------------------- @node Reverse chars of lines ! @section Reverse Characters of Lines This script can be used to reverse the position of characters in lines. The technique moves two characters at a time, hence *************** *** 1845,1852 **** This is often needed to reset the flag that is tested by the @code{t} command. ! Imaginative readers will find uses to this script. An example ! is reversing the output of @code{banner}@footnote{This requires another script to pad the output of banner; for example @example --- 1807,1814 ---- This is often needed to reset the flag that is tested by the @code{t} command. ! Imaginative readers will find uses for this script. An example ! 
is reversing the output of @command{banner}.@footnote{This requires another script to pad the output of banner; for example @example *************** *** 1856,1862 **** sed -e :a -e '/^.\@{0,'$1'\@}$/ @{ s/$/ /; ba; @}' | ~/sedscripts/reverseline.sed @end example ! }. @c start------------------------------------------- @example --- 1818,1824 ---- sed -e :a -e '/^.\@{0,'$1'\@}$/ @{ s/$/ /; ba; @}' | ~/sedscripts/reverseline.sed @end example ! } @c start------------------------------------------- @example *************** *** 1865,1879 **** /../! b @group ! #@r{ Reverse a line. Begin embedding the line between two new-lines} s/^.*$/\ &\ / @end group @group ! #@r{ Move first character at the end. The regexp matches until} ! #@r{ there are zero or one characters between the markers} tx :x s/\(\n.\)\(.*\)\(.\n\)/\3\2\1/ --- 1827,1841 ---- /../! b @group ! # Reverse a line. Begin embedding the line between two new-lines s/^.*$/\ &\ / @end group @group ! # Move first character at the end. The regexp matches until ! # there are zero or one characters between the markers tx :x s/\(\n.\)\(.*\)\(.\n\)/\3\2\1/ *************** *** 1881,1931 **** @end group @group ! #@r{ Remove the new-line markers} s/\n//g @end group @end example @c end--------------------------------------------- @node tac ! @section Reverse lines of files This one begins a series of totally useless (yet interesting) scripts emulating various Unix commands. This, in particular, ! is a @code{tac} workalike. ! Note that on implementations other than @sc{gnu} @code{sed} ! and @value{SSED} this script might easily overflow internal buffers. @c start------------------------------------------- @example #!/usr/bin/sed -nf ! #@r{ reverse all lines of input, i.e. first line became last, ...} @group ! #@r{ from the second line, the buffer (which contains all previous lines)} ! #@r{ is *appended* to current line, so, the order will be reversed} 1! G @end group @group ! 
#@r{ on the last line we're done -- print everything} $ p @end group @group ! #@r{ store everything on the buffer again} h @end group @end example @c end--------------------------------------------- @node cat -n ! @section Numbering lines ! This script replaces @code{cat -n}; in fact it formats its output ! exactly like @sc{gnu} @code{cat} does. Of course this is completely useless and for two reasons: first, because somebody else did it in C, second, because the following --- 1843,1896 ---- @end group @group ! # Remove the new-line markers s/\n//g @end group @end example @c end--------------------------------------------- @node tac ! @section Reverse Lines of Files This one begins a series of totally useless (yet interesting) scripts emulating various Unix commands. This, in particular, ! is a @command{tac} workalike. ! Note that on implementations other than @acronym{GNU} @command{sed} ! @ifset PERL ! and @value{SSED} ! @end ifset ! this script might easily overflow internal buffers. @c start------------------------------------------- @example #!/usr/bin/sed -nf ! # reverse all lines of input, i.e. first line became last, ... @group ! # from the second line, the buffer (which contains all previous lines) ! # is *appended* to current line, so, the order will be reversed 1! G @end group @group ! # on the last line we're done -- print everything $ p @end group @group ! # store everything on the buffer again h @end group @end example @c end--------------------------------------------- @node cat -n ! @section Numbering Lines ! This script replaces @samp{cat -n}; in fact it formats its output ! exactly like @acronym{GNU} @command{cat} does. Of course this is completely useless and for two reasons: first, because somebody else did it in C, second, because the following *************** *** 1945,1952 **** @end example @c end--------------------------------------------- ! It uses @code{sed} to print the line number, then groups lines two ! by two using N. 
Of course, this script does not teach as much as the one presented below. The algorithm used for incrementing uses both buffers, so the line --- 1910,1917 ---- @end example @c end--------------------------------------------- ! It uses @command{sed} to print the line number, then groups lines two ! by two using @code{N}. Of course, this script does not teach as much as the one presented below. The algorithm used for incrementing uses both buffers, so the line *************** *** 1954,1960 **** is split so that changing digits go in a buffer and unchanged ones go in the other; the changed digits are modified in a single step (using a @code{y} command). The line number for the next line ! is then composed and stored in hold space, to be used in the next iteration. @c start------------------------------------------- --- 1919,1925 ---- is split so that changing digits go in a buffer and unchanged ones go in the other; the changed digits are modified in a single step (using a @code{y} command). The line number for the next line ! is then composed and stored in the hold space, to be used in the next iteration. @c start------------------------------------------- *************** *** 1962,1999 **** #!/usr/bin/sed -nf @group ! #@r{ Prime the pump on the first line} x /^$/ s/^.*$/1/ @end group @group ! #@r{ Add the correct line number before the pattern} G h @end group @group ! #@r{ Format it and print it} s/^/ / s/^ *\(......\)\n/\1 /p @end group @group ! #@r{ Get the line number from hold space; add a zero} ! #@r{ if we're going to add a digit on the next line} g s/\n.*$// /^9*$/ s/^/0/ @end group @group ! #@r{ separate changing/unchanged digits with an x} s/.9*$/x&/ @end group @group ! #@r{ keep changing digits in hold space} h s/^.*x// y/0123456789/1234567890/ --- 1927,1964 ---- #!/usr/bin/sed -nf @group ! # Prime the pump on the first line x /^$/ s/^.*$/1/ @end group @group ! # Add the correct line number before the pattern G h @end group @group ! 
# Format it and print it s/^/ / s/^ *\(......\)\n/\1 /p @end group @group ! # Get the line number from hold space; add a zero ! # if we're going to add a digit on the next line g s/\n.*$// /^9*$/ s/^/0/ @end group @group ! # separate changing/unchanged digits with an x s/.9*$/x&/ @end group @group ! # keep changing digits in hold space h s/^.*x// y/0123456789/1234567890/ *************** *** 2001,2012 **** @end group @group ! #@r{ keep unchanged digits in pattern space} s/x.*$// @end group @group ! #@r{ compose the new number, remove the new-line implicitly added by G} G s/\n// h --- 1966,1977 ---- @end group @group ! # keep unchanged digits in pattern space s/x.*$// @end group @group ! # compose the new number, remove the new-line implicitly added by G G s/\n// h *************** *** 2015,2027 **** @c end--------------------------------------------- @node cat -b ! @section Numbering non-blank lines ! Emulating @code{cat -b} is almost the same as @code{cat -n}---we only have to select which lines are to be numbered and which are not. The part that is common to this script and the previous one is ! not commented to show how important it is to comment @code{sed} scripts properly... @c start------------------------------------------- --- 1980,1992 ---- @c end--------------------------------------------- @node cat -b ! @section Numbering Non-blank Lines ! Emulating @samp{cat -b} is almost the same as @samp{cat -n}---we only have to select which lines are to be numbered and which are not. The part that is common to this script and the previous one is ! not commented to show how important it is to comment @command{sed} scripts properly... @c start------------------------------------------- *************** *** 2036,2042 **** @end group @group ! #@r{ Same as cat -n from now} x /^$/ s/^.*$/1/ G --- 2001,2007 ---- @end group @group ! # Same as cat -n from now x /^$/ s/^.*$/1/ G *************** *** 2060,2074 **** @c end--------------------------------------------- @node wc -c ! 
@section Counting chars ! This script shows another way to do arithmetic with @code{sed}. In this case we have to add possibly large numbers, so implementing this by successive increments would not be feasible (and possibly ! even more complicated to contrive than this script...). The approach is to map numbers to letters, kind of an abacus ! implemented with @code{sed}. @samp{a}s are units, @samp{b}s are tenths and so on: we simply add the number of characters on the current line as units, and then propagate the carry to tenths, hundredths, and so on. --- 2025,2039 ---- @c end--------------------------------------------- @node wc -c ! @section Counting Characters ! This script shows another way to do arithmetic with @command{sed}. In this case we have to add possibly large numbers, so implementing this by successive increments would not be feasible (and possibly ! even more complicated to contrive than this script). The approach is to map numbers to letters, kind of an abacus ! implemented with @command{sed}. @samp{a}s are units, @samp{b}s are tenths and so on: we simply add the number of characters on the current line as units, and then propagate the carry to tenths, hundredths, and so on. *************** *** 2088,2094 **** #!/usr/bin/sed -nf @group ! #@r{ Add n+1 a's to hold space (+1 is for the new-line)} s/./a/g H x --- 2053,2059 ---- #!/usr/bin/sed -nf @group ! # Add n+1 a's to hold space (+1 is for the new-line) s/./a/g H x *************** *** 2096,2103 **** @end group @group ! #@r{ Do the carry. The t's and b's are not necessary,} ! #@r{ but they do speed up the thing} t a : a; s/aaaaaaaaaa/b/g; t b; b done : b; s/bbbbbbbbbb/c/g; t c; b done --- 2061,2068 ---- @end group @group ! # Do the carry. The t's and b's are not necessary, ! # but they do speed up the thing t a : a; s/aaaaaaaaaa/b/g; t b; b done : b; s/bbbbbbbbbb/c/g; t c; b done *************** *** 2117,2123 **** @} @end group ! 
#@r{ On the last line, convert back to decimal} @group : loop --- 2082,2088 ---- @} @end group ! # On the last line, convert back to decimal @group : loop *************** *** 2143,2169 **** @c end--------------------------------------------- @node wc -w ! @section Counting words This script is almost the same as the previous one, once each of the words on the line is converted to a single @samp{a} (in the previous script each letter was changed to an @samp{a}). ! It is interesting that real @code{wc} programs have optimized ! loops for @code{wc -c}, so they are much slower at counting ! words rather than characters. These scripts' bottleneck, instead, is arithmetic, and hence the word-counting one is faster (it has to manage smaller numbers). Again, the common parts are not commented to show the importance ! of commenting @code{sed} scripts. @c start------------------------------------------- @example #!/usr/bin/sed -nf @group ! #@r{ Convert words to a's} s/[ @kbd{tab}][ @kbd{tab}]*/ /g s/^/ / s/ [^ ][^ ]*/a /g --- 2108,2134 ---- @c end--------------------------------------------- @node wc -w ! @section Counting Words This script is almost the same as the previous one, once each of the words on the line is converted to a single @samp{a} (in the previous script each letter was changed to an @samp{a}). ! It is interesting that real @command{wc} programs have optimized ! loops for @samp{wc -c}, so they are much slower at counting ! words rather than characters. This script's bottleneck, instead, is arithmetic, and hence the word-counting one is faster (it has to manage smaller numbers). Again, the common parts are not commented to show the importance ! of commenting @command{sed} scripts. @c start------------------------------------------- @example #!/usr/bin/sed -nf @group ! # Convert words to a's s/[ @kbd{tab}][ @kbd{tab}]*/ /g s/^/ / s/ [^ ][^ ]*/a /g *************** *** 2171,2184 **** @end group @group ! 
#@r{ Append them to hold space} H x s/\n// @end group @group ! #@r{ From here on it is the same as in wc -c.} /aaaaaaaaaa/! bx; s/aaaaaaaaaa/b/g /bbbbbbbbbb/! bx; s/bbbbbbbbbb/c/g /cccccccccc/! bx; s/cccccccccc/d/g --- 2136,2149 ---- @end group @group ! # Append them to hold space H x s/\n// @end group @group ! # From here on it is the same as in wc -c. /aaaaaaaaaa/! bx; s/aaaaaaaaaa/b/g /bbbbbbbbbb/! bx; s/bbbbbbbbbb/c/g /cccccccccc/! bx; s/cccccccccc/d/g *************** *** 2208,2217 **** @c end--------------------------------------------- @node wc -l ! @section Counting lines ! No strange things are done now, because @code{sed} gives us ! @code{wc -l} functionality for free!!! Look: @c start------------------------------------------- @example --- 2173,2182 ---- @c end--------------------------------------------- @node wc -l ! @section Counting Lines ! No strange things are done now, because @command{sed} gives us ! @samp{wc -l} functionality for free!!! Look: @c start------------------------------------------- @example *************** *** 2223,2231 **** @c end--------------------------------------------- @node head ! @section Printing the first lines ! This script is probably the simplest useful @code{sed} script. It displays the first 10 lines of input; the number of displayed lines is right before the @code{q} command. --- 2188,2196 ---- @c end--------------------------------------------- @node head ! @section Printing the First Lines ! This script is probably the simplest useful @command{sed} script. It displays the first 10 lines of input; the number of displayed lines is right before the @code{q} command. *************** *** 2239,2252 **** @c end--------------------------------------------- @node tail ! @section Printing the last lines Printing the last @var{n} lines rather than the first is more complex but indeed possible. @var{n} is encoded in the second line, before the bang character. ! 
This script is similar to the @code{tac} script in that it keeps the ! final output in hold space and prints it at the end: @c start------------------------------------------- @example --- 2204,2217 ---- @c end--------------------------------------------- @node tail ! @section Printing the Last Lines Printing the last @var{n} lines rather than the first is more complex but indeed possible. @var{n} is encoded in the second line, before the bang character. ! This script is similar to the @command{tac} script in that it keeps the ! final output in the hold space and prints it at the end: @c start------------------------------------------- @example *************** *** 2267,2283 **** restart the loop). The ``sliding window'' technique is a very powerful way to write ! efficient and complex @code{sed} scripts, because commands like @code{P} would require a lot of work if implemented manually. To introduce the technique, which is fully demonstrated in the rest of this chapter and is based on the @code{N}, @code{P} ! and @code{D} commands, here is an implementation of @code{tail} ! using a simple `sliding window'. This looks complicated but in fact the working is the same as the last script: after we have kicked in the appropriate number ! of lines, however, we stop using hold space to keep inter-line state, and instead use @code{N} and @code{D} to slide pattern space by one line: --- 2232,2248 ---- restart the loop). The ``sliding window'' technique is a very powerful way to write ! efficient and complex @command{sed} scripts, because commands like @code{P} would require a lot of work if implemented manually. To introduce the technique, which is fully demonstrated in the rest of this chapter and is based on the @code{N}, @code{P} ! and @code{D} commands, here is an implementation of @command{tail} ! using a simple ``sliding window.'' This looks complicated but in fact the working is the same as the last script: after we have kicked in the appropriate number ! 
of lines, however, we stop using the hold space to keep inter-line state, and instead use @code{N} and @code{D} to slide pattern space by one line: *************** *** 2298,2304 **** @node uniq ! @section Make duplicate lines unique This is an example of the art of using the @code{N}, @code{P} and @code{D} commands, probably the most difficult to master. --- 2263,2269 ---- @node uniq ! @section Make Duplicate Lines Unique This is an example of the art of using the @code{N}, @code{P} and @code{D} commands, probably the most difficult to master. *************** *** 2312,2318 **** @group :b ! @r{On the last line, print and exit} $b N /^\(.*\)\n\1$/ @{ --- 2277,2283 ---- @group :b ! # On the last line, print and exit $b N /^\(.*\)\n\1$/ @{ *************** *** 2324,2336 **** @end group @group ! #@r{ If the @code{N} command had added the last line, print and exit} $b @end group @group ! #@r{ The lines are different; print the first and go} ! #@r{ back working on the second.} P D @end group --- 2289,2301 ---- @end group @group ! # If the @code{N} command had added the last line, print and exit $b @end group @group ! # The lines are different; print the first and go ! # back working on the second. P D @end group *************** *** 2338,2349 **** @c end--------------------------------------------- As you can see, we mantain a 2-line window using @code{P} and @code{D}. ! This technique is often used in advanced @code{sed} scripts. @node uniq -d ! @section Print duplicated lines of input ! This script prints only duplicated lines, like @code{uniq -d}. @c start------------------------------------------- @example --- 2303,2314 ---- @c end--------------------------------------------- As you can see, we mantain a 2-line window using @code{P} and @code{D}. ! This technique is often used in advanced @command{sed} scripts. @node uniq -d ! @section Print Duplicated Lines of Input ! This script prints only duplicated lines, like @samp{uniq -d}. 
@c start------------------------------------------- @example *************** *** 2353,2365 **** $b N /^\(.*\)\n\1$/ @{ ! #@r{ Print the first of the duplicated lines} s/.*\n// p @end group @group ! #@r{ Loop until we get a different line} :b $b N --- 2318,2330 ---- $b N /^\(.*\)\n\1$/ @{ ! # Print the first of the duplicated lines s/.*\n// p @end group @group ! # Loop until we get a different line :b $b N *************** *** 2371,2399 **** @end group @group ! #@r{ The last line cannot be followed by duplicates} $b @end group @group ! #@r{ Found a different one. Leave it alone in the pattern space} ! #@r{ and go back to the top, hunting its duplicates} D @end group @end example @c end--------------------------------------------- @node uniq -u ! @section Remove all duplicated lines ! This script prints only unique lines, like @code{uniq -u}. @c start------------------------------------------- @example #!/usr/bin/sed -f @group ! #@r{ Search for a duplicate line --- until that, print what you find.} $b N /^\(.*\)\n\1$/ ! @{ --- 2336,2364 ---- @end group @group ! # The last line cannot be followed by duplicates $b @end group @group ! # Found a different one. Leave it alone in the pattern space ! # and go back to the top, hunting its duplicates D @end group @end example @c end--------------------------------------------- @node uniq -u ! @section Remove All Duplicated Lines ! This script prints only unique lines, like @samp{uniq -u}. @c start------------------------------------------- @example #!/usr/bin/sed -f @group ! # Search for a duplicate line --- until that, print what you find. $b N /^\(.*\)\n\1$/ ! @{ *************** *** 2404,2417 **** @group :c ! #@r{ Got two equal lines in pattern space. At the} ! #@r{ end of the file we simply exit} $d @end group @group ! #@r{ Else, we keep reading lines with @code{N} until we} ! #@r{ find a different one} s/.*\n// N /^\(.*\)\n\1$/ @{ --- 2369,2382 ---- @group :c ! # Got two equal lines in pattern space. At the ! 
# end of the file we simply exit $d @end group @group ! # Else, we keep reading lines with @code{N} until we ! # find a different one s/.*\n// N /^\(.*\)\n\1$/ @{ *************** *** 2420,2437 **** @end group @group ! #@r{ Remove the last instance of the duplicate line} ! #@r{ and go back to the top} D @end group @end example @c end--------------------------------------------- @node cat -s ! @section Squeezing blank lines As a final example, here are three scripts, of increasing complexity ! and speed, that implement the same function as @code{cat -s}, that is squeezing blank lines. The first leaves a blank line at the beginning and end if there are --- 2385,2402 ---- @end group @group ! # Remove the last instance of the duplicate line ! # and go back to the top D @end group @end example @c end--------------------------------------------- @node cat -s ! @section Squeezing Blank Lines As a final example, here are three scripts, of increasing complexity ! and speed, that implement the same function as @samp{cat -s}, that is squeezing blank lines. The first leaves a blank line at the beginning and end if there are *************** *** 2442,2449 **** #!/usr/bin/sed -f @group ! #@r{ on empty lines, join with next} ! #@r{ Note there is a star in the regexp} :x /^\n*$/ @{ N --- 2407,2414 ---- #!/usr/bin/sed -f @group ! # on empty lines, join with next ! # Note there is a star in the regexp :x /^\n*$/ @{ N *************** *** 2452,2459 **** @end group @group ! #@r{ now, squeeze all '\n', this can be also done by:} ! #@r{ @code{s/^\(\n\)*/\1/}} s/\n*/\ / @end group --- 2417,2424 ---- @end group @group ! # now, squeeze all '\n', this can be also done by: ! # s/^\(\n\)*/\1/ s/\n*/\ / @end group *************** *** 2469,2483 **** #!/usr/bin/sed -f @group ! #@r{ delete all leading empty lines} 1,/^./@{ /./!d @} @end group @group ! #@r{ on an empty line we remove it and all the following} ! #@r{ empty lines, but one} :x /./!@{ N --- 2434,2448 ---- #!/usr/bin/sed -f @group ! 
# delete all leading empty lines 1,/^./@{ /./!d @} @end group @group ! # on an empty line we remove it and all the following ! # empty lines, but one :x /./!@{ N *************** *** 2490,2496 **** This removes leading and trailing blank lines. It is also the fastest. Note that loops are completely done with @code{n} and ! @code{b}, without exploting the fact that @code{sed} cycles back to the top of the script automatically at the end of a line. @c start------------------------------------------- --- 2455,2461 ---- This removes leading and trailing blank lines. It is also the fastest. Note that loops are completely done with @code{n} and ! @code{b}, without exploting the fact that @command{sed} cycles back to the top of the script automatically at the end of a line. @c start------------------------------------------- *************** *** 2498,2532 **** #!/usr/bin/sed -nf @group ! #@r{ delete all (leading) blanks} /./!d @end group @group ! #@r{ get here: so there is a non empty} :x ! #@r{ print it} p ! #@r{ get next} n ! #@r{ got chars? print it again, etc... } /./bx @end group @group ! #@r{ no, don't have chars: got an empty line} :z ! #@r{ get next, if last line we finish here so no trailing} ! #@r{ empty lines are written} n ! #@r{ also empty? then ignore it, and get next... this will} ! #@r{ remove ALL empty lines} /./!bz @end group @group ! #@r{ all empty lines were deleted/ignored, but we have a non empty. As} ! #@r{ what we want to do is to squeeze, insert a blank line artificially} i\ @end group --- 2463,2497 ---- #!/usr/bin/sed -nf @group ! # delete all (leading) blanks /./!d @end group @group ! # get here: so there is a non empty :x ! # print it p ! # get next n ! # got chars? print it again, etc... /./bx @end group @group ! # no, don't have chars: got an empty line :z ! # get next, if last line we finish here so no trailing ! # empty lines are written n ! # also empty? then ignore it, and get next... this will ! 
# remove ALL empty lines /./!bz @end group @group ! # all empty lines were deleted/ignored, but we have a non empty. As ! # what we want to do is to squeeze, insert a blank line artificially i\ @end group *************** *** 2535,2553 **** @c end--------------------------------------------- @node Limitations ! @chapter @value{SSED}'s limitations and non-limitations @cindex @acronym{GNU} extensions, unlimited line length @cindex Portability, line length limitations ! For those who want to write portable @code{sed} scripts, be aware that some implementations have been known to limit line lengths (for the pattern and hold spaces) to be no more than 4000 bytes. ! The @sc{posix.2} standard specifies that conforming @code{sed} implementations shall support at least 8192 byte line lengths. @value{SSED} has no built-in limit on line length; ! as long as it can malloc() more (virtual) memory, ! you can feed or construct lines as long as you care. However, recursion is used to handle subpatterns and indefinite repetition. This means that the available stack space may limit --- 2500,2518 ---- @c end--------------------------------------------- @node Limitations ! @chapter @value{SSED}'s Limitations and Non-limitations @cindex @acronym{GNU} extensions, unlimited line length @cindex Portability, line length limitations ! For those who want to write portable @command{sed} scripts, be aware that some implementations have been known to limit line lengths (for the pattern and hold spaces) to be no more than 4000 bytes. ! The @sc{posix} standard specifies that conforming @command{sed} implementations shall support at least 8192 byte line lengths. @value{SSED} has no built-in limit on line length; ! as long as it can @code{malloc()} more (virtual) memory, ! you can feed or construct lines as long as you like. However, recursion is used to handle subpatterns and indefinite repetition. This means that the available stack space may limit *************** *** 2572,2578 **** encountered. 
Here are a few distinctions between the real Perl-style ! regular expressions and those that @code{-R} recognizes. @enumerate @item --- 2537,2543 ---- encountered. Here are a few distinctions between the real Perl-style ! regular expressions and those that @option{-R} recognizes. @enumerate @item *************** *** 2629,2642 **** @end ifset @node Other Resources ! @chapter Other resources for learning about @code{sed} ! @cindex Additional reading about @code{sed} ! In addition to several books that have been written about @code{sed} (either specifically or as chapters in books which discuss ! shell programming), one can find out more about @code{sed} (including suggestions of a few books) from the FAQ ! for the sed-users mailing list, available from any of: @display @uref{http://www.student.northpark.edu/pemente/sed/sedfaq.html} @uref{http://sed.sf.net/grabbag/tutorials/sedfaq.html} --- 2594,2607 ---- @end ifset @node Other Resources ! @chapter Other Resources for Learning About @command{sed} ! @cindex Additional reading about @command{sed} ! In addition to several books that have been written about @command{sed} (either specifically or as chapters in books which discuss ! shell programming), one can find out more about @command{sed} (including suggestions of a few books) from the FAQ ! for the @code{sed-users} mailing list, available from any of: @display @uref{http://www.student.northpark.edu/pemente/sed/sedfaq.html} @uref{http://sed.sf.net/grabbag/tutorials/sedfaq.html} *************** *** 2645,2747 **** Also of interest are @uref{http://www.student.northpark.edu/pemente/sed/index.htm} and @uref{http://sed.sf.net/grabbag}, ! which include sed tutorials and other sed-related goodies. ! There is a ``sed-users'' mailing list maintained by Sven Guckes. To subscribe, visit @uref{http://groups.yahoo.com} and search for the @code{sed-users} mailing list. @node Reporting Bugs ! 
@chapter Reporting bugs @cindex Bugs, reporting Email bug reports to @email{bonzini@@gnu.org}. Be sure to include the word ``sed'' somewhere in the @code{Subject:} field. ! Also, please include the output of @code{sed --version} in the body of your report if at all possible. Please do not send a bug report like this: @example ! @r{[while building frobme-1.3.4]} ! $ configure ! sed: file sedscr line 1: Unknown option to 's' @end example If @value{SSED} doesn't configure your favorite package, take a few extra minutes to identify the specific problem and make a stand-alone test case. Unlike other programs such as C compilers, making such test ! cases for @code{sed} is quite simple. A stand-alone test case includes all the data necessary to perform the ! test, and the specific invocation of @code{sed} that causes the problem. The smaller a stand-alone test case is, the better. A test case should ! not involve something as far removed from @code{sed} as ``try to configure frobme-1.3.4''. Yes, that is in principle enough information to look for the bug, but that is not a very practical prospect. Here are a few commonly reported bugs that are not bugs. @table @asis ! @item @code{sed -n} and @code{s/regex/replace/p} ! @cindex Portability, @code{p} command and @samp{-n} flag ! @cindex Non-bugs, @code{p} command and @samp{-n} flag ! Some versions of sed ignore the `p' (print) option of an `s' command ! unless the `-n' command switch has been specified. Other versions ! always honor the `p' option. Both approaches are allowed by @sc{posix.2} ! and @sc{gnu} @code{sed} (on which @value{SSED} is based) is the ! latter sort; I judge this approach to be better (give enough rope ! etc.) when you write complex scripts, but portable scripts should ! be written to work correctly with either behavior. ! @item regex syntax clashes @cindex @acronym{GNU} extensions, to basic regular expressions @cindex Non-bugs, regex syntax clashes ! 
@code{sed} uses the Posix basic regular expression syntax. According to the standard, the meaning of some escape sequences is undefined in ! this syntax; notable in the case of @code{sed} are @code{\|}, ! @code{\+}, @code{\?}, @code{\@code{}, @code{\}}, @code{\<}, @code{\>}, @code{\b}, @code{\B}, @code{\w}, and @code{\W}. ! As in all GNU programs that use Posix basic regular expressions, sed interprets these escape sequences as meta-characters. So, @code{x\+} ! matches one or more occurrences of @code{x}. @code{abc\|def} matches ! either @code{abc} or @code{def}. This syntax may cause problems when running scripts written for other ! @code{sed}s. Some @code{sed} programs have been written with the assumption that @code{\|} and @code{\+} match the literal characters @code{|} and @code{+}. Such scripts must be modified by removing the spurious backslashes if they are to be used with modern implementations ! of @code{sed}, like @sc{gnu} @code{sed} or @value{SSED}. @cindex @acronym{GNU} extensions, special escapes ! In addition, this version of @code{sed} supports several escape characters (some of which are multi-character) to insert non-printable characters in scripts (@code{\a}, @code{\c}, @code{\d}, @code{\o}, @code{\r}, @code{\t}, @code{\v}, @code{\x}). These can cause similar problems ! with scripts written for other @code{sed}s. ! @item @code{-i} clobbers read-only files @cindex In-place editing @cindex @value{SSEDEXT}, in-place editing @cindex Non-bugs, in-place editing ! In short, @code{sed d -i} will let one delete the contents of ! a read-only file, and in general the @code{-i} option ! (@pxref{Invoking sed, , Invocation} will let one clobber protected files. This is not a bug, but rather a consequence of how the Unix filesystem works. The permissions on a file say what can happen to the data in that file, while the permissions on a directory say what can ! happen to the list of files in that directory. @code{sed -i} ! 
will not ever open for writing a file that is already on disk, ! rather, it will work on a temporary file that is finally renamed to the original name: if you rename or delete files, you're actually modifying the contents of the directory, so the operation depends on ! the permissions of the directory, not of the file). For this same ! reason, @code{sed} will not let one use @code{-i} on a writeable file in a read-only directory (but unbelievably nobody reports that as a bug@dots{}). @end table --- 2610,2755 ---- Also of interest are @uref{http://www.student.northpark.edu/pemente/sed/index.htm} and @uref{http://sed.sf.net/grabbag}, ! which include @command{sed} tutorials and other @command{sed}-related goodies. ! The @code{sed-users} mailing list is itself maintained by Sven Guckes. To subscribe, visit @uref{http://groups.yahoo.com} and search for the @code{sed-users} mailing list. @node Reporting Bugs ! @chapter Reporting Bugs @cindex Bugs, reporting Email bug reports to @email{bonzini@@gnu.org}. Be sure to include the word ``sed'' somewhere in the @code{Subject:} field. ! Also, please include the output of @samp{sed --version} in the body of your report if at all possible. Please do not send a bug report like this: @example ! @i{while building frobme-1.3.4} ! $ configure ! @error{} sed: file sedscr line 1: Unknown option to 's' @end example If @value{SSED} doesn't configure your favorite package, take a few extra minutes to identify the specific problem and make a stand-alone test case. Unlike other programs such as C compilers, making such test ! cases for @command{sed} is quite simple. A stand-alone test case includes all the data necessary to perform the ! test, and the specific invocation of @command{sed} that causes the problem. The smaller a stand-alone test case is, the better. A test case should ! not involve something as far removed from @command{sed} as ``try to configure frobme-1.3.4''.
Yes, that is in principle enough information to look for the bug, but that is not a very practical prospect. Here are a few commonly reported bugs that are not bugs. @table @asis ! @item @samp{sed -n} and @samp{s/@var{regex}/@var{replace}/p} ! @cindex Portability, @code{p} command and @option{-n} flag ! @cindex Non-bugs, @code{p} command and @option{-n} flag ! Some versions of @command{sed} ignore the @code{p} (print) option of an @code{s} command ! unless the @option{-n} command-line option has been specified. Other versions ! always honor the @code{p} option. ! @c CHECK THE CURRENT STANDARD. ADR. ! Both approaches are allowed by @sc{posix} ! and @acronym{GNU} @command{sed} ! @ifset PERL ! (on which @value{SSED} is based) ! @end ifset ! is the latter sort, which is ! better when you write complex scripts and also more intuitive, but ! portable scripts should be written to work correctly with either ! behavior. ! @item @code{N} command on the last line ! @cindex Portability, @code{N} command on the last line ! @cindex Non-bugs, @code{N} command on the last line ! ! Most versions of @command{sed} exit without printing anything when ! the @code{N} command is issued on the last line of a file. ! @value{SSED} prints pattern space before exiting unless of course ! the @option{-n} command switch has been specified. This choice is ! by design. ! ! For example, the behavior of ! @example ! sed N foo bar ! @end example ! @noindent ! would depend on whether foo has an even or an odd number of ! lines@footnote{which is the actual ``bug'' that prompted the ! change in behavior}. Or, when writing a script to read the ! next few lines following a pattern match, traditional ! implementations of @command{sed} would force you to write ! something like ! @example ! /foo/@{ $!N; $!N; $!N; $!N; $!N; $!N; $!N; $!N; $!N @} ! @end example ! @noindent ! instead of just ! @example ! /foo/@{ N;N;N;N;N;N;N;N;N; @} ! @end example ! ! In any case, the simplest workaround is to use @code{$d;N} in !
scripts that rely on the traditional behavior. ! ! @item Regex syntax clashes @cindex @acronym{GNU} extensions, to basic regular expressions @cindex Non-bugs, regex syntax clashes ! @command{sed} uses the @sc{posix} basic regular expression syntax. According to the standard, the meaning of some escape sequences is undefined in ! this syntax; notable in the case of @command{sed} are @code{\|}, ! @code{\+}, @code{\?}, @code{\`}, @code{\'}, @code{\<}, @code{\>}, @code{\b}, @code{\B}, @code{\w}, and @code{\W}. ! As in all GNU programs that use @sc{posix} basic regular expressions, @command{sed} interprets these escape sequences as meta-characters. So, @code{x\+} ! matches one or more occurrences of @samp{x}. @code{abc\|def} matches ! either @samp{abc} or @samp{def}. This syntax may cause problems when running scripts written for other ! @command{sed}s. Some @command{sed} programs have been written with the assumption that @code{\|} and @code{\+} match the literal characters @code{|} and @code{+}. Such scripts must be modified by removing the spurious backslashes if they are to be used with modern implementations ! of @command{sed}, like ! @ifset PERL ! @value{SSED} or ! @end ifset ! @acronym{GNU} @command{sed}. @cindex @acronym{GNU} extensions, special escapes ! In addition, this version of @command{sed} supports several escape characters (some of which are multi-character) to insert non-printable characters in scripts (@code{\a}, @code{\c}, @code{\d}, @code{\o}, @code{\r}, @code{\t}, @code{\v}, @code{\x}). These can cause similar problems ! with scripts written for other @command{sed}s. ! @item @option{-i} clobbers read-only files @cindex In-place editing @cindex @value{SSEDEXT}, in-place editing @cindex Non-bugs, in-place editing ! In short, @samp{sed -i} will let you delete the contents of ! a read-only file, and in general the @option{-i} option ! (@pxref{Invoking sed, , Invocation}) lets you clobber protected files. 
This is not a bug, but rather a consequence of how the Unix filesystem works. The permissions on a file say what can happen to the data in that file, while the permissions on a directory say what can ! happen to the list of files in that directory. @samp{sed -i} ! will not ever open for writing a file that is already on disk. ! Rather, it will work on a temporary file that is finally renamed to the original name: if you rename or delete files, you're actually modifying the contents of the directory, so the operation depends on ! the permissions of the directory, not of the file. For this same ! reason, @command{sed} does not let you use @option{-i} on a writeable file in a read-only directory (but unbelievably nobody reports that as a bug@dots{}). @end table *************** *** 2759,2765 **** @noindent Examples: ! @table @samp @item abc? becomes @samp{abc\?} when using extended regular expressions. It matches the literal string @samp{abc?}. --- 2767,2773 ---- @noindent Examples: ! @table @code @item abc? becomes @samp{abc\?} when using extended regular expressions. It matches the literal string @samp{abc?}. *************** *** 2787,2797 **** @appendix Perl-style regular expressions @cindex Perl-style regular expressions, syntax ! @emph{This part is taken from the @code{pcre.txt} file distributed together with the free @sc{pcre} regular expression matcher; it was written by Philip Hazel.} Perl introduced several extensions to regular expressions, some of them incompatible with the syntax of regular expressions ! accepted by Emacs and other @sc{gnu} tools (whose matcher was based on the Emacs matcher). @value{SSED} implements both kinds of extensions. --- 2795,2806 ---- @appendix Perl-style regular expressions @cindex Perl-style regular expressions, syntax ! @emph{This part is taken from the @file{pcre.txt} file distributed together ! 
with the free @sc{pcre} regular expression matcher; it was written by Philip Hazel.} Perl introduced several extensions to regular expressions, some of them incompatible with the syntax of regular expressions ! accepted by Emacs and other @acronym{GNU} tools (whose matcher was based on the Emacs matcher). @value{SSED} implements both kinds of extensions. *************** *** 2827,2833 **** @code{\b}, but with subpatterns). @item ! You can often improve performance by avoiding that @code{sed} wastes time with backtracking @item --- 2836,2842 ---- @code{\b}, but with subpatterns). @item ! You can often improve performance by avoiding that @command{sed} wastes time with backtracking @item *************** *** 2886,2892 **** @cindex Perl-style regular expressions, backreferences The handling of a backslash followed by a digit other than 0 ! is complicated. Outside a character class, @code{sed} reads it and any following digits as a decimal number. If the number is less than 10, or if there have been at least that many previous capturing left parentheses in the expression, the --- 2895,2901 ---- @cindex Perl-style regular expressions, backreferences The handling of a backslash followed by a digit other than 0 ! is complicated. Outside a character class, @command{sed} reads it and any following digits as a decimal number. If the number is less than 10, or if there have been at least that many previous capturing left parentheses in the expression, the *************** *** 2896,2920 **** Inside a character class, or if the decimal number is greater than 9 and there have not been that many capturing ! subpatterns, @code{sed} re-reads up to three octal digits following the backslash, and generates a single byte from the least significant 8 bits of the value. Any subsequent digits stand for themselves. For example: @example ! \040 @r{is another way of writing a space} ! \40 @r{is the same, provided there are fewer than 40} ! @r{previous capturing subpatterns} ! 
\7 @r{is always a back reference} ! \011 @r{is always a tab} ! \11 @r{might be a back reference, or another way of} ! @r{writing a tab} ! \0113 @r{is a tab followed by the character @samp{3}} ! \113 @r{is the character with octal code 113 (since there} ! @r{can be no more than 99 back references)} ! \377 @r{is a byte consisting entirely of 1 bits (@sc{ascii} 255)} ! \81 @r{is either a back reference, or a binary zero} ! @r{followed by the two characters @samp{81}} @end example Note that octal values of 100 or greater must not be introduced --- 2905,2929 ---- Inside a character class, or if the decimal number is greater than 9 and there have not been that many capturing ! subpatterns, @command{sed} re-reads up to three octal digits following the backslash, and generates a single byte from the least significant 8 bits of the value. Any subsequent digits stand for themselves. For example: @example ! \040 @i{is another way of writing a space} ! \40 @i{is the same, provided there are fewer than 40} ! @i{previous capturing subpatterns} ! \7 @i{is always a back reference} ! \011 @i{is always a tab} ! \11 @i{might be a back reference, or another way of} ! @i{writing a tab} ! \0113 @i{is a tab followed by the character @samp{3}} ! \113 @i{is the character with octal code 113 (since there} ! @i{can be no more than 99 back references)} ! \377 @i{is a byte consisting entirely of 1 bits (@sc{ascii} 255)} ! \81 @i{is either a back reference, or a binary zero} ! @i{followed by the two characters @samp{81}} @end example Note that octal values of 100 or greater must not be introduced *************** *** 2984,2990 **** note that @code{\b} has a different meaning, namely the backspace character, inside a character class). Note that Perl mode does not support directly assertions ! for the beginning and the end of word; the @sc{gnu} extensions @code{\<} and @code{\>} achieve this purpose in @sc{posix} mode instead. 
--- 2993,2999 ---- note that @code{\b} has a different meaning, namely the backspace character, inside a character class). Note that Perl mode does not support directly assertions ! for the beginning and the end of word; the @acronym{GNU} extensions @code{\<} and @code{\>} achieve this purpose in @sc{posix} mode instead. *************** *** 2992,2998 **** from the traditional circumflex and dollar (described below) in that they only ever match at the very start and end of the subject string, whatever options are set; in particular @code{\A} ! and @code{\z} are the same as the @sc{gnu} extensions @code{\`} and @code{\'} that are active in @sc{posix} mode. @node Caret/dollar/full stop --- 3001,3007 ---- from the traditional circumflex and dollar (described below) in that they only ever match at the very start and end of the subject string, whatever options are set; in particular @code{\A} ! and @code{\z} are the same as the @acronym{GNU} extensions @code{\`} and @code{\'} that are active in @sc{posix} mode. @node Caret/dollar/full stop *************** *** 3395,3403 **** one that does not match the syntax of a quantifier, is taken as a literal character. For example, @{,6@} is not a quantifier, but a literal string of four characters.@footnote{It ! raises an error if @code{-R} is not used.} ! The quantifier @{0@} is permitted, causing the expression to behave as if the previous item and the quantifier were not present. --- 3404,3412 ---- one that does not match the syntax of a quantifier, is taken as a literal character. For example, @{,6@} is not a quantifier, but a literal string of four characters.@footnote{It ! raises an error if @option{-R} is not used.} ! The quantifier @samp{@{0@}} is permitted, causing the expression to behave as if the previous item and the quantifier were not present. *************** *** 3814,3820 **** @noindent when applied to a long string which does not match. Because ! 
matching proceeds from left to right, @code{sed} will look for each @samp{a} in the subject and then see if what follows matches the rest of the pattern. If the pattern is specified as --- 3823,3829 ---- @noindent when applied to a long string which does not match. Because ! matching proceeds from left to right, @command{sed} will look for each @samp{a} in the subject and then see if what follows matches the rest of the pattern. If the pattern is specified as *************** *** 3980,3986 **** The @code{(?p@{...@})} item interpolates Perl code at run time, and in this case refers recursively to the pattern in which it ! appears. Obviously, @code{sed} cannot support the interpolation of Perl code. Instead, the special item @code{(?R)} is provided for the specific case of recursion. This pattern solves the parentheses problem (assume the @code{X} modifier option is used --- 3989,3995 ---- The @code{(?p@{...@})} item interpolates Perl code at run time, and in this case refers recursively to the pattern in which it ! appears. Obviously, @command{sed} cannot support the interpolation of Perl code. Instead, the special item @code{(?R)} is provided for the specific case of recursion. This pattern solves the parentheses problem (assume the @code{X} modifier option is used *************** *** 4045,4051 **** @unnumbered Concept Index This is a general index of all issues discussed in this manual, with the ! exception of the @code{sed} commands and command-line options. @printindex cp --- 4054,4060 ---- @unnumbered Concept Index This is a general index of all issues discussed in this manual, with the ! exception of the @command{sed} commands and command-line options. @printindex cp *************** *** 4053,4059 **** @node Command and Option Index @unnumbered Command and Option Index ! This is an alphabetical list of all @code{sed} commands and command-line options. @printindex fn --- 4062,4068 ---- @node Command and Option Index @unnumbered Command and Option Index ! 
This is an alphabetical list of all @command{sed} commands and command-line options. @printindex fn diff -rNC3 sed-4.0.3/doc/stamp-vti sed-4.0.4/doc/stamp-vti *** sed-4.0.3/doc/stamp-vti Thu Nov 21 12:45:10 2002 --- sed-4.0.4/doc/stamp-vti Thu Dec 12 20:14:25 2002 *************** *** 1,4 **** ! @set UPDATED 19 November 2002 ! @set UPDATED-MONTH November 2002 ! @set EDITION 4.0.3 ! @set VERSION 4.0.3 --- 1,4 ---- ! @set UPDATED 12 December 2002 ! @set UPDATED-MONTH December 2002 ! @set EDITION 4.0.4 ! @set VERSION 4.0.4 diff -rNC3 sed-4.0.3/doc/version.texi sed-4.0.4/doc/version.texi *** sed-4.0.3/doc/version.texi Thu Nov 21 12:45:10 2002 --- sed-4.0.4/doc/version.texi Thu Dec 12 20:14:25 2002 *************** *** 1,4 **** ! @set UPDATED 19 November 2002 ! @set UPDATED-MONTH November 2002 ! @set EDITION 4.0.3 ! @set VERSION 4.0.3 --- 1,4 ---- ! @set UPDATED 12 December 2002 ! @set UPDATED-MONTH December 2002 ! @set EDITION 4.0.4 ! @set VERSION 4.0.4 diff -rNC3 sed-4.0.3/lib/getline.c sed-4.0.4/lib/getline.c *** sed-4.0.3/lib/getline.c Mon Dec 31 13:56:32 2001 --- sed-4.0.4/lib/getline.c Fri Nov 22 12:13:51 2002 *************** *** 1,3 **** --- 1,6 ---- + #ifdef HAVE_CONFIG_H + #include "config.h" + #endif #include diff -rNC3 sed-4.0.3/lib/mkstemp.c sed-4.0.4/lib/mkstemp.c *** sed-4.0.3/lib/mkstemp.c Sun Dec 16 13:29:55 2001 --- sed-4.0.4/lib/mkstemp.c Fri Nov 22 12:13:53 2002 *************** *** 1,3 **** --- 1,6 ---- + #ifdef HAVE_CONFIG_H + #include "config.h" + #endif #ifdef HAVE_STRINGS_H # include diff -rNC3 sed-4.0.3/lib/regcomp.c sed-4.0.4/lib/regcomp.c *** sed-4.0.3/lib/regcomp.c Wed Oct 23 17:02:07 2002 --- sed-4.0.4/lib/regcomp.c Fri Nov 22 12:04:08 2002 *************** *** 69,76 **** #include "regex_internal.h" static void re_compile_fastmap_iter (regex_t *bufp, ! const re_dfastate_t *init_state, !
char *fastmap); static reg_errcode_t init_dfa (re_dfa_t *dfa, int pat_len); static reg_errcode_t init_word_char (re_dfa_t *dfa); #ifdef RE_ENABLE_I18N --- 69,76 ---- #include "regex_internal.h" static void re_compile_fastmap_iter (regex_t *bufp, ! const re_dfastate_t *init_state, ! char *fastmap); static reg_errcode_t init_dfa (re_dfa_t *dfa, int pat_len); static reg_errcode_t init_word_char (re_dfa_t *dfa); #ifdef RE_ENABLE_I18N *************** *** 84,171 **** static void calc_next (re_dfa_t *dfa, bin_tree_t *node); static void calc_epsdest (re_dfa_t *dfa, bin_tree_t *node); static reg_errcode_t duplicate_node_closure (re_dfa_t *dfa, int top_org_node, ! int top_clone_node, int root_node, ! unsigned int constraint); static reg_errcode_t duplicate_node (int *new_idx, re_dfa_t *dfa, int org_idx, ! unsigned int constraint); static reg_errcode_t calc_eclosure (re_dfa_t *dfa); static reg_errcode_t calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, ! int node, int root); static void calc_inveclosure (re_dfa_t *dfa); static int fetch_number (re_string_t *input, re_token_t *token, ! reg_syntax_t syntax); static re_token_t fetch_token (re_string_t *input, reg_syntax_t syntax); static int peek_token (re_token_t *token, re_string_t *input, ! reg_syntax_t syntax); static int peek_token_bracket (re_token_t *token, re_string_t *input, ! reg_syntax_t syntax); static bin_tree_t *parse (re_string_t *regexp, regex_t *preg, ! reg_syntax_t syntax, reg_errcode_t *err); static bin_tree_t *parse_reg_exp (re_string_t *regexp, regex_t *preg, ! re_token_t *token, reg_syntax_t syntax, ! int nest, reg_errcode_t *err); static bin_tree_t *parse_branch (re_string_t *regexp, regex_t *preg, ! re_token_t *token, reg_syntax_t syntax, ! int nest, reg_errcode_t *err); static bin_tree_t *parse_expression (re_string_t *regexp, regex_t *preg, ! re_token_t *token, reg_syntax_t syntax, ! int nest, reg_errcode_t *err); static bin_tree_t *parse_sub_exp (re_string_t *regexp, regex_t *preg, ! 
re_token_t *token, reg_syntax_t syntax, ! int nest, reg_errcode_t *err); static bin_tree_t *parse_dup_op (bin_tree_t *dup_elem, re_string_t *regexp, ! re_dfa_t *dfa, re_token_t *token, ! reg_syntax_t syntax, reg_errcode_t *err); static bin_tree_t *parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, ! re_token_t *token, reg_syntax_t syntax, ! reg_errcode_t *err); static reg_errcode_t parse_bracket_element (bracket_elem_t *elem, ! re_string_t *regexp, ! re_token_t *token, int token_len, ! re_dfa_t *dfa, ! reg_syntax_t syntax); static reg_errcode_t parse_bracket_symbol (bracket_elem_t *elem, ! re_string_t *regexp, ! re_token_t *token); #ifndef _LIBC # ifdef RE_ENABLE_I18N static reg_errcode_t build_range_exp (re_bitset_ptr_t sbcset, ! re_charset_t *mbcset, int *range_alloc, ! bracket_elem_t *start_elem, ! bracket_elem_t *end_elem); static reg_errcode_t build_collating_symbol (re_bitset_ptr_t sbcset, ! re_charset_t *mbcset, ! int *coll_sym_alloc, ! const unsigned char *name); # else /* not RE_ENABLE_I18N */ static reg_errcode_t build_range_exp (re_bitset_ptr_t sbcset, ! bracket_elem_t *start_elem, ! bracket_elem_t *end_elem); static reg_errcode_t build_collating_symbol (re_bitset_ptr_t sbcset, ! const unsigned char *name); # endif /* not RE_ENABLE_I18N */ #endif /* not _LIBC */ #ifdef RE_ENABLE_I18N static reg_errcode_t build_equiv_class (re_bitset_ptr_t sbcset, ! re_charset_t *mbcset, ! int *equiv_class_alloc, ! const unsigned char *name); static reg_errcode_t build_charclass (re_bitset_ptr_t sbcset, ! re_charset_t *mbcset, ! int *char_class_alloc, ! const unsigned char *class_name, ! reg_syntax_t syntax); #else /* not RE_ENABLE_I18N */ static reg_errcode_t build_equiv_class (re_bitset_ptr_t sbcset, ! const unsigned char *name); static reg_errcode_t build_charclass (re_bitset_ptr_t sbcset, ! const unsigned char *class_name, ! 
reg_syntax_t syntax); #endif /* not RE_ENABLE_I18N */ static bin_tree_t *build_word_op (re_dfa_t *dfa, int not, reg_errcode_t *err); static void free_bin_tree (bin_tree_t *tree); static bin_tree_t *create_tree (bin_tree_t *left, bin_tree_t *right, ! re_token_type_t type, int index); static bin_tree_t *duplicate_tree (const bin_tree_t *src, re_dfa_t *dfa); /* This table gives an error message for each of the error codes listed --- 84,171 ---- static void calc_next (re_dfa_t *dfa, bin_tree_t *node); static void calc_epsdest (re_dfa_t *dfa, bin_tree_t *node); static reg_errcode_t duplicate_node_closure (re_dfa_t *dfa, int top_org_node, ! int top_clone_node, int root_node, ! unsigned int constraint); static reg_errcode_t duplicate_node (int *new_idx, re_dfa_t *dfa, int org_idx, ! unsigned int constraint); static reg_errcode_t calc_eclosure (re_dfa_t *dfa); static reg_errcode_t calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, ! int node, int root); static void calc_inveclosure (re_dfa_t *dfa); static int fetch_number (re_string_t *input, re_token_t *token, ! reg_syntax_t syntax); static re_token_t fetch_token (re_string_t *input, reg_syntax_t syntax); static int peek_token (re_token_t *token, re_string_t *input, ! reg_syntax_t syntax); static int peek_token_bracket (re_token_t *token, re_string_t *input, ! reg_syntax_t syntax); static bin_tree_t *parse (re_string_t *regexp, regex_t *preg, ! reg_syntax_t syntax, reg_errcode_t *err); static bin_tree_t *parse_reg_exp (re_string_t *regexp, regex_t *preg, ! re_token_t *token, reg_syntax_t syntax, ! int nest, reg_errcode_t *err); static bin_tree_t *parse_branch (re_string_t *regexp, regex_t *preg, ! re_token_t *token, reg_syntax_t syntax, ! int nest, reg_errcode_t *err); static bin_tree_t *parse_expression (re_string_t *regexp, regex_t *preg, ! re_token_t *token, reg_syntax_t syntax, ! int nest, reg_errcode_t *err); static bin_tree_t *parse_sub_exp (re_string_t *regexp, regex_t *preg, ! 
re_token_t *token, reg_syntax_t syntax, ! int nest, reg_errcode_t *err); static bin_tree_t *parse_dup_op (bin_tree_t *dup_elem, re_string_t *regexp, ! re_dfa_t *dfa, re_token_t *token, ! reg_syntax_t syntax, reg_errcode_t *err); static bin_tree_t *parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, ! re_token_t *token, reg_syntax_t syntax, ! reg_errcode_t *err); static reg_errcode_t parse_bracket_element (bracket_elem_t *elem, ! re_string_t *regexp, ! re_token_t *token, int token_len, ! re_dfa_t *dfa, ! reg_syntax_t syntax); static reg_errcode_t parse_bracket_symbol (bracket_elem_t *elem, ! re_string_t *regexp, ! re_token_t *token); #ifndef _LIBC # ifdef RE_ENABLE_I18N static reg_errcode_t build_range_exp (re_bitset_ptr_t sbcset, ! re_charset_t *mbcset, int *range_alloc, ! bracket_elem_t *start_elem, ! bracket_elem_t *end_elem); static reg_errcode_t build_collating_symbol (re_bitset_ptr_t sbcset, ! re_charset_t *mbcset, ! int *coll_sym_alloc, ! const unsigned char *name); # else /* not RE_ENABLE_I18N */ static reg_errcode_t build_range_exp (re_bitset_ptr_t sbcset, ! bracket_elem_t *start_elem, ! bracket_elem_t *end_elem); static reg_errcode_t build_collating_symbol (re_bitset_ptr_t sbcset, ! const unsigned char *name); # endif /* not RE_ENABLE_I18N */ #endif /* not _LIBC */ #ifdef RE_ENABLE_I18N static reg_errcode_t build_equiv_class (re_bitset_ptr_t sbcset, ! re_charset_t *mbcset, ! int *equiv_class_alloc, ! const unsigned char *name); static reg_errcode_t build_charclass (re_bitset_ptr_t sbcset, ! re_charset_t *mbcset, ! int *char_class_alloc, ! const unsigned char *class_name, ! reg_syntax_t syntax); #else /* not RE_ENABLE_I18N */ static reg_errcode_t build_equiv_class (re_bitset_ptr_t sbcset, ! const unsigned char *name); static reg_errcode_t build_charclass (re_bitset_ptr_t sbcset, ! const unsigned char *class_name, ! 
reg_syntax_t syntax); #endif /* not RE_ENABLE_I18N */ static bin_tree_t *build_word_op (re_dfa_t *dfa, int not, reg_errcode_t *err); static void free_bin_tree (bin_tree_t *tree); static bin_tree_t *create_tree (bin_tree_t *left, bin_tree_t *right, ! re_token_type_t type, int index); static bin_tree_t *duplicate_tree (const bin_tree_t *src, re_dfa_t *dfa); /* This table gives an error message for each of the error codes listed *************** *** 175,229 **** const char __re_error_msgid[] attribute_hidden = { ! #define REG_NOERROR_IDX 0 ! gettext_noop ("Success") /* REG_NOERROR */ "\0" #define REG_NOMATCH_IDX (REG_NOERROR_IDX + sizeof "Success") ! gettext_noop ("No match") /* REG_NOMATCH */ "\0" ! #define REG_BADPAT_IDX (REG_NOMATCH_IDX + sizeof "No match") gettext_noop ("Invalid regular expression") /* REG_BADPAT */ "\0" #define REG_ECOLLATE_IDX (REG_BADPAT_IDX + sizeof "Invalid regular expression") gettext_noop ("Invalid collation character") /* REG_ECOLLATE */ "\0" ! #define REG_ECTYPE_IDX (REG_ECOLLATE_IDX + sizeof "Invalid collation character") gettext_noop ("Invalid character class name") /* REG_ECTYPE */ "\0" ! #define REG_EESCAPE_IDX (REG_ECTYPE_IDX + sizeof "Invalid character class name") gettext_noop ("Trailing backslash") /* REG_EESCAPE */ "\0" ! #define REG_ESUBREG_IDX (REG_EESCAPE_IDX + sizeof "Trailing backslash") gettext_noop ("Invalid back reference") /* REG_ESUBREG */ "\0" ! #define REG_EBRACK_IDX (REG_ESUBREG_IDX + sizeof "Invalid back reference") ! gettext_noop ("Unmatched [ or [^") /* REG_EBRACK */ "\0" ! #define REG_EPAREN_IDX (REG_EBRACK_IDX + sizeof "Unmatched [ or [^") gettext_noop ("Unmatched ( or \\(") /* REG_EPAREN */ "\0" ! #define REG_EBRACE_IDX (REG_EPAREN_IDX + sizeof "Unmatched ( or \\(") gettext_noop ("Unmatched \\{") /* REG_EBRACE */ "\0" ! #define REG_BADBR_IDX (REG_EBRACE_IDX + sizeof "Unmatched \\{") gettext_noop ("Invalid content of \\{\\}") /* REG_BADBR */ "\0" ! 
#define REG_ERANGE_IDX (REG_BADBR_IDX + sizeof "Invalid content of \\{\\}") ! gettext_noop ("Invalid range end") /* REG_ERANGE */ "\0" ! #define REG_ESPACE_IDX (REG_ERANGE_IDX + sizeof "Invalid range end") gettext_noop ("Memory exhausted") /* REG_ESPACE */ "\0" ! #define REG_BADRPT_IDX (REG_ESPACE_IDX + sizeof "Memory exhausted") gettext_noop ("Invalid preceding regular expression") /* REG_BADRPT */ "\0" ! #define REG_EEND_IDX (REG_BADRPT_IDX + sizeof "Invalid preceding regular expression") gettext_noop ("Premature end of regular expression") /* REG_EEND */ "\0" ! #define REG_ESIZE_IDX (REG_EEND_IDX + sizeof "Premature end of regular expression") gettext_noop ("Regular expression too big") /* REG_ESIZE */ "\0" ! #define REG_ERPAREN_IDX (REG_ESIZE_IDX + sizeof "Regular expression too big") gettext_noop ("Unmatched ) or \\)") /* REG_ERPAREN */ }; --- 175,229 ---- const char __re_error_msgid[] attribute_hidden = { ! #define REG_NOERROR_IDX 0 ! gettext_noop ("Success") /* REG_NOERROR */ "\0" #define REG_NOMATCH_IDX (REG_NOERROR_IDX + sizeof "Success") ! gettext_noop ("No match") /* REG_NOMATCH */ "\0" ! #define REG_BADPAT_IDX (REG_NOMATCH_IDX + sizeof "No match") gettext_noop ("Invalid regular expression") /* REG_BADPAT */ "\0" #define REG_ECOLLATE_IDX (REG_BADPAT_IDX + sizeof "Invalid regular expression") gettext_noop ("Invalid collation character") /* REG_ECOLLATE */ "\0" ! #define REG_ECTYPE_IDX (REG_ECOLLATE_IDX + sizeof "Invalid collation character") gettext_noop ("Invalid character class name") /* REG_ECTYPE */ "\0" ! #define REG_EESCAPE_IDX (REG_ECTYPE_IDX + sizeof "Invalid character class name") gettext_noop ("Trailing backslash") /* REG_EESCAPE */ "\0" ! #define REG_ESUBREG_IDX (REG_EESCAPE_IDX + sizeof "Trailing backslash") gettext_noop ("Invalid back reference") /* REG_ESUBREG */ "\0" ! #define REG_EBRACK_IDX (REG_ESUBREG_IDX + sizeof "Invalid back reference") ! gettext_noop ("Unmatched [ or [^") /* REG_EBRACK */ "\0" ! 
#define REG_EPAREN_IDX (REG_EBRACK_IDX + sizeof "Unmatched [ or [^") gettext_noop ("Unmatched ( or \\(") /* REG_EPAREN */ "\0" ! #define REG_EBRACE_IDX (REG_EPAREN_IDX + sizeof "Unmatched ( or \\(") gettext_noop ("Unmatched \\{") /* REG_EBRACE */ "\0" ! #define REG_BADBR_IDX (REG_EBRACE_IDX + sizeof "Unmatched \\{") gettext_noop ("Invalid content of \\{\\}") /* REG_BADBR */ "\0" ! #define REG_ERANGE_IDX (REG_BADBR_IDX + sizeof "Invalid content of \\{\\}") ! gettext_noop ("Invalid range end") /* REG_ERANGE */ "\0" ! #define REG_ESPACE_IDX (REG_ERANGE_IDX + sizeof "Invalid range end") gettext_noop ("Memory exhausted") /* REG_ESPACE */ "\0" ! #define REG_BADRPT_IDX (REG_ESPACE_IDX + sizeof "Memory exhausted") gettext_noop ("Invalid preceding regular expression") /* REG_BADRPT */ "\0" ! #define REG_EEND_IDX (REG_BADRPT_IDX + sizeof "Invalid preceding regular expression") gettext_noop ("Premature end of regular expression") /* REG_EEND */ "\0" ! #define REG_ESIZE_IDX (REG_EEND_IDX + sizeof "Premature end of regular expression") gettext_noop ("Regular expression too big") /* REG_ESIZE */ "\0" ! #define REG_ERPAREN_IDX (REG_ESIZE_IDX + sizeof "Regular expression too big") gettext_noop ("Unmatched ) or \\)") /* REG_ERPAREN */ }; *************** *** 265,274 **** { reg_errcode_t ret; - /* GNU code is written to assume at least RE_NREGS registers will be set - (and at least one extra will be -1). */ - bufp->regs_allocated = REGS_UNALLOCATED; - /* And GNU code determines whether or not to get register information by passing null for the REGS argument to re_match, etc., not by setting no_sub. */ --- 265,270 ---- *************** *** 337,342 **** --- 333,346 ---- weak_alias (__re_compile_fastmap, re_compile_fastmap) #endif + static inline void + re_set_fastmap (char *fastmap, int icase, int ch) + { + fastmap[ch] = 1; + if (icase) + fastmap[tolower (ch)] = 1; + } + /* Helper function for re_compile_fastmap. Compile fastmap for the initial_state INIT_STATE. 
*/ *************** *** 348,419 **** { re_dfa_t *dfa = (re_dfa_t *) bufp->buffer; int node_cnt; for (node_cnt = 0; node_cnt < init_state->nodes.nelem; ++node_cnt) { int node = init_state->nodes.elems[node_cnt]; re_token_type_t type = dfa->nodes[node].type; if (type == CHARACTER) ! fastmap[dfa->nodes[node].opr.c] = 1; else if (type == SIMPLE_BRACKET) ! { ! int i, j, ch; ! for (i = 0, ch = 0; i < BITSET_UINTS; ++i) ! for (j = 0; j < UINT_BITS; ++j, ++ch) ! if (dfa->nodes[node].opr.sbcset[i] & (1 << j)) ! fastmap[ch] = 1; ! } #ifdef RE_ENABLE_I18N else if (type == COMPLEX_BRACKET) ! { ! int i; ! re_charset_t *cset = dfa->nodes[node].opr.mbcset; ! if (cset->non_match || cset->ncoll_syms || cset->nequiv_classes ! || cset->nranges || cset->nchar_classes) ! { # ifdef _LIBC ! if (_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES) != 0) ! { ! /* In this case we want to catch the bytes which are ! the first byte of any collation elements. ! e.g. In da_DK, we want to catch 'a' since "aa" ! is a valid collation element, and don't catch ! 'b' since 'b' is the only collation element ! which starts from 'b'. */ ! int j, ch; ! const int32_t *table = (const int32_t *) ! _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB); ! for (i = 0, ch = 0; i < BITSET_UINTS; ++i) ! for (j = 0; j < UINT_BITS; ++j, ++ch) ! if (table[ch] < 0) ! fastmap[ch] = 1; ! } # else ! if (MB_CUR_MAX > 1) ! for (i = 0; i < SBC_MAX; ++i) ! if (__btowc (i) == WEOF) ! fastmap[i] = 1; # endif /* not _LIBC */ ! } ! for (i = 0; i < cset->nmbchars; ++i) ! { ! char buf[256]; ! wctomb (buf, cset->mbchars[i]); ! fastmap[*(unsigned char *) buf] = 1; ! } ! } ! #endif /* RE_ENABLE_I18N */ ! else if (type == END_OF_RE || type == OP_PERIOD ! #ifdef RE_ENABLE_I18N ! || type == COMPLEX_BRACKET ! #endif /* RE_ENABLE_I18N */ ! ) ! { ! memset (fastmap, '\1', sizeof (char) * SBC_MAX); ! if (type == END_OF_RE) ! bufp->can_be_null = 1; ! return; ! 
} } } --- 352,422 ---- { re_dfa_t *dfa = (re_dfa_t *) bufp->buffer; int node_cnt; + int icase = (MB_CUR_MAX == 1 && (bufp->syntax & RE_ICASE)); for (node_cnt = 0; node_cnt < init_state->nodes.nelem; ++node_cnt) { int node = init_state->nodes.elems[node_cnt]; re_token_type_t type = dfa->nodes[node].type; if (type == CHARACTER) ! re_set_fastmap (fastmap, icase, dfa->nodes[node].opr.c); else if (type == SIMPLE_BRACKET) ! { ! int i, j, ch; ! for (i = 0, ch = 0; i < BITSET_UINTS; ++i) ! for (j = 0; j < UINT_BITS; ++j, ++ch) ! if (dfa->nodes[node].opr.sbcset[i] & (1 << j)) ! re_set_fastmap (fastmap, icase, ch); ! } #ifdef RE_ENABLE_I18N else if (type == COMPLEX_BRACKET) ! { ! int i; ! re_charset_t *cset = dfa->nodes[node].opr.mbcset; ! if (cset->non_match || cset->ncoll_syms || cset->nequiv_classes ! || cset->nranges || cset->nchar_classes) ! { # ifdef _LIBC ! if (_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES) != 0) ! { ! /* In this case we want to catch the bytes which are ! the first byte of any collation elements. ! e.g. In da_DK, we want to catch 'a' since "aa" ! is a valid collation element, and don't catch ! 'b' since 'b' is the only collation element ! which starts from 'b'. */ ! int j, ch; ! const int32_t *table = (const int32_t *) ! _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB); ! for (i = 0, ch = 0; i < BITSET_UINTS; ++i) ! for (j = 0; j < UINT_BITS; ++j, ++ch) ! if (table[ch] < 0) ! re_set_fastmap (fastmap, icase, ch); ! } # else ! if (MB_CUR_MAX > 1) ! for (i = 0; i < SBC_MAX; ++i) ! if (__btowc (i) == WEOF) ! re_set_fastmap (fastmap, icase, i); # endif /* not _LIBC */ ! } ! for (i = 0; i < cset->nmbchars; ++i) ! { ! char buf[256]; ! mbstate_t state; ! memset (&state, '\0', sizeof (state)); ! wcrtomb (buf, cset->mbchars[i], &state); ! re_set_fastmap (fastmap, icase, *(unsigned char *) buf); ! } ! } ! #endif /* RE_ENABLE_I18N */ ! else if (type == END_OF_RE || type == OP_PERIOD) ! { ! memset (fastmap, '\1', sizeof (char) * SBC_MAX); ! 
if (type == END_OF_RE) ! bufp->can_be_null = 1; ! return; ! } } } *************** *** 461,467 **** { reg_errcode_t ret; reg_syntax_t syntax = ((cflags & REG_EXTENDED) ? RE_SYNTAX_POSIX_EXTENDED ! : RE_SYNTAX_POSIX_BASIC); preg->buffer = NULL; preg->allocated = 0; --- 464,470 ---- { reg_errcode_t ret; reg_syntax_t syntax = ((cflags & REG_EXTENDED) ? RE_SYNTAX_POSIX_EXTENDED ! : RE_SYNTAX_POSIX_BASIC); preg->buffer = NULL; preg->allocated = 0; *************** *** 496,511 **** /* We have already checked preg->fastmap != NULL. */ if (BE (ret == REG_NOERROR, 1)) { ! /* Compute the fastmap now, since regexec cannot modify the pattern ! buffer. */ ! if (BE (re_compile_fastmap (preg) == -2, 0)) ! { ! /* Some error occurred while computing the fastmap, just forget ! about it. */ ! re_free (preg->fastmap); ! preg->fastmap = NULL; ! } } return (int) ret; --- 499,512 ---- /* We have already checked preg->fastmap != NULL. */ if (BE (ret == REG_NOERROR, 1)) + /* Compute the fastmap now, since regexec cannot modify the pattern + buffer. This function never fails in this implementation. */ + (void) re_compile_fastmap (preg); + else { ! /* Some error occurred while compiling the expression. */ ! re_free (preg->fastmap); ! preg->fastmap = NULL; } return (int) ret; *************** *** 528,535 **** size_t msg_size; if (BE (errcode < 0 ! || errcode >= (int) (sizeof (__re_error_msgid_idx) ! / sizeof (__re_error_msgid_idx[0])), 0)) /* Only error codes returned by the rest of the code should be passed to this routine. If we are given anything else, or if other regex code generates an invalid error code, then the program has a bug.
*************** *** 543,558 **** if (BE (errbuf_size != 0, 1)) { if (BE (msg_size > errbuf_size, 0)) ! { #if defined HAVE_MEMPCPY || defined _LIBC ! *((char *) __mempcpy (errbuf, msg, errbuf_size - 1)) = '\0'; #else ! memcpy (errbuf, msg, errbuf_size - 1); ! errbuf[errbuf_size - 1] = 0; #endif ! } else ! memcpy (errbuf, msg, msg_size); } return msg_size; --- 544,559 ---- if (BE (errbuf_size != 0, 1)) { if (BE (msg_size > errbuf_size, 0)) ! { #if defined HAVE_MEMPCPY || defined _LIBC ! *((char *) __mempcpy (errbuf, msg, errbuf_size - 1)) = '\0'; #else ! memcpy (errbuf, msg, errbuf_size - 1); ! errbuf[errbuf_size - 1] = 0; #endif ! } else ! memcpy (errbuf, msg, msg_size); } return msg_size; *************** *** 561,631 **** weak_alias (__regerror, regerror) #endif /* Free dynamically allocated space used by PREG. */ void regfree (preg) regex_t *preg; { - int i, j; re_dfa_t *dfa = (re_dfa_t *) preg->buffer; if (BE (dfa != NULL, 1)) ! { ! re_free (dfa->subexps); ! ! for (i = 0; i < dfa->nodes_len; ++i) ! { ! re_token_t *node = dfa->nodes + i; ! #ifdef RE_ENABLE_I18N ! if (node->type == COMPLEX_BRACKET && node->duplicated == 0) ! free_charset (node->opr.mbcset); ! else ! #endif /* RE_ENABLE_I18N */ ! if (node->type == SIMPLE_BRACKET && node->duplicated == 0) ! re_free (node->opr.sbcset); ! } ! re_free (dfa->nexts); ! for (i = 0; i < dfa->nodes_len; ++i) ! { ! if (dfa->eclosures != NULL) ! re_node_set_free (dfa->eclosures + i); ! if (dfa->inveclosures != NULL) ! re_node_set_free (dfa->inveclosures + i); ! if (dfa->edests != NULL) ! re_node_set_free (dfa->edests + i); ! } ! re_free (dfa->edests); ! re_free (dfa->eclosures); ! re_free (dfa->inveclosures); ! re_free (dfa->nodes); ! ! for (i = 0; i <= dfa->state_hash_mask; ++i) ! { ! struct re_state_table_entry *entry = dfa->state_table + i; ! for (j = 0; j < entry->num; ++j) ! { ! re_dfastate_t *state = entry->array[j]; ! if (state->entrance_nodes != &state->nodes) ! { ! re_node_set_free (state->entrance_nodes); ! 
re_free (state->entrance_nodes); ! } ! re_node_set_free (&state->nodes); ! re_free (state->trtable); ! re_free (state->trtable_search); ! re_free (state); ! } ! re_free (entry->array); ! } ! re_free (dfa->state_table); - if (dfa->word_char != NULL) - re_free (dfa->word_char); - #ifdef DEBUG - re_free (dfa->re_str); - #endif - re_free (dfa); - } re_free (preg->fastmap); } #ifdef _LIBC --- 562,633 ---- weak_alias (__regerror, regerror) #endif + + static void + free_dfa_content (re_dfa_t *dfa) + { + int i, j; + + re_free (dfa->subexps); + + for (i = 0; i < dfa->nodes_len; ++i) + { + re_token_t *node = dfa->nodes + i; + #ifdef RE_ENABLE_I18N + if (node->type == COMPLEX_BRACKET && node->duplicated == 0) + free_charset (node->opr.mbcset); + else + #endif /* RE_ENABLE_I18N */ + if (node->type == SIMPLE_BRACKET && node->duplicated == 0) + re_free (node->opr.sbcset); + } + re_free (dfa->nexts); + for (i = 0; i < dfa->nodes_len; ++i) + { + if (dfa->eclosures != NULL) + re_node_set_free (dfa->eclosures + i); + if (dfa->inveclosures != NULL) + re_node_set_free (dfa->inveclosures + i); + if (dfa->edests != NULL) + re_node_set_free (dfa->edests + i); + } + re_free (dfa->edests); + re_free (dfa->eclosures); + re_free (dfa->inveclosures); + re_free (dfa->nodes); + + for (i = 0; i <= dfa->state_hash_mask; ++i) + { + struct re_state_table_entry *entry = dfa->state_table + i; + for (j = 0; j < entry->num; ++j) + { + re_dfastate_t *state = entry->array[j]; + free_state (state); + } + re_free (entry->array); + } + re_free (dfa->state_table); + + if (dfa->word_char != NULL) + re_free (dfa->word_char); + #ifdef DEBUG + re_free (dfa->re_str); + #endif + + re_free (dfa); + } + + /* Free dynamically allocated space used by PREG. */ void regfree (preg) regex_t *preg; { re_dfa_t *dfa = (re_dfa_t *) preg->buffer; if (BE (dfa != NULL, 1)) ! 
free_dfa_content (dfa); re_free (preg->fastmap); } #ifdef _LIBC *************** *** 651,670 **** const char *s; { reg_errcode_t ret; if (!s) { if (!re_comp_buf.buffer) ! return gettext ("No previous regular expression"); return 0; } ! if (!re_comp_buf.buffer) { re_comp_buf.fastmap = (char *) malloc (SBC_MAX); if (re_comp_buf.fastmap == NULL) ! return (char *) gettext (__re_error_msgid ! + __re_error_msgid_idx[(int) REG_ESPACE]); } /* Since `re_exec' always passes NULL for the `regs' argument, we --- 653,682 ---- const char *s; { reg_errcode_t ret; + char *fastmap; if (!s) { if (!re_comp_buf.buffer) ! return gettext ("No previous regular expression"); return 0; } ! if (re_comp_buf.buffer) ! { ! fastmap = re_comp_buf.fastmap; ! re_comp_buf.fastmap = NULL; ! __regfree (&re_comp_buf); ! memset (&re_comp_buf, '\0', sizeof (re_comp_buf)); ! re_comp_buf.fastmap = fastmap; ! } ! ! if (re_comp_buf.fastmap == NULL) { re_comp_buf.fastmap = (char *) malloc (SBC_MAX); if (re_comp_buf.fastmap == NULL) ! return (char *) gettext (__re_error_msgid ! + __re_error_msgid_idx[(int) REG_ESPACE]); } /* Since `re_exec' always passes NULL for the `regs' argument, we *************** *** 681,686 **** --- 693,706 ---- /* Yes, we're discarding `const' here if !HAVE_LIBINTL. */ return (char *) gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret]); } + + #ifdef _LIBC + libc_freeres_fn (free_mem) + { + __regfree (&re_comp_buf); + } + #endif + #endif /* _REGEX_RE_COMP */ /* Internal entry point. *************** *** 704,721 **** preg->not_bol = preg->not_eol = 0; preg->used = 0; preg->re_nsub = 0; /* Initialize the dfa. */ dfa = (re_dfa_t *) preg->buffer; if (preg->allocated < sizeof (re_dfa_t)) { /* If zero allocated, but buffer is non-null, try to realloc ! enough space. This loses if buffer's address is bogus, but ! that is the user's responsibility. If ->buffer is NULL this ! is a simple allocation. */ dfa = re_realloc (preg->buffer, re_dfa_t, 1); if (dfa == NULL) ! 
return REG_ESPACE; preg->allocated = sizeof (re_dfa_t); } preg->buffer = (unsigned char *) dfa; --- 724,743 ---- preg->not_bol = preg->not_eol = 0; preg->used = 0; preg->re_nsub = 0; + preg->can_be_null = 0; + preg->regs_allocated = REGS_UNALLOCATED; /* Initialize the dfa. */ dfa = (re_dfa_t *) preg->buffer; if (preg->allocated < sizeof (re_dfa_t)) { /* If zero allocated, but buffer is non-null, try to realloc ! enough space. This loses if buffer's address is bogus, but ! that is the user's responsibility. If ->buffer is NULL this ! is a simple allocation. */ dfa = re_realloc (preg->buffer, re_dfa_t, 1); if (dfa == NULL) ! return REG_ESPACE; preg->allocated = sizeof (re_dfa_t); } preg->buffer = (unsigned char *) dfa; *************** *** 734,740 **** #endif err = re_string_construct (&regexp, pattern, length, preg->translate, ! syntax & RE_ICASE); if (BE (err != REG_NOERROR, 0)) { re_free (dfa); --- 756,762 ---- #endif err = re_string_construct (&regexp, pattern, length, preg->translate, ! syntax & RE_ICASE); if (BE (err != REG_NOERROR, 0)) { re_free (dfa); *************** *** 756,769 **** /* Then create the initial state of the dfa. */ err = create_initial_state (dfa); - if (BE (err != REG_NOERROR, 0)) - goto re_compile_internal_free_return; - re_compile_internal_free_return: /* Release work areas. */ free_workarea_compile (preg); re_string_destruct (&regexp); return err; } --- 778,795 ---- /* Then create the initial state of the dfa. */ err = create_initial_state (dfa); /* Release work areas. */ free_workarea_compile (preg); re_string_destruct (&regexp); + if (BE (err != REG_NOERROR, 0)) + { + re_compile_internal_free_return: + free_dfa_content (dfa); + preg->buffer = NULL; + } + return err; } *************** *** 797,806 **** dfa->word_char = NULL; if (BE (dfa->nodes == NULL || dfa->state_table == NULL ! || dfa->subexps == NULL, 0)) { /* We don't bother to free anything which was allocated. Very ! soon the process will go down anyway.
*/ dfa->subexps = NULL; dfa->state_table = NULL; dfa->nodes = NULL; --- 823,832 ---- dfa->word_char = NULL; if (BE (dfa->nodes == NULL || dfa->state_table == NULL ! || dfa->subexps == NULL, 0)) { /* We don't bother to free anything which was allocated. Very ! soon the process will go down anyway. */ dfa->subexps = NULL; dfa->state_table = NULL; dfa->nodes = NULL; *************** *** 824,830 **** for (i = 0, ch = 0; i < BITSET_UINTS; ++i) for (j = 0; j < UINT_BITS; ++j, ++ch) if (isalnum (ch) || ch == '_') ! dfa->word_char[i] |= 1 << j; return REG_NOERROR; } --- 850,856 ---- for (i = 0, ch = 0; i < BITSET_UINTS; ++i) for (j = 0; j < UINT_BITS; ++j, ++ch) if (isalnum (ch) || ch == '_') ! dfa->word_char[i] |= 1 << j; return REG_NOERROR; } *************** *** 864,895 **** if (dfa->nbackref > 0) for (i = 0; i < init_nodes.nelem; ++i) { ! int node_idx = init_nodes.elems[i]; ! re_token_type_t type = dfa->nodes[node_idx].type; ! int clexp_idx; ! if (type != OP_BACK_REF) ! continue; ! for (clexp_idx = 0; clexp_idx < init_nodes.nelem; ++clexp_idx) ! { ! re_token_t *clexp_node; ! clexp_node = dfa->nodes + init_nodes.elems[clexp_idx]; ! if (clexp_node->type == OP_CLOSE_SUBEXP ! && clexp_node->opr.idx + 1 == dfa->nodes[node_idx].opr.idx) ! break; ! } ! if (clexp_idx == init_nodes.nelem) ! continue; ! ! if (type == OP_BACK_REF) ! { ! int dest_idx = dfa->edests[node_idx].elems[0]; ! if (!re_node_set_contains (&init_nodes, dest_idx)) ! { ! re_node_set_merge (&init_nodes, dfa->eclosures + dest_idx); ! i = 0; ! } ! } } /* It must be the first time to invoke acquire_state. */ --- 890,921 ---- if (dfa->nbackref > 0) for (i = 0; i < init_nodes.nelem; ++i) { ! int node_idx = init_nodes.elems[i]; ! re_token_type_t type = dfa->nodes[node_idx].type; ! int clexp_idx; ! if (type != OP_BACK_REF) ! continue; ! for (clexp_idx = 0; clexp_idx < init_nodes.nelem; ++clexp_idx) ! { ! re_token_t *clexp_node; ! clexp_node = dfa->nodes + init_nodes.elems[clexp_idx]; ! 
if (clexp_node->type == OP_CLOSE_SUBEXP ! && clexp_node->opr.idx + 1 == dfa->nodes[node_idx].opr.idx) ! break; ! } ! if (clexp_idx == init_nodes.nelem) ! continue; ! ! if (type == OP_BACK_REF) ! { ! int dest_idx = dfa->edests[node_idx].elems[0]; ! if (!re_node_set_contains (&init_nodes, dest_idx)) ! { ! re_node_set_merge (&init_nodes, dfa->eclosures + dest_idx); ! i = 0; ! } ! } } /* It must be the first time to invoke acquire_state. */ *************** *** 900,915 **** if (dfa->init_state->has_constraint) { dfa->init_state_word = re_acquire_state_context (&err, dfa, &init_nodes, ! CONTEXT_WORD); dfa->init_state_nl = re_acquire_state_context (&err, dfa, &init_nodes, ! CONTEXT_NEWLINE); dfa->init_state_begbuf = re_acquire_state_context (&err, dfa, ! &init_nodes, ! CONTEXT_NEWLINE ! | CONTEXT_BEGBUF); if (BE (dfa->init_state_word == NULL || dfa->init_state_nl == NULL ! || dfa->init_state_begbuf == NULL, 0)) ! return err; } else dfa->init_state_word = dfa->init_state_nl --- 926,941 ---- if (dfa->init_state->has_constraint) { dfa->init_state_word = re_acquire_state_context (&err, dfa, &init_nodes, ! CONTEXT_WORD); dfa->init_state_nl = re_acquire_state_context (&err, dfa, &init_nodes, ! CONTEXT_NEWLINE); dfa->init_state_begbuf = re_acquire_state_context (&err, dfa, ! &init_nodes, ! CONTEXT_NEWLINE ! | CONTEXT_BEGBUF); if (BE (dfa->init_state_word == NULL || dfa->init_state_nl == NULL ! || dfa->init_state_begbuf == NULL, 0)) ! return err; } else dfa->init_state_word = dfa->init_state_nl *************** *** 935,941 **** dfa->eclosures = re_malloc (re_node_set, dfa->nodes_alloc); dfa->inveclosures = re_malloc (re_node_set, dfa->nodes_alloc); if (BE (dfa->nexts == NULL || dfa->edests == NULL ! || dfa->eclosures == NULL || dfa->inveclosures == NULL, 0)) return REG_ESPACE; /* Initialize them. 
*/ for (i = 0; i < dfa->nodes_len; ++i) --- 961,967 ---- dfa->eclosures = re_malloc (re_node_set, dfa->nodes_alloc); dfa->inveclosures = re_malloc (re_node_set, dfa->nodes_alloc); if (BE (dfa->nexts == NULL || dfa->edests == NULL ! || dfa->eclosures == NULL || dfa->inveclosures == NULL, 0)) return REG_ESPACE; /* Initialize them. */ for (i = 0; i < dfa->nodes_len; ++i) *************** *** 951,957 **** { ret = calc_eclosure (dfa); if (ret == REG_NOERROR) ! calc_inveclosure (dfa); } return ret; } --- 977,983 ---- { ret = calc_eclosure (dfa); if (ret == REG_NOERROR) ! calc_inveclosure (dfa); } return ret; } *************** *** 977,990 **** { ret = analyze_tree (dfa, node->left); if (BE (ret != REG_NOERROR, 0)) ! return ret; } /* Calculate "first" etc. for the right child. */ if (node->right != NULL) { ret = analyze_tree (dfa, node->right); if (BE (ret != REG_NOERROR, 0)) ! return ret; } return REG_NOERROR; } --- 1003,1016 ---- { ret = analyze_tree (dfa, node->left); if (BE (ret != REG_NOERROR, 0)) ! return ret; } /* Calculate "first" etc. for the right child. */ if (node->right != NULL) { ret = analyze_tree (dfa, node->right); if (BE (ret != REG_NOERROR, 0)) ! return ret; } return REG_NOERROR; } *************** *** 1036,1042 **** assert (node->left != NULL); #endif if (node->left->first == -1) ! calc_first (dfa, node->left); node->first = node->left->first; break; case OP_ALT: --- 1062,1068 ---- assert (node->left != NULL); #endif if (node->left->first == -1) ! calc_first (dfa, node->left); node->first = node->left->first; break; case OP_ALT: *************** *** 1048,1054 **** assert (node->left != NULL); #endif if (node->left->first == -1) ! calc_first (dfa, node->left); node->first = node->left->first; break; } --- 1074,1080 ---- assert (node->left != NULL); #endif if (node->left->first == -1) ! calc_first (dfa, node->left); node->first = node->left->first; break; } *************** *** 1068,1074 **** node->next = -1; idx = node->node_idx; if (node->type == 0) ! 
dfa->nexts[idx] = node->next; return; } --- 1094,1100 ---- node->next = -1; idx = node->node_idx; if (node->type == 0) ! dfa->nexts[idx] = node->next; return; } *************** *** 1083,1098 **** break; case CONCAT: if (parent->left == node) ! { ! if (parent->right->first == -1) ! calc_first (dfa, parent->right); ! node->next = parent->right->first; ! break; ! } /* else fall through */ default: if (parent->next == -1) ! calc_next (dfa, parent); node->next = parent->next; break; } --- 1109,1124 ---- break; case CONCAT: if (parent->left == node) ! { ! if (parent->right->first == -1) ! calc_first (dfa, parent->right); ! node->next = parent->right->first; ! break; ! } /* else fall through */ default: if (parent->next == -1) ! calc_next (dfa, parent); node->next = parent->next; break; } *************** *** 1113,1162 **** if (node->type == 0) { if (dfa->nodes[idx].type == OP_DUP_ASTERISK ! || dfa->nodes[idx].type == OP_DUP_PLUS ! || dfa->nodes[idx].type == OP_DUP_QUESTION) ! { ! if (node->left->first == -1) ! calc_first (dfa, node->left); ! if (node->next == -1) ! calc_next (dfa, node); ! re_node_set_init_2 (dfa->edests + idx, node->left->first, ! node->next); ! } else if (dfa->nodes[idx].type == OP_ALT) ! { ! int left, right; ! if (node->left != NULL) ! { ! if (node->left->first == -1) ! calc_first (dfa, node->left); ! left = node->left->first; ! } ! else ! { ! if (node->next == -1) ! calc_next (dfa, node); ! left = node->next; ! } ! if (node->right != NULL) ! { ! if (node->right->first == -1) ! calc_first (dfa, node->right); ! right = node->right->first; ! } ! else ! { ! if (node->next == -1) ! calc_next (dfa, node); ! right = node->next; ! } ! re_node_set_init_2 (dfa->edests + idx, left, right); ! } else if (dfa->nodes[idx].type == ANCHOR ! || dfa->nodes[idx].type == OP_OPEN_SUBEXP ! || dfa->nodes[idx].type == OP_CLOSE_SUBEXP ! || dfa->nodes[idx].type == OP_BACK_REF) ! 
re_node_set_init_1 (dfa->edests + idx, node->next); } } --- 1139,1188 ---- if (node->type == 0) { if (dfa->nodes[idx].type == OP_DUP_ASTERISK ! || dfa->nodes[idx].type == OP_DUP_PLUS ! || dfa->nodes[idx].type == OP_DUP_QUESTION) ! { ! if (node->left->first == -1) ! calc_first (dfa, node->left); ! if (node->next == -1) ! calc_next (dfa, node); ! re_node_set_init_2 (dfa->edests + idx, node->left->first, ! node->next); ! } else if (dfa->nodes[idx].type == OP_ALT) ! { ! int left, right; ! if (node->left != NULL) ! { ! if (node->left->first == -1) ! calc_first (dfa, node->left); ! left = node->left->first; ! } ! else ! { ! if (node->next == -1) ! calc_next (dfa, node); ! left = node->next; ! } ! if (node->right != NULL) ! { ! if (node->right->first == -1) ! calc_first (dfa, node->right); ! right = node->right->first; ! } ! else ! { ! if (node->next == -1) ! calc_next (dfa, node); ! right = node->next; ! } ! re_node_set_init_2 (dfa->edests + idx, left, right); ! } else if (dfa->nodes[idx].type == ANCHOR ! || dfa->nodes[idx].type == OP_OPEN_SUBEXP ! || dfa->nodes[idx].type == OP_CLOSE_SUBEXP ! || dfa->nodes[idx].type == OP_BACK_REF) ! re_node_set_init_1 (dfa->edests + idx, node->next); } } *************** *** 1166,1172 **** static reg_errcode_t duplicate_node_closure (dfa, top_org_node, top_clone_node, root_node, ! init_constraint) re_dfa_t *dfa; int top_org_node, top_clone_node, root_node; unsigned int init_constraint; --- 1192,1198 ---- static reg_errcode_t duplicate_node_closure (dfa, top_org_node, top_clone_node, root_node, ! init_constraint) re_dfa_t *dfa; int top_org_node, top_clone_node, root_node; unsigned int init_constraint; *************** *** 1178,1261 **** { int org_dest, clone_dest; if (dfa->nodes[org_node].type == OP_BACK_REF) ! { ! /* If the back reference epsilon-transit, its destination must ! also have the constraint. Then duplicate the epsilon closure ! of the destination of the back reference, and store it in ! edests of the back reference. */ ! 
org_dest = dfa->nexts[org_node]; ! re_node_set_empty (dfa->edests + clone_node); ! err = duplicate_node (&clone_dest, dfa, org_dest, constraint); ! if (BE (err != REG_NOERROR, 0)) ! return err; ! dfa->nexts[clone_node] = dfa->nexts[org_node]; ! ret = re_node_set_insert (dfa->edests + clone_node, clone_dest); ! if (BE (ret < 0, 0)) ! return REG_ESPACE; ! } else if (dfa->edests[org_node].nelem == 0) ! { ! /* In case of the node can't epsilon-transit, don't duplicate the ! destination and store the original destination as the ! destination of the node. */ ! dfa->nexts[clone_node] = dfa->nexts[org_node]; ! break; ! } else if (dfa->edests[org_node].nelem == 1) ! { ! /* In case of the node can epsilon-transit, and it has only one ! destination. */ ! org_dest = dfa->edests[org_node].elems[0]; ! re_node_set_empty (dfa->edests + clone_node); ! if (dfa->nodes[org_node].type == ANCHOR) ! { ! /* In case of the node has another constraint, append it. */ ! if (org_node == root_node && clone_node != org_node) ! { ! /* ...but if the node is root_node itself, it means the ! epsilon closure have a loop, then tie it to the ! destination of the root_node. */ ! ret = re_node_set_insert (dfa->edests + clone_node, ! org_dest); ! if (BE (ret < 0, 0)) ! return REG_ESPACE; ! break; ! } ! constraint |= dfa->nodes[org_node].opr.ctx_type; ! } ! err = duplicate_node (&clone_dest, dfa, org_dest, constraint); ! if (BE (err != REG_NOERROR, 0)) ! return err; ! ret = re_node_set_insert (dfa->edests + clone_node, clone_dest); ! if (BE (ret < 0, 0)) ! return REG_ESPACE; ! } else /* dfa->edests[org_node].nelem == 2 */ ! { ! /* In case of the node can epsilon-transit, and it has two ! destinations. */ ! org_dest = dfa->edests[org_node].elems[0]; ! re_node_set_empty (dfa->edests + clone_node); ! err = duplicate_node (&clone_dest, dfa, org_dest, constraint); ! if (BE (err != REG_NOERROR, 0)) ! return err; ! ret = re_node_set_insert (dfa->edests + clone_node, clone_dest); ! if (BE (ret < 0, 0)) ! 
return REG_ESPACE; ! ! err = duplicate_node_closure (dfa, org_dest, clone_dest, root_node, ! constraint); ! if (BE (err != REG_NOERROR, 0)) ! return err; ! ! org_dest = dfa->edests[org_node].elems[1]; ! err = duplicate_node (&clone_dest, dfa, org_dest, constraint); ! if (BE (err != REG_NOERROR, 0)) ! return err; ! ret = re_node_set_insert (dfa->edests + clone_node, clone_dest); ! if (BE (ret < 0, 0)) ! return REG_ESPACE; ! } org_node = org_dest; clone_node = clone_dest; } --- 1204,1287 ---- { int org_dest, clone_dest; if (dfa->nodes[org_node].type == OP_BACK_REF) ! { ! /* If the back reference epsilon-transit, its destination must ! also have the constraint. Then duplicate the epsilon closure ! of the destination of the back reference, and store it in ! edests of the back reference. */ ! org_dest = dfa->nexts[org_node]; ! re_node_set_empty (dfa->edests + clone_node); ! err = duplicate_node (&clone_dest, dfa, org_dest, constraint); ! if (BE (err != REG_NOERROR, 0)) ! return err; ! dfa->nexts[clone_node] = dfa->nexts[org_node]; ! ret = re_node_set_insert (dfa->edests + clone_node, clone_dest); ! if (BE (ret < 0, 0)) ! return REG_ESPACE; ! } else if (dfa->edests[org_node].nelem == 0) ! { ! /* In case of the node can't epsilon-transit, don't duplicate the ! destination and store the original destination as the ! destination of the node. */ ! dfa->nexts[clone_node] = dfa->nexts[org_node]; ! break; ! } else if (dfa->edests[org_node].nelem == 1) ! { ! /* In case of the node can epsilon-transit, and it has only one ! destination. */ ! org_dest = dfa->edests[org_node].elems[0]; ! re_node_set_empty (dfa->edests + clone_node); ! if (dfa->nodes[org_node].type == ANCHOR) ! { ! /* In case of the node has another constraint, append it. */ ! if (org_node == root_node && clone_node != org_node) ! { ! /* ...but if the node is root_node itself, it means the ! epsilon closure have a loop, then tie it to the ! destination of the root_node. */ ! 
ret = re_node_set_insert (dfa->edests + clone_node, ! org_dest); ! if (BE (ret < 0, 0)) ! return REG_ESPACE; ! break; ! } ! constraint |= dfa->nodes[org_node].opr.ctx_type; ! } ! err = duplicate_node (&clone_dest, dfa, org_dest, constraint); ! if (BE (err != REG_NOERROR, 0)) ! return err; ! ret = re_node_set_insert (dfa->edests + clone_node, clone_dest); ! if (BE (ret < 0, 0)) ! return REG_ESPACE; ! } else /* dfa->edests[org_node].nelem == 2 */ ! { ! /* In case of the node can epsilon-transit, and it has two ! destinations. */ ! org_dest = dfa->edests[org_node].elems[0]; ! re_node_set_empty (dfa->edests + clone_node); ! err = duplicate_node (&clone_dest, dfa, org_dest, constraint); ! if (BE (err != REG_NOERROR, 0)) ! return err; ! ret = re_node_set_insert (dfa->edests + clone_node, clone_dest); ! if (BE (ret < 0, 0)) ! return REG_ESPACE; ! ! err = duplicate_node_closure (dfa, org_dest, clone_dest, root_node, ! constraint); ! if (BE (err != REG_NOERROR, 0)) ! return err; ! ! org_dest = dfa->edests[org_node].elems[1]; ! err = duplicate_node (&clone_dest, dfa, org_dest, constraint); ! if (BE (err != REG_NOERROR, 0)) ! return err; ! ret = re_node_set_insert (dfa->edests + clone_node, clone_dest); ! if (BE (ret < 0, 0)) ! return REG_ESPACE; ! } org_node = org_dest; clone_node = clone_dest; } *************** *** 1299,1308 **** for (src = 0; src < dfa->nodes_len; ++src) { for (idx = 0; idx < dfa->eclosures[src].nelem; ++idx) ! { ! dest = dfa->eclosures[src].elems[idx]; ! re_node_set_insert (dfa->inveclosures + dest, src); ! } } } --- 1325,1334 ---- for (src = 0; src < dfa->nodes_len; ++src) { for (idx = 0; idx < dfa->eclosures[src].nelem; ++idx) ! { ! dest = dfa->eclosures[src].elems[idx]; ! re_node_set_insert (dfa->inveclosures + dest, src); ! } } } *************** *** 1323,1351 **** reg_errcode_t err; re_node_set eclosure_elem; if (node_idx == dfa->nodes_len) ! { ! if (!incomplete) ! break; ! incomplete = 0; ! node_idx = 0; ! 
} #ifdef DEBUG assert (dfa->eclosures[node_idx].nelem != -1); #endif /* If we have already calculated, skip it. */ if (dfa->eclosures[node_idx].nelem != 0) ! continue; /* Calculate epsilon closure of `node_idx'. */ err = calc_eclosure_iter (&eclosure_elem, dfa, node_idx, 1); if (BE (err != REG_NOERROR, 0)) ! return err; if (dfa->eclosures[node_idx].nelem == 0) ! { ! incomplete = 1; ! re_node_set_free (&eclosure_elem); ! } } return REG_NOERROR; } --- 1349,1377 ---- reg_errcode_t err; re_node_set eclosure_elem; if (node_idx == dfa->nodes_len) ! { ! if (!incomplete) ! break; ! incomplete = 0; ! node_idx = 0; ! } #ifdef DEBUG assert (dfa->eclosures[node_idx].nelem != -1); #endif /* If we have already calculated, skip it. */ if (dfa->eclosures[node_idx].nelem != 0) ! continue; /* Calculate epsilon closure of `node_idx'. */ err = calc_eclosure_iter (&eclosure_elem, dfa, node_idx, 1); if (BE (err != REG_NOERROR, 0)) ! return err; if (dfa->eclosures[node_idx].nelem == 0) ! { ! incomplete = 1; ! re_node_set_free (&eclosure_elem); ! } } return REG_NOERROR; } *************** *** 1372,1378 **** dfa->eclosures[node].nelem = -1; constraint = ((dfa->nodes[node].type == ANCHOR) ! ? dfa->nodes[node].opr.ctx_type : 0); /* If the current node has constraints, duplicate all nodes. Since they must inherit the constraints. */ if (constraint && !dfa->nodes[dfa->edests[node].elems[0]].duplicated) --- 1398,1404 ---- dfa->eclosures[node].nelem = -1; constraint = ((dfa->nodes[node].type == ANCHOR) ! ? dfa->nodes[node].opr.ctx_type : 0); /* If the current node has constraints, duplicate all nodes. Since they must inherit the constraints. */ if (constraint && !dfa->nodes[dfa->edests[node].elems[0]].duplicated) *************** *** 1381,1421 **** org_node = cur_node = node; err = duplicate_node_closure (dfa, node, node, node, constraint); if (BE (err != REG_NOERROR, 0)) ! return err; } /* Expand each epsilon destination nodes. 
*/ if (IS_EPSILON_NODE(dfa->nodes[node].type)) for (i = 0; i < dfa->edests[node].nelem; ++i) { ! re_node_set eclosure_elem; ! int edest = dfa->edests[node].elems[i]; ! /* If calculating the epsilon closure of `edest' is in progress, ! return intermediate result. */ ! if (dfa->eclosures[edest].nelem == -1) ! { ! incomplete = 1; ! continue; ! } ! /* If we haven't calculated the epsilon closure of `edest' yet, ! calculate now. Otherwise use calculated epsilon closure. */ ! if (dfa->eclosures[edest].nelem == 0) ! { ! err = calc_eclosure_iter (&eclosure_elem, dfa, edest, 0); ! if (BE (err != REG_NOERROR, 0)) ! return err; ! } ! else ! eclosure_elem = dfa->eclosures[edest]; ! /* Merge the epsilon closure of `edest'. */ ! re_node_set_merge (&eclosure, &eclosure_elem); ! /* If the epsilon closure of `edest' is incomplete, ! the epsilon closure of this node is also incomplete. */ ! if (dfa->eclosures[edest].nelem == 0) ! { ! incomplete = 1; ! re_node_set_free (&eclosure_elem); ! } } /* Epsilon closures include itself. */ --- 1407,1447 ---- org_node = cur_node = node; err = duplicate_node_closure (dfa, node, node, node, constraint); if (BE (err != REG_NOERROR, 0)) ! return err; } /* Expand each epsilon destination nodes. */ if (IS_EPSILON_NODE(dfa->nodes[node].type)) for (i = 0; i < dfa->edests[node].nelem; ++i) { ! re_node_set eclosure_elem; ! int edest = dfa->edests[node].elems[i]; ! /* If calculating the epsilon closure of `edest' is in progress, ! return intermediate result. */ ! if (dfa->eclosures[edest].nelem == -1) ! { ! incomplete = 1; ! continue; ! } ! /* If we haven't calculated the epsilon closure of `edest' yet, ! calculate now. Otherwise use calculated epsilon closure. */ ! if (dfa->eclosures[edest].nelem == 0) ! { ! err = calc_eclosure_iter (&eclosure_elem, dfa, edest, 0); ! if (BE (err != REG_NOERROR, 0)) ! return err; ! } ! else ! eclosure_elem = dfa->eclosures[edest]; ! /* Merge the epsilon closure of `edest'. */ ! 
re_node_set_merge (&eclosure, &eclosure_elem); ! /* If the epsilon closure of `edest' is incomplete, ! the epsilon closure of this node is also incomplete. */ ! if (dfa->eclosures[edest].nelem == 0) ! { ! incomplete = 1; ! re_node_set_free (&eclosure_elem); ! } } /* Epsilon closures include itself. */ *************** *** 1479,1583 **** { unsigned char c2; if (re_string_cur_idx (input) + 1 >= re_string_length (input)) ! { ! token->type = BACK_SLASH; ! return 1; ! } c2 = re_string_peek_byte_case (input, 1); token->opr.c = c2; token->type = CHARACTER; switch (c2) ! { ! case '|': ! if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_NO_BK_VBAR)) ! token->type = OP_ALT; ! break; ! case '1': case '2': case '3': case '4': case '5': ! case '6': case '7': case '8': case '9': ! if (!(syntax & RE_NO_BK_REFS)) ! { ! token->type = OP_BACK_REF; ! token->opr.idx = c2 - '0'; ! } ! break; ! case '<': ! if (!(syntax & RE_NO_GNU_OPS)) ! { ! token->type = ANCHOR; ! token->opr.idx = WORD_FIRST; ! } ! break; ! case '>': ! if (!(syntax & RE_NO_GNU_OPS)) ! { ! token->type = ANCHOR; ! token->opr.idx = WORD_LAST; ! } ! break; ! case 'b': ! if (!(syntax & RE_NO_GNU_OPS)) ! { ! token->type = ANCHOR; ! token->opr.idx = WORD_DELIM; ! } ! break; ! case 'B': ! if (!(syntax & RE_NO_GNU_OPS)) ! { ! token->type = ANCHOR; ! token->opr.idx = INSIDE_WORD; ! } ! break; ! case 'w': ! if (!(syntax & RE_NO_GNU_OPS)) ! token->type = OP_WORD; ! break; ! case 'W': ! if (!(syntax & RE_NO_GNU_OPS)) ! token->type = OP_NOTWORD; ! break; ! case '`': ! if (!(syntax & RE_NO_GNU_OPS)) ! { ! token->type = ANCHOR; ! token->opr.idx = BUF_FIRST; ! } ! break; ! case '\'': ! if (!(syntax & RE_NO_GNU_OPS)) ! { ! token->type = ANCHOR; ! token->opr.idx = BUF_LAST; ! } ! break; ! case '(': ! if (!(syntax & RE_NO_BK_PARENS)) ! token->type = OP_OPEN_SUBEXP; ! break; ! case ')': ! if (!(syntax & RE_NO_BK_PARENS)) ! token->type = OP_CLOSE_SUBEXP; ! break; ! case '+': ! if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_BK_PLUS_QM)) ! 
token->type = OP_DUP_PLUS; ! break; ! case '?': ! if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_BK_PLUS_QM)) ! token->type = OP_DUP_QUESTION; ! break; ! case '{': ! if ((syntax & RE_INTERVALS) && (!(syntax & RE_NO_BK_BRACES))) ! token->type = OP_OPEN_DUP_NUM; ! break; ! case '}': ! if ((syntax & RE_INTERVALS) && (!(syntax & RE_NO_BK_BRACES))) ! token->type = OP_CLOSE_DUP_NUM; ! break; ! default: ! break; ! } return 2; } --- 1505,1609 ---- { unsigned char c2; if (re_string_cur_idx (input) + 1 >= re_string_length (input)) ! { ! token->type = BACK_SLASH; ! return 1; ! } c2 = re_string_peek_byte_case (input, 1); token->opr.c = c2; token->type = CHARACTER; switch (c2) ! { ! case '|': ! if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_NO_BK_VBAR)) ! token->type = OP_ALT; ! break; ! case '1': case '2': case '3': case '4': case '5': ! case '6': case '7': case '8': case '9': ! if (!(syntax & RE_NO_BK_REFS)) ! { ! token->type = OP_BACK_REF; ! token->opr.idx = c2 - '0'; ! } ! break; ! case '<': ! if (!(syntax & RE_NO_GNU_OPS)) ! { ! token->type = ANCHOR; ! token->opr.idx = WORD_FIRST; ! } ! break; ! case '>': ! if (!(syntax & RE_NO_GNU_OPS)) ! { ! token->type = ANCHOR; ! token->opr.idx = WORD_LAST; ! } ! break; ! case 'b': ! if (!(syntax & RE_NO_GNU_OPS)) ! { ! token->type = ANCHOR; ! token->opr.idx = WORD_DELIM; ! } ! break; ! case 'B': ! if (!(syntax & RE_NO_GNU_OPS)) ! { ! token->type = ANCHOR; ! token->opr.idx = INSIDE_WORD; ! } ! break; ! case 'w': ! if (!(syntax & RE_NO_GNU_OPS)) ! token->type = OP_WORD; ! break; ! case 'W': ! if (!(syntax & RE_NO_GNU_OPS)) ! token->type = OP_NOTWORD; ! break; ! case '`': ! if (!(syntax & RE_NO_GNU_OPS)) ! { ! token->type = ANCHOR; ! token->opr.idx = BUF_FIRST; ! } ! break; ! case '\'': ! if (!(syntax & RE_NO_GNU_OPS)) ! { ! token->type = ANCHOR; ! token->opr.idx = BUF_LAST; ! } ! break; ! case '(': ! if (!(syntax & RE_NO_BK_PARENS)) ! token->type = OP_OPEN_SUBEXP; ! break; ! case ')': ! if (!(syntax & RE_NO_BK_PARENS)) ! 
token->type = OP_CLOSE_SUBEXP; ! break; ! case '+': ! if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_BK_PLUS_QM)) ! token->type = OP_DUP_PLUS; ! break; ! case '?': ! if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_BK_PLUS_QM)) ! token->type = OP_DUP_QUESTION; ! break; ! case '{': ! if ((syntax & RE_INTERVALS) && (!(syntax & RE_NO_BK_BRACES))) ! token->type = OP_OPEN_DUP_NUM; ! break; ! case '}': ! if ((syntax & RE_INTERVALS) && (!(syntax & RE_NO_BK_BRACES))) ! token->type = OP_CLOSE_DUP_NUM; ! break; ! default: ! break; ! } return 2; } *************** *** 1586,1623 **** { case '\n': if (syntax & RE_NEWLINE_ALT) ! token->type = OP_ALT; break; case '|': if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_NO_BK_VBAR)) ! token->type = OP_ALT; break; case '*': token->type = OP_DUP_ASTERISK; break; case '+': if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_BK_PLUS_QM)) ! token->type = OP_DUP_PLUS; break; case '?': if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_BK_PLUS_QM)) ! token->type = OP_DUP_QUESTION; break; case '{': if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES)) ! token->type = OP_OPEN_DUP_NUM; break; case '}': if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES)) ! token->type = OP_CLOSE_DUP_NUM; break; case '(': if (syntax & RE_NO_BK_PARENS) ! token->type = OP_OPEN_SUBEXP; break; case ')': if (syntax & RE_NO_BK_PARENS) ! token->type = OP_CLOSE_SUBEXP; break; case '[': token->type = OP_OPEN_BRACKET; --- 1612,1649 ---- { case '\n': if (syntax & RE_NEWLINE_ALT) ! token->type = OP_ALT; break; case '|': if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_NO_BK_VBAR)) ! token->type = OP_ALT; break; case '*': token->type = OP_DUP_ASTERISK; break; case '+': if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_BK_PLUS_QM)) ! token->type = OP_DUP_PLUS; break; case '?': if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_BK_PLUS_QM)) ! token->type = OP_DUP_QUESTION; break; case '{': if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES)) ! 
token->type = OP_OPEN_DUP_NUM; break; case '}': if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES)) ! token->type = OP_CLOSE_DUP_NUM; break; case '(': if (syntax & RE_NO_BK_PARENS) ! token->type = OP_OPEN_SUBEXP; break; case ')': if (syntax & RE_NO_BK_PARENS) ! token->type = OP_CLOSE_SUBEXP; break; case '[': token->type = OP_OPEN_BRACKET; *************** *** 1627,1653 **** break; case '^': if (!(syntax & RE_CONTEXT_INDEP_ANCHORS) && ! re_string_cur_idx (input) != 0) ! { ! char prev = re_string_peek_byte (input, -1); ! if (prev != '|' && prev != '(' && ! (!(syntax & RE_NEWLINE_ALT) || prev != '\n')) ! break; ! } token->type = ANCHOR; token->opr.idx = LINE_FIRST; break; case '$': if (!(syntax & RE_CONTEXT_INDEP_ANCHORS) && ! re_string_cur_idx (input) + 1 != re_string_length (input)) ! { ! re_token_t next; ! re_string_skip_bytes (input, 1); ! peek_token (&next, input, syntax); ! re_string_skip_bytes (input, -1); ! if (next.type != OP_ALT && next.type != OP_CLOSE_SUBEXP) ! break; ! } token->type = ANCHOR; token->opr.idx = LINE_LAST; break; --- 1653,1679 ---- break; case '^': if (!(syntax & RE_CONTEXT_INDEP_ANCHORS) && ! re_string_cur_idx (input) != 0) ! { ! char prev = re_string_peek_byte (input, -1); ! if (prev != '|' && prev != '(' && ! (!(syntax & RE_NEWLINE_ALT) || prev != '\n')) ! break; ! } token->type = ANCHOR; token->opr.idx = LINE_FIRST; break; case '$': if (!(syntax & RE_CONTEXT_INDEP_ANCHORS) && ! re_string_cur_idx (input) + 1 != re_string_length (input)) ! { ! re_token_t next; ! re_string_skip_bytes (input, 1); ! peek_token (&next, input, syntax); ! re_string_skip_bytes (input, -1); ! if (next.type != OP_ALT && next.type != OP_CLOSE_SUBEXP) ! break; ! } token->type = ANCHOR; token->opr.idx = LINE_LAST; break; *************** *** 1702,1727 **** token->opr.c = c2; token_len = 2; switch (c2) ! { ! case '.': ! token->type = OP_OPEN_COLL_ELEM; ! break; ! case '=': ! token->type = OP_OPEN_EQUIV_CLASS; ! break; ! case ':': ! if (syntax & RE_CHAR_CLASSES) ! 
{ ! token->type = OP_OPEN_CHAR_CLASS; ! break; ! } ! /* else fall through. */ ! default: ! token->type = CHARACTER; ! token->opr.c = c; ! token_len = 1; ! break; ! } return token_len; } switch (c) --- 1728,1753 ---- token->opr.c = c2; token_len = 2; switch (c2) ! { ! case '.': ! token->type = OP_OPEN_COLL_ELEM; ! break; ! case '=': ! token->type = OP_OPEN_EQUIV_CLASS; ! break; ! case ':': ! if (syntax & RE_CHAR_CLASSES) ! { ! token->type = OP_OPEN_CHAR_CLASS; ! break; ! } ! /* else fall through. */ ! default: ! token->type = CHARACTER; ! token->opr.c = c; ! token_len = 1; ! break; ! } return token_len; } switch (c) *************** *** 1747,1755 **** Parse the regular expression REGEXP and return the structure tree. If an error is occured, ERR is set by error code, and return NULL. This function build the following tree, from regular expression : ! CAT ! / \ ! / \ EOR CAT means concatenation. --- 1773,1781 ---- Parse the regular expression REGEXP and return the structure tree. If an error is occured, ERR is set by error code, and return NULL. This function build the following tree, from regular expression : ! CAT ! / \ ! / \ EOR CAT means concatenation. *************** *** 1786,1794 **** /* This function build the following tree, from regular expression |: ! ALT ! / \ ! / \ ALT means alternative, which represents the operator `|'. */ --- 1812,1820 ---- /* This function build the following tree, from regular expression |: ! ALT ! / \ ! / \ ALT means alternative, which represents the operator `|'. */ *************** *** 1815,1837 **** new_idx = re_dfa_add_node (dfa, alt_token, 0); *token = fetch_token (regexp, syntax); if (token->type != OP_ALT && token->type != END_OF_RE ! && (nest == 0 || token->type != OP_CLOSE_SUBEXP)) ! { ! branch = parse_branch (regexp, preg, token, syntax, nest, err); ! if (BE (*err != REG_NOERROR && branch == NULL, 0)) ! { ! free_bin_tree (tree); ! return NULL; ! } ! } else ! 
branch = NULL; tree = create_tree (tree, branch, 0, new_idx); if (BE (new_idx == -1 || tree == NULL, 0)) ! { ! *err = REG_ESPACE; ! return NULL; ! } dfa->has_plural_match = 1; } return tree; --- 1841,1863 ---- new_idx = re_dfa_add_node (dfa, alt_token, 0); *token = fetch_token (regexp, syntax); if (token->type != OP_ALT && token->type != END_OF_RE ! && (nest == 0 || token->type != OP_CLOSE_SUBEXP)) ! { ! branch = parse_branch (regexp, preg, token, syntax, nest, err); ! if (BE (*err != REG_NOERROR && branch == NULL, 0)) ! { ! free_bin_tree (tree); ! return NULL; ! } ! } else ! branch = NULL; tree = create_tree (tree, branch, 0, new_idx); if (BE (new_idx == -1 || tree == NULL, 0)) ! { ! *err = REG_ESPACE; ! return NULL; ! } dfa->has_plural_match = 1; } return tree; *************** *** 1839,1846 **** /* This function build the following tree, from regular expression : ! CAT ! / \ / \ --- 1865,1872 ---- /* This function build the following tree, from regular expression : ! CAT ! / \ / \ *************** *** 1861,1894 **** return NULL; while (token->type != OP_ALT && token->type != END_OF_RE ! && (nest == 0 || token->type != OP_CLOSE_SUBEXP)) { exp = parse_expression (regexp, preg, token, syntax, nest, err); if (BE (*err != REG_NOERROR && exp == NULL, 0)) ! { ! free_bin_tree (tree); ! return NULL; ! } if (tree != NULL && exp != NULL) ! { ! tree = create_tree (tree, exp, CONCAT, 0); ! if (tree == NULL) ! { ! *err = REG_ESPACE; ! return NULL; ! } ! } else if (tree == NULL) ! tree = exp; /* Otherwise exp == NULL, we don't need to create new tree. */ } return tree; } /* This function build the following tree, from regular expression a*: ! * ! | ! a */ static bin_tree_t * --- 1887,1920 ---- return NULL; while (token->type != OP_ALT && token->type != END_OF_RE ! && (nest == 0 || token->type != OP_CLOSE_SUBEXP)) { exp = parse_expression (regexp, preg, token, syntax, nest, err); if (BE (*err != REG_NOERROR && exp == NULL, 0)) ! { ! free_bin_tree (tree); ! return NULL; ! 
} if (tree != NULL && exp != NULL) ! { ! tree = create_tree (tree, exp, CONCAT, 0); ! if (tree == NULL) ! { ! *err = REG_ESPACE; ! return NULL; ! } ! } else if (tree == NULL) ! tree = exp; /* Otherwise exp == NULL, we don't need to create new tree. */ } return tree; } /* This function build the following tree, from regular expression a*: ! * ! | ! a */ static bin_tree_t * *************** *** 1909,1959 **** new_idx = re_dfa_add_node (dfa, *token, 0); tree = create_tree (NULL, NULL, 0, new_idx); if (BE (new_idx == -1 || tree == NULL, 0)) ! { ! *err = REG_ESPACE; ! return NULL; ! } #ifdef RE_ENABLE_I18N if (MB_CUR_MAX > 1) ! { ! while (!re_string_eoi (regexp) ! && !re_string_first_byte (regexp, re_string_cur_idx (regexp))) ! { ! bin_tree_t *mbc_remain; ! *token = fetch_token (regexp, syntax); ! new_idx = re_dfa_add_node (dfa, *token, 0); ! mbc_remain = create_tree (NULL, NULL, 0, new_idx); ! tree = create_tree (tree, mbc_remain, CONCAT, 0); ! if (BE (new_idx == -1 || mbc_remain == NULL || tree == NULL, 0)) ! return *err = REG_ESPACE, NULL; ! } ! } #endif break; case OP_OPEN_SUBEXP: tree = parse_sub_exp (regexp, preg, token, syntax, nest + 1, err); if (BE (*err != REG_NOERROR && tree == NULL, 0)) ! return NULL; break; case OP_OPEN_BRACKET: tree = parse_bracket_exp (regexp, dfa, token, syntax, err); if (BE (*err != REG_NOERROR && tree == NULL, 0)) ! return NULL; break; case OP_BACK_REF: if (BE (preg->re_nsub < token->opr.idx ! || dfa->subexps[token->opr.idx - 1].end == -1, 0)) ! { ! *err = REG_ESUBREG; ! return NULL; ! } new_idx = re_dfa_add_node (dfa, *token, 0); tree = create_tree (NULL, NULL, 0, new_idx); if (BE (new_idx == -1 || tree == NULL, 0)) ! { ! *err = REG_ESPACE; ! return NULL; ! } ++dfa->nbackref; dfa->has_mb_node = 1; break; --- 1935,1985 ---- new_idx = re_dfa_add_node (dfa, *token, 0); tree = create_tree (NULL, NULL, 0, new_idx); if (BE (new_idx == -1 || tree == NULL, 0)) ! { ! *err = REG_ESPACE; ! return NULL; ! 
} #ifdef RE_ENABLE_I18N if (MB_CUR_MAX > 1) ! { ! while (!re_string_eoi (regexp) ! && !re_string_first_byte (regexp, re_string_cur_idx (regexp))) ! { ! bin_tree_t *mbc_remain; ! *token = fetch_token (regexp, syntax); ! new_idx = re_dfa_add_node (dfa, *token, 0); ! mbc_remain = create_tree (NULL, NULL, 0, new_idx); ! tree = create_tree (tree, mbc_remain, CONCAT, 0); ! if (BE (new_idx == -1 || mbc_remain == NULL || tree == NULL, 0)) ! return *err = REG_ESPACE, NULL; ! } ! } #endif break; case OP_OPEN_SUBEXP: tree = parse_sub_exp (regexp, preg, token, syntax, nest + 1, err); if (BE (*err != REG_NOERROR && tree == NULL, 0)) ! return NULL; break; case OP_OPEN_BRACKET: tree = parse_bracket_exp (regexp, dfa, token, syntax, err); if (BE (*err != REG_NOERROR && tree == NULL, 0)) ! return NULL; break; case OP_BACK_REF: if (BE (preg->re_nsub < token->opr.idx ! || dfa->subexps[token->opr.idx - 1].end == -1, 0)) ! { ! *err = REG_ESUBREG; ! return NULL; ! } new_idx = re_dfa_add_node (dfa, *token, 0); tree = create_tree (NULL, NULL, 0, new_idx); if (BE (new_idx == -1 || tree == NULL, 0)) ! { ! *err = REG_ESPACE; ! return NULL; ! } ++dfa->nbackref; dfa->has_mb_node = 1; break; *************** *** 1962,1984 **** case OP_DUP_QUESTION: case OP_OPEN_DUP_NUM: if (syntax & RE_CONTEXT_INVALID_OPS) ! { ! *err = REG_BADRPT; ! return NULL; ! } else if (syntax & RE_CONTEXT_INDEP_OPS) ! { ! *token = fetch_token (regexp, syntax); ! return parse_expression (regexp, preg, token, syntax, nest, err); ! } /* else fall through */ case OP_CLOSE_SUBEXP: if ((token->type == OP_CLOSE_SUBEXP) && ! !(syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)) ! { ! *err = REG_ERPAREN; ! return NULL; ! } /* else fall through */ case OP_CLOSE_DUP_NUM: /* We treat it as a normal character. */ --- 1988,2010 ---- case OP_DUP_QUESTION: case OP_OPEN_DUP_NUM: if (syntax & RE_CONTEXT_INVALID_OPS) ! { ! *err = REG_BADRPT; ! return NULL; ! } else if (syntax & RE_CONTEXT_INDEP_OPS) ! { ! *token = fetch_token (regexp, syntax); ! 
return parse_expression (regexp, preg, token, syntax, nest, err); ! } /* else fall through */ case OP_CLOSE_SUBEXP: if ((token->type == OP_CLOSE_SUBEXP) && ! !(syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)) ! { ! *err = REG_ERPAREN; ! return NULL; ! } /* else fall through */ case OP_CLOSE_DUP_NUM: /* We treat it as a normal character. */ *************** *** 1988,2059 **** new_idx = re_dfa_add_node (dfa, *token, 0); tree = create_tree (NULL, NULL, 0, new_idx); if (BE (new_idx == -1 || tree == NULL, 0)) ! { ! *err = REG_ESPACE; ! return NULL; ! } break; case ANCHOR: if (dfa->word_char == NULL) ! { ! *err = init_word_char (dfa); ! if (BE (*err != REG_NOERROR, 0)) ! return NULL; ! } if (token->opr.ctx_type == WORD_DELIM) ! { ! bin_tree_t *tree_first, *tree_last; ! int idx_first, idx_last; ! token->opr.ctx_type = WORD_FIRST; ! idx_first = re_dfa_add_node (dfa, *token, 0); ! tree_first = create_tree (NULL, NULL, 0, idx_first); ! token->opr.ctx_type = WORD_LAST; ! idx_last = re_dfa_add_node (dfa, *token, 0); ! tree_last = create_tree (NULL, NULL, 0, idx_last); ! token->type = OP_ALT; ! new_idx = re_dfa_add_node (dfa, *token, 0); ! tree = create_tree (tree_first, tree_last, 0, new_idx); ! if (BE (idx_first == -1 || idx_last == -1 || new_idx == -1 ! || tree_first == NULL || tree_last == NULL ! || tree == NULL, 0)) ! { ! *err = REG_ESPACE; ! return NULL; ! } ! } else ! { ! new_idx = re_dfa_add_node (dfa, *token, 0); ! tree = create_tree (NULL, NULL, 0, new_idx); ! if (BE (new_idx == -1 || tree == NULL, 0)) ! return *err = REG_ESPACE, NULL; ! } /* We must return here, since ANCHORs can't be followed ! by repetition operators. ! eg. RE"^*" is invalid or "", ! it must not be "". */ *token = fetch_token (regexp, syntax); return tree; case OP_PERIOD: new_idx = re_dfa_add_node (dfa, *token, 0); tree = create_tree (NULL, NULL, 0, new_idx); if (BE (new_idx == -1 || tree == NULL, 0)) ! { ! *err = REG_ESPACE; ! return NULL; ! } if (MB_CUR_MAX > 1) ! 
dfa->has_mb_node = 1; break; case OP_WORD: tree = build_word_op (dfa, 0, err); if (BE (*err != REG_NOERROR && tree == NULL, 0)) ! return NULL; break; case OP_NOTWORD: tree = build_word_op (dfa, 1, err); if (BE (*err != REG_NOERROR && tree == NULL, 0)) ! return NULL; break; case OP_ALT: case END_OF_RE: --- 2014,2085 ---- new_idx = re_dfa_add_node (dfa, *token, 0); tree = create_tree (NULL, NULL, 0, new_idx); if (BE (new_idx == -1 || tree == NULL, 0)) ! { ! *err = REG_ESPACE; ! return NULL; ! } break; case ANCHOR: if (dfa->word_char == NULL) ! { ! *err = init_word_char (dfa); ! if (BE (*err != REG_NOERROR, 0)) ! return NULL; ! } if (token->opr.ctx_type == WORD_DELIM) ! { ! bin_tree_t *tree_first, *tree_last; ! int idx_first, idx_last; ! token->opr.ctx_type = WORD_FIRST; ! idx_first = re_dfa_add_node (dfa, *token, 0); ! tree_first = create_tree (NULL, NULL, 0, idx_first); ! token->opr.ctx_type = WORD_LAST; ! idx_last = re_dfa_add_node (dfa, *token, 0); ! tree_last = create_tree (NULL, NULL, 0, idx_last); ! token->type = OP_ALT; ! new_idx = re_dfa_add_node (dfa, *token, 0); ! tree = create_tree (tree_first, tree_last, 0, new_idx); ! if (BE (idx_first == -1 || idx_last == -1 || new_idx == -1 ! || tree_first == NULL || tree_last == NULL ! || tree == NULL, 0)) ! { ! *err = REG_ESPACE; ! return NULL; ! } ! } else ! { ! new_idx = re_dfa_add_node (dfa, *token, 0); ! tree = create_tree (NULL, NULL, 0, new_idx); ! if (BE (new_idx == -1 || tree == NULL, 0)) ! return *err = REG_ESPACE, NULL; ! } /* We must return here, since ANCHORs can't be followed ! by repetition operators. ! eg. RE"^*" is invalid or "", ! it must not be "". */ *token = fetch_token (regexp, syntax); return tree; case OP_PERIOD: new_idx = re_dfa_add_node (dfa, *token, 0); tree = create_tree (NULL, NULL, 0, new_idx); if (BE (new_idx == -1 || tree == NULL, 0)) ! { ! *err = REG_ESPACE; ! return NULL; ! } if (MB_CUR_MAX > 1) ! 
dfa->has_mb_node = 1; break; case OP_WORD: tree = build_word_op (dfa, 0, err); if (BE (*err != REG_NOERROR && tree == NULL, 0)) ! return NULL; break; case OP_NOTWORD: tree = build_word_op (dfa, 1, err); if (BE (*err != REG_NOERROR && tree == NULL, 0)) ! return NULL; break; case OP_ALT: case END_OF_RE: *************** *** 2071,2081 **** *token = fetch_token (regexp, syntax); while (token->type == OP_DUP_ASTERISK || token->type == OP_DUP_PLUS ! || token->type == OP_DUP_QUESTION || token->type == OP_OPEN_DUP_NUM) { tree = parse_dup_op (tree, regexp, dfa, token, syntax, err); if (BE (*err != REG_NOERROR && tree == NULL, 0)) ! return NULL; dfa->has_plural_match = 1; } --- 2097,2107 ---- *token = fetch_token (regexp, syntax); while (token->type == OP_DUP_ASTERISK || token->type == OP_DUP_PLUS ! || token->type == OP_DUP_QUESTION || token->type == OP_OPEN_DUP_NUM) { tree = parse_dup_op (tree, regexp, dfa, token, syntax, err); if (BE (*err != REG_NOERROR && tree == NULL, 0)) ! return NULL; dfa->has_plural_match = 1; } *************** *** 2084,2092 **** /* This function build the following tree, from regular expression (): ! SUBEXP ! | ! */ static bin_tree_t * --- 2110,2118 ---- /* This function build the following tree, from regular expression (): ! SUBEXP ! | ! */ static bin_tree_t * *************** *** 2109,2119 **** dfa->subexps_alloc *= 2; new_array = re_realloc (dfa->subexps, re_subexp_t, dfa->subexps_alloc); if (BE (new_array == NULL, 0)) ! { ! dfa->subexps_alloc /= 2; ! *err = REG_ESPACE; ! return NULL; ! } dfa->subexps = new_array; } dfa->subexps[cur_nsub].start = dfa->nodes_len; --- 2135,2145 ---- dfa->subexps_alloc *= 2; new_array = re_realloc (dfa->subexps, re_subexp_t, dfa->subexps_alloc); if (BE (new_array == NULL, 0)) ! { ! dfa->subexps_alloc /= 2; ! *err = REG_ESPACE; ! return NULL; ! 
} dfa->subexps = new_array; } dfa->subexps[cur_nsub].start = dfa->nodes_len; *************** *** 2136,2142 **** { tree = parse_reg_exp (regexp, preg, token, syntax, nest, err); if (BE (*err != REG_NOERROR && tree == NULL, 0)) ! return NULL; } if (BE (token->type != OP_CLOSE_SUBEXP, 0)) { --- 2162,2168 ---- { tree = parse_reg_exp (regexp, preg, token, syntax, nest, err); if (BE (*err != REG_NOERROR && tree == NULL, 0)) ! return NULL; } if (BE (token->type != OP_CLOSE_SUBEXP, 0)) { *************** *** 2148,2154 **** dfa->subexps[cur_nsub].end = dfa->nodes_len; right_par = create_tree (NULL, NULL, 0, new_idx); tree = ((tree == NULL) ? right_par ! : create_tree (tree, right_par, CONCAT, 0)); tree = create_tree (left_par, tree, CONCAT, 0); if (BE (new_idx == -1 || right_par == NULL || tree == NULL, 0)) { --- 2174,2180 ---- dfa->subexps[cur_nsub].end = dfa->nodes_len; right_par = create_tree (NULL, NULL, 0, new_idx); tree = ((tree == NULL) ? right_par ! : create_tree (tree, right_par, CONCAT, 0)); tree = create_tree (left_par, tree, CONCAT, 0); if (BE (new_idx == -1 || right_par == NULL || tree == NULL, 0)) { *************** *** 2182,2293 **** int start = fetch_number (regexp, token, syntax); bin_tree_t *elem; if (start == -1) ! { ! if (token->type == CHARACTER && token->opr.c == ',') ! start = 0; /* We treat "{,m}" as "{0,m}". */ ! else ! { ! *err = REG_BADBR; /* {} is invalid. */ ! return NULL; ! } ! } if (BE (start != -2, 1)) ! { ! /* We treat "{n}" as "{n,n}". */ ! end = ((token->type == OP_CLOSE_DUP_NUM) ? start ! : ((token->type == CHARACTER && token->opr.c == ',') ! ? fetch_number (regexp, token, syntax) : -2)); ! } if (BE (start == -2 || end == -2, 0)) ! { ! /* Invalid sequence. */ ! if (token->type == OP_CLOSE_DUP_NUM) ! goto parse_dup_op_invalid_interval; ! else ! goto parse_dup_op_ebrace; ! } if (BE (start == 0 && end == 0, 0)) ! { ! /* We treat "{0}" and "{0,0}" as null string. */ ! *token = fetch_token (regexp, syntax); ! free_bin_tree (dup_elem); ! 
return NULL; ! } /* Extract "{n,m}" to "...{0,}". */ elem = tree; for (i = 0; i < start; ++i) ! if (i != 0) ! { ! work_tree = duplicate_tree (elem, dfa); ! tree = create_tree (tree, work_tree, CONCAT, 0); ! if (BE (work_tree == NULL || tree == NULL, 0)) ! goto parse_dup_op_espace; ! } if (end == -1) ! { ! /* We treat "{0,}" as "*". */ ! dup_token.type = OP_DUP_ASTERISK; ! if (start > 0) ! { ! elem = duplicate_tree (elem, dfa); ! new_idx = re_dfa_add_node (dfa, dup_token, 0); ! work_tree = create_tree (elem, NULL, 0, new_idx); ! tree = create_tree (tree, work_tree, CONCAT, 0); ! if (BE (elem == NULL || new_idx == -1 || work_tree == NULL ! || tree == NULL, 0)) ! goto parse_dup_op_espace; ! } ! else ! { ! new_idx = re_dfa_add_node (dfa, dup_token, 0); ! tree = create_tree (elem, NULL, 0, new_idx); ! if (BE (new_idx == -1 || tree == NULL, 0)) ! goto parse_dup_op_espace; ! } ! } else if (end - start > 0) ! { ! /* Then extract "{0,m}" to "??...?". */ ! dup_token.type = OP_DUP_QUESTION; ! if (start > 0) ! { ! elem = duplicate_tree (elem, dfa); ! new_idx = re_dfa_add_node (dfa, dup_token, 0); ! elem = create_tree (elem, NULL, 0, new_idx); ! tree = create_tree (tree, elem, CONCAT, 0); ! if (BE (elem == NULL || new_idx == -1 || tree == NULL, 0)) ! goto parse_dup_op_espace; ! } ! else ! { ! new_idx = re_dfa_add_node (dfa, dup_token, 0); ! tree = elem = create_tree (elem, NULL, 0, new_idx); ! if (BE (new_idx == -1 || tree == NULL, 0)) ! goto parse_dup_op_espace; ! } ! for (i = 1; i < end - start; ++i) ! { ! work_tree = duplicate_tree (elem, dfa); ! tree = create_tree (tree, work_tree, CONCAT, 0); ! if (BE (work_tree == NULL || tree == NULL, 0)) ! { ! *err = REG_ESPACE; ! return NULL; ! } ! } ! } } else { new_idx = re_dfa_add_node (dfa, *token, 0); tree = create_tree (tree, NULL, 0, new_idx); if (BE (new_idx == -1 || tree == NULL, 0)) ! { ! *err = REG_ESPACE; ! return NULL; ! 
} } *token = fetch_token (regexp, syntax); return tree; --- 2208,2319 ---- int start = fetch_number (regexp, token, syntax); bin_tree_t *elem; if (start == -1) ! { ! if (token->type == CHARACTER && token->opr.c == ',') ! start = 0; /* We treat "{,m}" as "{0,m}". */ ! else ! { ! *err = REG_BADBR; /* {} is invalid. */ ! return NULL; ! } ! } if (BE (start != -2, 1)) ! { ! /* We treat "{n}" as "{n,n}". */ ! end = ((token->type == OP_CLOSE_DUP_NUM) ? start ! : ((token->type == CHARACTER && token->opr.c == ',') ! ? fetch_number (regexp, token, syntax) : -2)); ! } if (BE (start == -2 || end == -2, 0)) ! { ! /* Invalid sequence. */ ! if (token->type == OP_CLOSE_DUP_NUM) ! goto parse_dup_op_invalid_interval; ! else ! goto parse_dup_op_ebrace; ! } if (BE (start == 0 && end == 0, 0)) ! { ! /* We treat "{0}" and "{0,0}" as null string. */ ! *token = fetch_token (regexp, syntax); ! free_bin_tree (dup_elem); ! return NULL; ! } /* Extract "{n,m}" to "...{0,}". */ elem = tree; for (i = 0; i < start; ++i) ! if (i != 0) ! { ! work_tree = duplicate_tree (elem, dfa); ! tree = create_tree (tree, work_tree, CONCAT, 0); ! if (BE (work_tree == NULL || tree == NULL, 0)) ! goto parse_dup_op_espace; ! } if (end == -1) ! { ! /* We treat "{0,}" as "*". */ ! dup_token.type = OP_DUP_ASTERISK; ! if (start > 0) ! { ! elem = duplicate_tree (elem, dfa); ! new_idx = re_dfa_add_node (dfa, dup_token, 0); ! work_tree = create_tree (elem, NULL, 0, new_idx); ! tree = create_tree (tree, work_tree, CONCAT, 0); ! if (BE (elem == NULL || new_idx == -1 || work_tree == NULL ! || tree == NULL, 0)) ! goto parse_dup_op_espace; ! } ! else ! { ! new_idx = re_dfa_add_node (dfa, dup_token, 0); ! tree = create_tree (elem, NULL, 0, new_idx); ! if (BE (new_idx == -1 || tree == NULL, 0)) ! goto parse_dup_op_espace; ! } ! } else if (end - start > 0) ! { ! /* Then extract "{0,m}" to "??...?". */ ! dup_token.type = OP_DUP_QUESTION; ! if (start > 0) ! { ! elem = duplicate_tree (elem, dfa); ! 
new_idx = re_dfa_add_node (dfa, dup_token, 0); ! elem = create_tree (elem, NULL, 0, new_idx); ! tree = create_tree (tree, elem, CONCAT, 0); ! if (BE (elem == NULL || new_idx == -1 || tree == NULL, 0)) ! goto parse_dup_op_espace; ! } ! else ! { ! new_idx = re_dfa_add_node (dfa, dup_token, 0); ! tree = elem = create_tree (elem, NULL, 0, new_idx); ! if (BE (new_idx == -1 || tree == NULL, 0)) ! goto parse_dup_op_espace; ! } ! for (i = 1; i < end - start; ++i) ! { ! work_tree = duplicate_tree (elem, dfa); ! tree = create_tree (tree, work_tree, CONCAT, 0); ! if (BE (work_tree == NULL || tree == NULL, 0)) ! { ! *err = REG_ESPACE; ! return NULL; ! } ! } ! } } else { new_idx = re_dfa_add_node (dfa, *token, 0); tree = create_tree (tree, NULL, 0, new_idx); if (BE (new_idx == -1 || tree == NULL, 0)) ! { ! *err = REG_ESPACE; ! return NULL; ! } } *token = fetch_token (regexp, syntax); return tree; *************** *** 2343,2358 **** unsigned int start_ch, end_ch; /* Equivalence Classes and Character Classes can't be a range start/end. */ if (BE (start_elem->type == EQUIV_CLASS || start_elem->type == CHAR_CLASS ! || end_elem->type == EQUIV_CLASS || end_elem->type == CHAR_CLASS, ! 0)) return REG_ERANGE; /* We can handle no multi character collating elements without libc support. */ if (BE ((start_elem->type == COLL_SYM ! && strlen ((char *) start_elem->opr.name) > 1) ! || (end_elem->type == COLL_SYM ! && strlen ((char *) end_elem->opr.name) > 1), 0)) return REG_ECOLLATE; # ifdef RE_ENABLE_I18N --- 2369,2384 ---- unsigned int start_ch, end_ch; /* Equivalence Classes and Character Classes can't be a range start/end. */ if (BE (start_elem->type == EQUIV_CLASS || start_elem->type == CHAR_CLASS ! || end_elem->type == EQUIV_CLASS || end_elem->type == CHAR_CLASS, ! 0)) return REG_ERANGE; /* We can handle no multi character collating elements without libc support. */ if (BE ((start_elem->type == COLL_SYM ! && strlen ((char *) start_elem->opr.name) > 1) ! || (end_elem->type == COLL_SYM ! 
&& strlen ((char *) end_elem->opr.name) > 1), 0)) return REG_ECOLLATE; # ifdef RE_ENABLE_I18N *************** *** 2361,2375 **** wchar_t cmp_buf[6] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'}; start_ch = ((start_elem->type == SB_CHAR) ? start_elem->opr.ch ! : ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0] ! : 0)); end_ch = ((end_elem->type == SB_CHAR) ? end_elem->opr.ch ! : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0] ! : 0)); start_wc = ((start_elem->type == SB_CHAR || start_elem->type == COLL_SYM) ! ? __btowc (start_ch) : start_elem->opr.wch); end_wc = ((end_elem->type == SB_CHAR || end_elem->type == COLL_SYM) ! ? __btowc (end_ch) : end_elem->opr.wch); cmp_buf[0] = start_wc; cmp_buf[4] = end_wc; if (wcscoll (cmp_buf, cmp_buf + 4) > 0) --- 2387,2401 ---- wchar_t cmp_buf[6] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'}; start_ch = ((start_elem->type == SB_CHAR) ? start_elem->opr.ch ! : ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0] ! : 0)); end_ch = ((end_elem->type == SB_CHAR) ? end_elem->opr.ch ! : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0] ! : 0)); start_wc = ((start_elem->type == SB_CHAR || start_elem->type == COLL_SYM) ! ? __btowc (start_ch) : start_elem->opr.wch); end_wc = ((end_elem->type == SB_CHAR || end_elem->type == COLL_SYM) ! ? __btowc (end_ch) : end_elem->opr.wch); cmp_buf[0] = start_wc; cmp_buf[4] = end_wc; if (wcscoll (cmp_buf, cmp_buf + 4) > 0) *************** *** 2378,2402 **** /* Check the space of the arrays. */ if (*range_alloc == mbcset->nranges) { ! /* There are not enough space, need realloc. */ ! wchar_t *new_array_start, *new_array_end; ! int new_nranges; ! ! /* +1 in case of mbcset->nranges is 0. */ ! new_nranges = 2 * mbcset->nranges + 1; ! /* Use realloc since mbcset->range_starts and mbcset->range_ends ! are NULL if *range_alloc == 0. */ ! new_array_start = re_realloc (mbcset->range_starts, wchar_t, ! new_nranges); ! new_array_end = re_realloc (mbcset->range_ends, wchar_t, ! new_nranges); ! ! 
if (BE (new_array_start == NULL || new_array_end == NULL, 0)) ! return REG_ESPACE; ! ! mbcset->range_starts = new_array_start; ! mbcset->range_ends = new_array_end; ! *range_alloc = new_nranges; } mbcset->range_starts[mbcset->nranges] = start_wc; --- 2404,2428 ---- /* Check the space of the arrays. */ if (*range_alloc == mbcset->nranges) { ! /* There are not enough space, need realloc. */ ! wchar_t *new_array_start, *new_array_end; ! int new_nranges; ! ! /* +1 in case of mbcset->nranges is 0. */ ! new_nranges = 2 * mbcset->nranges + 1; ! /* Use realloc since mbcset->range_starts and mbcset->range_ends ! are NULL if *range_alloc == 0. */ ! new_array_start = re_realloc (mbcset->range_starts, wchar_t, ! new_nranges); ! new_array_end = re_realloc (mbcset->range_ends, wchar_t, ! new_nranges); ! ! if (BE (new_array_start == NULL || new_array_end == NULL, 0)) ! return REG_ESPACE; ! ! mbcset->range_starts = new_array_start; ! mbcset->range_ends = new_array_end; ! *range_alloc = new_nranges; } mbcset->range_starts[mbcset->nranges] = start_wc; *************** *** 2405,2431 **** /* Build the table for single byte characters. */ for (wc = 0; wc <= SBC_MAX; ++wc) { ! cmp_buf[2] = wc; ! if (wcscoll (cmp_buf, cmp_buf + 2) <= 0 ! && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0) ! bitset_set (sbcset, wc); } } # else /* not RE_ENABLE_I18N */ { unsigned int ch; start_ch = ((start_elem->type == SB_CHAR ) ? start_elem->opr.ch ! : ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0] ! : 0)); end_ch = ((end_elem->type == SB_CHAR ) ? end_elem->opr.ch ! : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0] ! : 0)); if (start_ch > end_ch) return REG_ERANGE; /* Build the table for single byte characters. */ for (ch = 0; ch <= SBC_MAX; ++ch) if (start_ch <= ch && ch <= end_ch) ! bitset_set (sbcset, ch); } # endif /* not RE_ENABLE_I18N */ return REG_NOERROR; --- 2431,2457 ---- /* Build the table for single byte characters. */ for (wc = 0; wc <= SBC_MAX; ++wc) { ! cmp_buf[2] = wc; ! 
if (wcscoll (cmp_buf, cmp_buf + 2) <= 0 ! && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0) ! bitset_set (sbcset, wc); } } # else /* not RE_ENABLE_I18N */ { unsigned int ch; start_ch = ((start_elem->type == SB_CHAR ) ? start_elem->opr.ch ! : ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0] ! : 0)); end_ch = ((end_elem->type == SB_CHAR ) ? end_elem->opr.ch ! : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0] ! : 0)); if (start_ch > end_ch) return REG_ERANGE; /* Build the table for single byte characters. */ for (ch = 0; ch <= SBC_MAX; ++ch) if (start_ch <= ch && ch <= end_ch) ! bitset_set (sbcset, ch); } # endif /* not RE_ENABLE_I18N */ return REG_NOERROR; *************** *** 2486,2514 **** static inline int32_t seek_collating_symbol_entry (name, name_len) ! const unsigned char *name; ! size_t name_len; { int32_t hash = elem_hash ((const char *) name, name_len); int32_t elem = hash % table_size; int32_t second = hash % (table_size - 2); while (symb_table[2 * elem] != 0) ! { ! /* First compare the hashing value. */ ! if (symb_table[2 * elem] == hash ! /* Compare the length of the name. */ ! && name_len == extra[symb_table[2 * elem + 1]] ! /* Compare the name. */ ! && memcmp (name, &extra[symb_table[2 * elem + 1] + 1], ! name_len) == 0) ! { ! /* Yep, this is the entry. */ ! break; ! } ! ! /* Next entry. */ ! elem += second; ! } return elem; } --- 2512,2540 ---- static inline int32_t seek_collating_symbol_entry (name, name_len) ! const unsigned char *name; ! size_t name_len; { int32_t hash = elem_hash ((const char *) name, name_len); int32_t elem = hash % table_size; int32_t second = hash % (table_size - 2); while (symb_table[2 * elem] != 0) ! { ! /* First compare the hashing value. */ ! if (symb_table[2 * elem] == hash ! /* Compare the length of the name. */ ! && name_len == extra[symb_table[2 * elem + 1]] ! /* Compare the name. */ ! && memcmp (name, &extra[symb_table[2 * elem + 1] + 1], ! name_len) == 0) ! { ! /* Yep, this is the entry. */ ! break; ! } ! ! 
/* Next entry. */ ! elem += second; ! } return elem; } *************** *** 2518,2578 **** static inline unsigned int lookup_collation_sequence_value (br_elem) ! bracket_elem_t *br_elem; { if (br_elem->type == SB_CHAR) ! { ! /* ! if (MB_CUR_MAX == 1) ! */ ! if (nrules == 0) ! return collseqmb[br_elem->opr.ch]; ! else ! { ! wint_t wc = __btowc (br_elem->opr.ch); ! return collseq_table_lookup (collseqwc, wc); ! } ! } else if (br_elem->type == MB_CHAR) ! { ! return collseq_table_lookup (collseqwc, br_elem->opr.wch); ! } else if (br_elem->type == COLL_SYM) ! { ! size_t sym_name_len = strlen ((char *) br_elem->opr.name); ! if (nrules != 0) ! { ! int32_t elem, idx; ! elem = seek_collating_symbol_entry (br_elem->opr.name, ! sym_name_len); ! if (symb_table[2 * elem] != 0) ! { ! /* We found the entry. */ ! idx = symb_table[2 * elem + 1]; ! /* Skip the name of collating element name. */ ! idx += 1 + extra[idx]; ! /* Skip the byte sequence of the collating element. */ ! idx += 1 + extra[idx]; ! /* Adjust for the alignment. */ ! idx = (idx + 3) & ~3; ! /* Skip the multibyte collation sequence value. */ ! idx += sizeof (unsigned int); ! /* Skip the wide char sequence of the collating element. */ ! idx += sizeof (unsigned int) * ! (1 + *(unsigned int *) (extra + idx)); ! /* Return the collation sequence value. */ ! return *(unsigned int *) (extra + idx); ! } ! else if (symb_table[2 * elem] == 0 && sym_name_len == 1) ! { ! /* No valid character. Match it as a single byte ! character. */ ! return collseqmb[br_elem->opr.name[0]]; ! } ! } ! else if (sym_name_len == 1) ! return collseqmb[br_elem->opr.name[0]]; ! } return UINT_MAX; } --- 2544,2604 ---- static inline unsigned int lookup_collation_sequence_value (br_elem) ! bracket_elem_t *br_elem; { if (br_elem->type == SB_CHAR) ! { ! /* ! if (MB_CUR_MAX == 1) ! */ ! if (nrules == 0) ! return collseqmb[br_elem->opr.ch]; ! else ! { ! wint_t wc = __btowc (br_elem->opr.ch); ! return collseq_table_lookup (collseqwc, wc); ! } ! 
} else if (br_elem->type == MB_CHAR) ! { ! return collseq_table_lookup (collseqwc, br_elem->opr.wch); ! } else if (br_elem->type == COLL_SYM) ! { ! size_t sym_name_len = strlen ((char *) br_elem->opr.name); ! if (nrules != 0) ! { ! int32_t elem, idx; ! elem = seek_collating_symbol_entry (br_elem->opr.name, ! sym_name_len); ! if (symb_table[2 * elem] != 0) ! { ! /* We found the entry. */ ! idx = symb_table[2 * elem + 1]; ! /* Skip the name of collating element name. */ ! idx += 1 + extra[idx]; ! /* Skip the byte sequence of the collating element. */ ! idx += 1 + extra[idx]; ! /* Adjust for the alignment. */ ! idx = (idx + 3) & ~3; ! /* Skip the multibyte collation sequence value. */ ! idx += sizeof (unsigned int); ! /* Skip the wide char sequence of the collating element. */ ! idx += sizeof (unsigned int) * ! (1 + *(unsigned int *) (extra + idx)); ! /* Return the collation sequence value. */ ! return *(unsigned int *) (extra + idx); ! } ! else if (symb_table[2 * elem] == 0 && sym_name_len == 1) ! { ! /* No valid character. Match it as a single byte ! character. */ ! return collseqmb[br_elem->opr.name[0]]; ! } ! } ! else if (sym_name_len == 1) ! return collseqmb[br_elem->opr.name[0]]; ! } return UINT_MAX; } *************** *** 2586,2598 **** static inline reg_errcode_t # ifdef RE_ENABLE_I18N build_range_exp (sbcset, mbcset, range_alloc, start_elem, end_elem) ! re_charset_t *mbcset; ! int *range_alloc; # else /* not RE_ENABLE_I18N */ build_range_exp (sbcset, start_elem, end_elem) # endif /* not RE_ENABLE_I18N */ ! re_bitset_ptr_t sbcset; ! bracket_elem_t *start_elem, *end_elem; { unsigned int ch; uint32_t start_collseq; --- 2612,2624 ---- static inline reg_errcode_t # ifdef RE_ENABLE_I18N build_range_exp (sbcset, mbcset, range_alloc, start_elem, end_elem) ! re_charset_t *mbcset; ! int *range_alloc; # else /* not RE_ENABLE_I18N */ build_range_exp (sbcset, start_elem, end_elem) # endif /* not RE_ENABLE_I18N */ ! re_bitset_ptr_t sbcset; ! 
bracket_elem_t *start_elem, *end_elem; { unsigned int ch; uint32_t start_collseq; *************** *** 2601,2644 **** # ifdef RE_ENABLE_I18N /* Check the space of the arrays. */ if (*range_alloc == mbcset->nranges) ! { ! /* There are not enough space, need realloc. */ ! uint32_t *new_array_start; ! uint32_t *new_array_end; ! int new_nranges; ! ! /* +1 in case of mbcset->nranges is 0. */ ! new_nranges = 2 * mbcset->nranges + 1; ! /* Use realloc since mbcset->range_starts and mbcset->range_ends ! are NULL if *range_alloc == 0. */ ! new_array_start = re_realloc (mbcset->range_starts, uint32_t, ! new_nranges); ! new_array_end = re_realloc (mbcset->range_ends, uint32_t, ! new_nranges); ! ! if (BE (new_array_start == NULL || new_array_end == NULL, 0)) ! return REG_ESPACE; ! ! mbcset->range_starts = new_array_start; ! mbcset->range_ends = new_array_end; ! *range_alloc = new_nranges; ! } # endif /* RE_ENABLE_I18N */ /* Equivalence Classes and Character Classes can't be a range ! start/end. */ if (BE (start_elem->type == EQUIV_CLASS || start_elem->type == CHAR_CLASS ! || end_elem->type == EQUIV_CLASS || end_elem->type == CHAR_CLASS, ! 0)) ! return REG_ERANGE; start_collseq = lookup_collation_sequence_value (start_elem); end_collseq = lookup_collation_sequence_value (end_elem); /* Check start/end collation sequence values. */ if (BE (start_collseq == UINT_MAX || end_collseq == UINT_MAX, 0)) ! return REG_ECOLLATE; if (BE ((syntax & RE_NO_EMPTY_RANGES) && start_collseq > end_collseq, 0)) ! return REG_ERANGE; # ifdef RE_ENABLE_I18N /* Got valid collation sequence values, add them as a new entry. */ --- 2627,2670 ---- # ifdef RE_ENABLE_I18N /* Check the space of the arrays. */ if (*range_alloc == mbcset->nranges) ! { ! /* There are not enough space, need realloc. */ ! uint32_t *new_array_start; ! uint32_t *new_array_end; ! int new_nranges; ! ! /* +1 in case of mbcset->nranges is 0. */ ! new_nranges = 2 * mbcset->nranges + 1; ! 
/* Use realloc since mbcset->range_starts and mbcset->range_ends ! are NULL if *range_alloc == 0. */ ! new_array_start = re_realloc (mbcset->range_starts, uint32_t, ! new_nranges); ! new_array_end = re_realloc (mbcset->range_ends, uint32_t, ! new_nranges); ! ! if (BE (new_array_start == NULL || new_array_end == NULL, 0)) ! return REG_ESPACE; ! ! mbcset->range_starts = new_array_start; ! mbcset->range_ends = new_array_end; ! *range_alloc = new_nranges; ! } # endif /* RE_ENABLE_I18N */ /* Equivalence Classes and Character Classes can't be a range ! start/end. */ if (BE (start_elem->type == EQUIV_CLASS || start_elem->type == CHAR_CLASS ! || end_elem->type == EQUIV_CLASS || end_elem->type == CHAR_CLASS, ! 0)) ! return REG_ERANGE; start_collseq = lookup_collation_sequence_value (start_elem); end_collseq = lookup_collation_sequence_value (end_elem); /* Check start/end collation sequence values. */ if (BE (start_collseq == UINT_MAX || end_collseq == UINT_MAX, 0)) ! return REG_ECOLLATE; if (BE ((syntax & RE_NO_EMPTY_RANGES) && start_collseq > end_collseq, 0)) ! return REG_ERANGE; # ifdef RE_ENABLE_I18N /* Got valid collation sequence values, add them as a new entry. */ *************** *** 2648,2665 **** /* Build the table for single byte characters. */ for (ch = 0; ch <= SBC_MAX; ch++) ! { ! uint32_t ch_collseq; ! /* ! if (MB_CUR_MAX == 1) ! */ ! if (nrules == 0) ! ch_collseq = collseqmb[ch]; ! else ! ch_collseq = collseq_table_lookup (collseqwc, __btowc (ch)); ! if (start_collseq <= ch_collseq && ch_collseq <= end_collseq) ! bitset_set (sbcset, ch); ! } return REG_NOERROR; } --- 2674,2691 ---- /* Build the table for single byte characters. */ for (ch = 0; ch <= SBC_MAX; ch++) ! { ! uint32_t ch_collseq; ! /* ! if (MB_CUR_MAX == 1) ! */ ! if (nrules == 0) ! ch_collseq = collseqmb[ch]; ! else ! ch_collseq = collseq_table_lookup (collseqwc, __btowc (ch)); ! if (start_collseq <= ch_collseq && ch_collseq <= end_collseq) ! bitset_set (sbcset, ch); ! 
} return REG_NOERROR; } *************** *** 2672,2736 **** static inline reg_errcode_t # ifdef RE_ENABLE_I18N build_collating_symbol (sbcset, mbcset, coll_sym_alloc, name) ! re_charset_t *mbcset; ! int *coll_sym_alloc; # else /* not RE_ENABLE_I18N */ build_collating_symbol (sbcset, name) # endif /* not RE_ENABLE_I18N */ ! re_bitset_ptr_t sbcset; ! const unsigned char *name; { int32_t elem, idx; size_t name_len = strlen ((const char *) name); if (nrules != 0) ! { ! elem = seek_collating_symbol_entry (name, name_len); ! if (symb_table[2 * elem] != 0) ! { ! /* We found the entry. */ ! idx = symb_table[2 * elem + 1]; ! /* Skip the name of collating element name. */ ! idx += 1 + extra[idx]; ! } ! else if (symb_table[2 * elem] == 0 && name_len == 1) ! { ! /* No valid character, treat it as a normal ! character. */ ! bitset_set (sbcset, name[0]); ! return REG_NOERROR; ! } ! else ! return REG_ECOLLATE; # ifdef RE_ENABLE_I18N ! /* Got valid collation sequence, add it as a new entry. */ ! /* Check the space of the arrays. */ ! if (*coll_sym_alloc == mbcset->ncoll_syms) ! { ! /* Not enough, realloc it. */ ! /* +1 in case of mbcset->ncoll_syms is 0. */ ! *coll_sym_alloc = 2 * mbcset->ncoll_syms + 1; ! /* Use realloc since mbcset->coll_syms is NULL ! if *alloc == 0. */ ! mbcset->coll_syms = re_realloc (mbcset->coll_syms, int32_t, ! *coll_sym_alloc); ! if (BE (mbcset->coll_syms == NULL, 0)) ! return REG_ESPACE; ! } ! mbcset->coll_syms[mbcset->ncoll_syms++] = idx; # endif /* RE_ENABLE_I18N */ ! return REG_NOERROR; ! } else ! { ! if (BE (name_len != 1, 0)) ! return REG_ECOLLATE; ! else ! { ! bitset_set (sbcset, name[0]); ! return REG_NOERROR; ! } ! } } #endif --- 2698,2762 ---- static inline reg_errcode_t # ifdef RE_ENABLE_I18N build_collating_symbol (sbcset, mbcset, coll_sym_alloc, name) ! re_charset_t *mbcset; ! int *coll_sym_alloc; # else /* not RE_ENABLE_I18N */ build_collating_symbol (sbcset, name) # endif /* not RE_ENABLE_I18N */ ! re_bitset_ptr_t sbcset; ! 
const unsigned char *name; { int32_t elem, idx; size_t name_len = strlen ((const char *) name); if (nrules != 0) ! { ! elem = seek_collating_symbol_entry (name, name_len); ! if (symb_table[2 * elem] != 0) ! { ! /* We found the entry. */ ! idx = symb_table[2 * elem + 1]; ! /* Skip the name of collating element name. */ ! idx += 1 + extra[idx]; ! } ! else if (symb_table[2 * elem] == 0 && name_len == 1) ! { ! /* No valid character, treat it as a normal ! character. */ ! bitset_set (sbcset, name[0]); ! return REG_NOERROR; ! } ! else ! return REG_ECOLLATE; # ifdef RE_ENABLE_I18N ! /* Got valid collation sequence, add it as a new entry. */ ! /* Check the space of the arrays. */ ! if (*coll_sym_alloc == mbcset->ncoll_syms) ! { ! /* Not enough, realloc it. */ ! /* +1 in case of mbcset->ncoll_syms is 0. */ ! *coll_sym_alloc = 2 * mbcset->ncoll_syms + 1; ! /* Use realloc since mbcset->coll_syms is NULL ! if *alloc == 0. */ ! mbcset->coll_syms = re_realloc (mbcset->coll_syms, int32_t, ! *coll_sym_alloc); ! if (BE (mbcset->coll_syms == NULL, 0)) ! return REG_ESPACE; ! } ! mbcset->coll_syms[mbcset->ncoll_syms++] = idx; # endif /* RE_ENABLE_I18N */ ! return REG_NOERROR; ! } else ! { ! if (BE (name_len != 1, 0)) ! return REG_ECOLLATE; ! else ! { ! bitset_set (sbcset, name[0]); ! return REG_NOERROR; ! } ! } } #endif *************** *** 2754,2765 **** /* if (MB_CUR_MAX > 1) */ ! collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC); table_size = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_SYMB_HASH_SIZEMB); symb_table = (const int32_t *) _NL_CURRENT (LC_COLLATE, ! _NL_COLLATE_SYMB_TABLEMB); extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE, ! _NL_COLLATE_SYMB_EXTRAMB); } #endif sbcset = (re_bitset_ptr_t) calloc (sizeof (unsigned int), BITSET_UINTS); --- 2780,2791 ---- /* if (MB_CUR_MAX > 1) */ ! 
collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC); table_size = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_SYMB_HASH_SIZEMB); symb_table = (const int32_t *) _NL_CURRENT (LC_COLLATE, ! _NL_COLLATE_SYMB_TABLEMB); extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE, ! _NL_COLLATE_SYMB_EXTRAMB); } #endif sbcset = (re_bitset_ptr_t) calloc (sizeof (unsigned int), BITSET_UINTS); *************** *** 2791,2809 **** non_match = 1; #endif /* not RE_ENABLE_I18N */ if (syntax & RE_HAT_LISTS_NOT_NEWLINE) ! bitset_set (sbcset, '\0'); re_string_skip_bytes (regexp, token_len); /* Skip a token. */ token_len = peek_token_bracket (token, regexp, syntax); if (BE (token->type == END_OF_RE, 0)) ! { ! *err = REG_BADPAT; ! goto parse_bracket_exp_free_return; ! } #ifdef RE_ENABLE_I18N if (MB_CUR_MAX > 1) ! for (i = 0; i < SBC_MAX; ++i) ! if (__btowc (i) == WEOF) ! bitset_set (sbcset, i); #endif /* RE_ENABLE_I18N */ } --- 2817,2835 ---- non_match = 1; #endif /* not RE_ENABLE_I18N */ if (syntax & RE_HAT_LISTS_NOT_NEWLINE) ! bitset_set (sbcset, '\0'); re_string_skip_bytes (regexp, token_len); /* Skip a token. */ token_len = peek_token_bracket (token, regexp, syntax); if (BE (token->type == END_OF_RE, 0)) ! { ! *err = REG_BADPAT; ! goto parse_bracket_exp_free_return; ! } #ifdef RE_ENABLE_I18N if (MB_CUR_MAX > 1) ! for (i = 0; i < SBC_MAX; ++i) ! if (__btowc (i) == WEOF) ! bitset_set (sbcset, i); #endif /* RE_ENABLE_I18N */ } *************** *** 2822,2942 **** start_elem.opr.name = start_name_buf; ret = parse_bracket_element (&start_elem, regexp, token, token_len, dfa, ! syntax); if (BE (ret != REG_NOERROR, 0)) ! { ! *err = ret; ! goto parse_bracket_exp_free_return; ! } token_len = peek_token_bracket (token, regexp, syntax); if (BE (token->type == END_OF_RE, 0)) ! { ! *err = REG_BADPAT; ! goto parse_bracket_exp_free_return; ! } if (token->type == OP_CHARSET_RANGE) ! { ! re_string_skip_bytes (regexp, token_len); /* Skip '-'. */ ! 
token_len2 = peek_token_bracket (&token2, regexp, syntax); ! if (BE (token->type == END_OF_RE, 0)) ! { ! *err = REG_BADPAT; ! goto parse_bracket_exp_free_return; ! } ! if (token2.type == OP_CLOSE_BRACKET) ! { ! /* We treat the last '-' as a normal character. */ ! re_string_skip_bytes (regexp, -token_len); ! token->type = CHARACTER; ! } ! else ! is_range_exp = 1; ! } if (is_range_exp == 1) ! { ! end_elem.opr.name = end_name_buf; ! ret = parse_bracket_element (&end_elem, regexp, &token2, token_len2, ! dfa, syntax); ! if (BE (ret != REG_NOERROR, 0)) ! { ! *err = ret; ! goto parse_bracket_exp_free_return; ! } ! ! token_len = peek_token_bracket (token, regexp, syntax); ! if (BE (token->type == END_OF_RE, 0)) ! { ! *err = REG_BADPAT; ! goto parse_bracket_exp_free_return; ! } ! *err = build_range_exp (sbcset, ! #ifdef RE_ENABLE_I18N ! mbcset, &range_alloc, ! #endif /* RE_ENABLE_I18N */ ! &start_elem, &end_elem); ! if (BE (*err != REG_NOERROR, 0)) ! goto parse_bracket_exp_free_return; ! } else ! { ! switch (start_elem.type) ! { ! case SB_CHAR: ! bitset_set (sbcset, start_elem.opr.ch); ! break; ! #ifdef RE_ENABLE_I18N ! case MB_CHAR: ! /* Check whether the array has enough space. */ ! if (mbchar_alloc == mbcset->nmbchars) ! { ! /* Not enough, realloc it. */ ! /* +1 in case of mbcset->nmbchars is 0. */ ! mbchar_alloc = 2 * mbcset->nmbchars + 1; ! /* Use realloc since array is NULL if *alloc == 0. */ ! mbcset->mbchars = re_realloc (mbcset->mbchars, wchar_t, ! mbchar_alloc); ! if (BE (mbcset->mbchars == NULL, 0)) ! goto parse_bracket_exp_espace; ! } ! mbcset->mbchars[mbcset->nmbchars++] = start_elem.opr.wch; ! break; ! #endif /* RE_ENABLE_I18N */ ! case EQUIV_CLASS: ! *err = build_equiv_class (sbcset, ! #ifdef RE_ENABLE_I18N ! mbcset, &equiv_class_alloc, ! #endif /* RE_ENABLE_I18N */ ! start_elem.opr.name); ! if (BE (*err != REG_NOERROR, 0)) ! goto parse_bracket_exp_free_return; ! break; ! case COLL_SYM: ! *err = build_collating_symbol (sbcset, ! #ifdef RE_ENABLE_I18N ! 
mbcset, &coll_sym_alloc, ! #endif /* RE_ENABLE_I18N */ ! start_elem.opr.name); ! if (BE (*err != REG_NOERROR, 0)) ! goto parse_bracket_exp_free_return; ! break; ! case CHAR_CLASS: ! ret = build_charclass (sbcset, ! #ifdef RE_ENABLE_I18N ! mbcset, &char_class_alloc, ! #endif /* RE_ENABLE_I18N */ ! start_elem.opr.name, syntax); ! if (BE (ret != REG_NOERROR, 0)) ! goto parse_bracket_exp_espace; ! break; ! default: ! assert (0); ! break; ! } ! } if (token->type == OP_CLOSE_BRACKET) ! break; } re_string_skip_bytes (regexp, token_len); /* Skip a token. */ --- 2848,2968 ---- start_elem.opr.name = start_name_buf; ret = parse_bracket_element (&start_elem, regexp, token, token_len, dfa, ! syntax); if (BE (ret != REG_NOERROR, 0)) ! { ! *err = ret; ! goto parse_bracket_exp_free_return; ! } token_len = peek_token_bracket (token, regexp, syntax); if (BE (token->type == END_OF_RE, 0)) ! { ! *err = REG_BADPAT; ! goto parse_bracket_exp_free_return; ! } if (token->type == OP_CHARSET_RANGE) ! { ! re_string_skip_bytes (regexp, token_len); /* Skip '-'. */ ! token_len2 = peek_token_bracket (&token2, regexp, syntax); ! if (BE (token->type == END_OF_RE, 0)) ! { ! *err = REG_BADPAT; ! goto parse_bracket_exp_free_return; ! } ! if (token2.type == OP_CLOSE_BRACKET) ! { ! /* We treat the last '-' as a normal character. */ ! re_string_skip_bytes (regexp, -token_len); ! token->type = CHARACTER; ! } ! else ! is_range_exp = 1; ! } if (is_range_exp == 1) ! { ! end_elem.opr.name = end_name_buf; ! ret = parse_bracket_element (&end_elem, regexp, &token2, token_len2, ! dfa, syntax); ! if (BE (ret != REG_NOERROR, 0)) ! { ! *err = ret; ! goto parse_bracket_exp_free_return; ! } ! ! token_len = peek_token_bracket (token, regexp, syntax); ! if (BE (token->type == END_OF_RE, 0)) ! { ! *err = REG_BADPAT; ! goto parse_bracket_exp_free_return; ! } ! *err = build_range_exp (sbcset, ! #ifdef RE_ENABLE_I18N ! mbcset, &range_alloc, ! #endif /* RE_ENABLE_I18N */ ! &start_elem, &end_elem); ! 
if (BE (*err != REG_NOERROR, 0)) ! goto parse_bracket_exp_free_return; ! } else ! { ! switch (start_elem.type) ! { ! case SB_CHAR: ! bitset_set (sbcset, start_elem.opr.ch); ! break; ! #ifdef RE_ENABLE_I18N ! case MB_CHAR: ! /* Check whether the array has enough space. */ ! if (mbchar_alloc == mbcset->nmbchars) ! { ! /* Not enough, realloc it. */ ! /* +1 in case of mbcset->nmbchars is 0. */ ! mbchar_alloc = 2 * mbcset->nmbchars + 1; ! /* Use realloc since array is NULL if *alloc == 0. */ ! mbcset->mbchars = re_realloc (mbcset->mbchars, wchar_t, ! mbchar_alloc); ! if (BE (mbcset->mbchars == NULL, 0)) ! goto parse_bracket_exp_espace; ! } ! mbcset->mbchars[mbcset->nmbchars++] = start_elem.opr.wch; ! break; ! #endif /* RE_ENABLE_I18N */ ! case EQUIV_CLASS: ! *err = build_equiv_class (sbcset, ! #ifdef RE_ENABLE_I18N ! mbcset, &equiv_class_alloc, ! #endif /* RE_ENABLE_I18N */ ! start_elem.opr.name); ! if (BE (*err != REG_NOERROR, 0)) ! goto parse_bracket_exp_free_return; ! break; ! case COLL_SYM: ! *err = build_collating_symbol (sbcset, ! #ifdef RE_ENABLE_I18N ! mbcset, &coll_sym_alloc, ! #endif /* RE_ENABLE_I18N */ ! start_elem.opr.name); ! if (BE (*err != REG_NOERROR, 0)) ! goto parse_bracket_exp_free_return; ! break; ! case CHAR_CLASS: ! ret = build_charclass (sbcset, ! #ifdef RE_ENABLE_I18N ! mbcset, &char_class_alloc, ! #endif /* RE_ENABLE_I18N */ ! start_elem.opr.name, syntax); ! if (BE (ret != REG_NOERROR, 0)) ! goto parse_bracket_exp_espace; ! break; ! default: ! assert (0); ! break; ! } ! } if (token->type == OP_CLOSE_BRACKET) ! break; } re_string_skip_bytes (regexp, token_len); /* Skip a token. */ *************** *** 2960,2966 **** #ifdef RE_ENABLE_I18N if (mbcset->nmbchars || mbcset->ncoll_syms || mbcset->nequiv_classes || mbcset->nranges || (MB_CUR_MAX > 1 && (mbcset->nchar_classes ! 
|| mbcset->non_match))) { re_token_t alt_token; bin_tree_t *mbc_tree; --- 2986,2992 ---- #ifdef RE_ENABLE_I18N if (mbcset->nmbchars || mbcset->ncoll_syms || mbcset->nequiv_classes || mbcset->nranges || (MB_CUR_MAX > 1 && (mbcset->nchar_classes ! || mbcset->non_match))) { re_token_t alt_token; bin_tree_t *mbc_tree; *************** *** 2971,2984 **** new_idx = re_dfa_add_node (dfa, br_token, 0); mbc_tree = create_tree (NULL, NULL, 0, new_idx); if (BE (new_idx == -1 || mbc_tree == NULL, 0)) ! goto parse_bracket_exp_espace; /* Then join them by ALT node. */ dfa->has_plural_match = 1; alt_token.type = OP_ALT; new_idx = re_dfa_add_node (dfa, alt_token, 0); work_tree = create_tree (work_tree, mbc_tree, 0, new_idx); if (BE (new_idx != -1 && mbc_tree != NULL, 1)) ! return work_tree; } else { --- 2997,3010 ---- new_idx = re_dfa_add_node (dfa, br_token, 0); mbc_tree = create_tree (NULL, NULL, 0, new_idx); if (BE (new_idx == -1 || mbc_tree == NULL, 0)) ! goto parse_bracket_exp_espace; /* Then join them by ALT node. */ dfa->has_plural_match = 1; alt_token.type = OP_ALT; new_idx = re_dfa_add_node (dfa, alt_token, 0); work_tree = create_tree (work_tree, mbc_tree, 0, new_idx); if (BE (new_idx != -1 && mbc_tree != NULL, 1)) ! return work_tree; } else { *************** *** 3045,3057 **** for (;; ++i) { if (re_string_eoi(regexp) || i >= BRACKET_NAME_BUF_SIZE) ! return REG_EBRACK; if (token->type == OP_OPEN_CHAR_CLASS) ! ch = re_string_fetch_byte_case (regexp); else ! ch = re_string_fetch_byte (regexp); if (ch == delim && re_string_peek_byte (regexp, 0) == ']') ! break; elem->opr.name[i] = ch; } re_string_skip_bytes (regexp, 1); --- 3071,3083 ---- for (;; ++i) { if (re_string_eoi(regexp) || i >= BRACKET_NAME_BUF_SIZE) ! return REG_EBRACK; if (token->type == OP_OPEN_CHAR_CLASS) ! ch = re_string_fetch_byte_case (regexp); else ! ch = re_string_fetch_byte (regexp); if (ch == delim && re_string_peek_byte (regexp, 0) == ']') ! 
break; elem->opr.name[i] = ch; } re_string_skip_bytes (regexp, 1); *************** *** 3106,3165 **** cp = name; table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB); weights = (const unsigned char *) _NL_CURRENT (LC_COLLATE, ! _NL_COLLATE_WEIGHTMB); extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE, ! _NL_COLLATE_EXTRAMB); indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE, ! _NL_COLLATE_INDIRECTMB); idx1 = findidx (&cp); if (BE (idx1 == 0 || cp < name + strlen ((const char *) name), 0)) ! /* This isn't a valid character. */ ! return REG_ECOLLATE; /* Build single byte matcing table for this equivalence class. */ char_buf[1] = (unsigned char) '\0'; len = weights[idx1]; for (ch = 0; ch < SBC_MAX; ++ch) ! { ! char_buf[0] = ch; ! cp = char_buf; ! idx2 = findidx (&cp); /* ! idx2 = table[ch]; */ ! if (idx2 == 0) ! /* This isn't a valid character. */ ! continue; ! if (len == weights[idx2]) ! { ! int cnt = 0; ! while (cnt <= len && ! weights[idx1 + 1 + cnt] == weights[idx2 + 1 + cnt]) ! ++cnt; ! ! if (cnt > len) ! bitset_set (sbcset, ch); ! } ! } /* Check whether the array has enough space. */ if (*equiv_class_alloc == mbcset->nequiv_classes) ! { ! /* Not enough, realloc it. */ ! /* +1 in case of mbcset->nequiv_classes is 0. */ ! *equiv_class_alloc = 2 * mbcset->nequiv_classes + 1; ! /* Use realloc since the array is NULL if *alloc == 0. */ ! mbcset->equiv_classes = re_realloc (mbcset->equiv_classes, int32_t, ! *equiv_class_alloc); ! if (BE (mbcset->equiv_classes == NULL, 0)) ! return REG_ESPACE; ! } mbcset->equiv_classes[mbcset->nequiv_classes++] = idx1; } else #endif /* _LIBC && RE_ENABLE_I18N */ { if (BE (strlen ((const char *) name) != 1, 0)) ! return REG_ECOLLATE; bitset_set (sbcset, *name); } return REG_NOERROR; --- 3132,3191 ---- cp = name; table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB); weights = (const unsigned char *) _NL_CURRENT (LC_COLLATE, ! 
_NL_COLLATE_WEIGHTMB); extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE, ! _NL_COLLATE_EXTRAMB); indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE, ! _NL_COLLATE_INDIRECTMB); idx1 = findidx (&cp); if (BE (idx1 == 0 || cp < name + strlen ((const char *) name), 0)) ! /* This isn't a valid character. */ ! return REG_ECOLLATE; /* Build single byte matcing table for this equivalence class. */ char_buf[1] = (unsigned char) '\0'; len = weights[idx1]; for (ch = 0; ch < SBC_MAX; ++ch) ! { ! char_buf[0] = ch; ! cp = char_buf; ! idx2 = findidx (&cp); /* ! idx2 = table[ch]; */ ! if (idx2 == 0) ! /* This isn't a valid character. */ ! continue; ! if (len == weights[idx2]) ! { ! int cnt = 0; ! while (cnt <= len && ! weights[idx1 + 1 + cnt] == weights[idx2 + 1 + cnt]) ! ++cnt; ! ! if (cnt > len) ! bitset_set (sbcset, ch); ! } ! } /* Check whether the array has enough space. */ if (*equiv_class_alloc == mbcset->nequiv_classes) ! { ! /* Not enough, realloc it. */ ! /* +1 in case of mbcset->nequiv_classes is 0. */ ! *equiv_class_alloc = 2 * mbcset->nequiv_classes + 1; ! /* Use realloc since the array is NULL if *alloc == 0. */ ! mbcset->equiv_classes = re_realloc (mbcset->equiv_classes, int32_t, ! *equiv_class_alloc); ! if (BE (mbcset->equiv_classes == NULL, 0)) ! return REG_ESPACE; ! } mbcset->equiv_classes[mbcset->nequiv_classes++] = idx1; } else #endif /* _LIBC && RE_ENABLE_I18N */ { if (BE (strlen ((const char *) name) != 1, 0)) ! return REG_ECOLLATE; bitset_set (sbcset, *name); } return REG_NOERROR; *************** *** 3201,3218 **** *char_class_alloc = 2 * mbcset->nchar_classes + 1; /* Use realloc since array is NULL if *alloc == 0. */ mbcset->char_classes = re_realloc (mbcset->char_classes, wctype_t, ! *char_class_alloc); if (BE (mbcset->char_classes == NULL, 0)) ! return REG_ESPACE; } mbcset->char_classes[mbcset->nchar_classes++] = __wctype (name); #endif /* RE_ENABLE_I18N */ #define BUILD_CHARCLASS_LOOP(ctype_func)\ ! for (i = 0; i < SBC_MAX; ++i) \ ! { \ ! 
if (ctype_func (i)) \ ! bitset_set (sbcset, i); \ } if (strcmp (name, "alnum") == 0) --- 3227,3244 ---- *char_class_alloc = 2 * mbcset->nchar_classes + 1; /* Use realloc since array is NULL if *alloc == 0. */ mbcset->char_classes = re_realloc (mbcset->char_classes, wctype_t, ! *char_class_alloc); if (BE (mbcset->char_classes == NULL, 0)) ! return REG_ESPACE; } mbcset->char_classes[mbcset->nchar_classes++] = __wctype (name); #endif /* RE_ENABLE_I18N */ #define BUILD_CHARCLASS_LOOP(ctype_func)\ ! for (i = 0; i < SBC_MAX; ++i) \ ! { \ ! if (ctype_func (i)) \ ! bitset_set (sbcset, i); \ } if (strcmp (name, "alnum") == 0) *************** *** 3284,3296 **** int i; /* if (syntax & RE_HAT_LISTS_NOT_NEWLINE) ! bitset_set(cset->sbcset, '\0'); */ mbcset->non_match = 1; if (MB_CUR_MAX > 1) ! for (i = 0; i < SBC_MAX; ++i) ! if (__btowc (i) == WEOF) ! bitset_set (sbcset, i); #else /* not RE_ENABLE_I18N */ non_match = 1; #endif /* not RE_ENABLE_I18N */ --- 3310,3322 ---- int i; /* if (syntax & RE_HAT_LISTS_NOT_NEWLINE) ! bitset_set(cset->sbcset, '\0'); */ mbcset->non_match = 1; if (MB_CUR_MAX > 1) ! for (i = 0; i < SBC_MAX; ++i) ! if (__btowc (i) == WEOF) ! bitset_set (sbcset, i); #else /* not RE_ENABLE_I18N */ non_match = 1; #endif /* not RE_ENABLE_I18N */ *************** *** 3299,3307 **** /* We don't care the syntax in this case. */ ret = build_charclass (sbcset, #ifdef RE_ENABLE_I18N ! mbcset, &alloc, #endif /* RE_ENABLE_I18N */ ! (const unsigned char *) "alpha", 0); if (BE (ret != REG_NOERROR, 0)) { --- 3325,3333 ---- /* We don't care the syntax in this case. */ ret = build_charclass (sbcset, #ifdef RE_ENABLE_I18N ! mbcset, &alloc, #endif /* RE_ENABLE_I18N */ ! (const unsigned char *) "alpha", 0); if (BE (ret != REG_NOERROR, 0)) { *************** *** 3343,3355 **** new_idx = re_dfa_add_node (dfa, br_token, 0); mbc_tree = create_tree (NULL, NULL, 0, new_idx); if (BE (new_idx == -1 || mbc_tree == NULL, 0)) ! goto build_word_op_espace; /* Then join them by ALT node. 
*/ alt_token.type = OP_ALT; new_idx = re_dfa_add_node (dfa, alt_token, 0); tree = create_tree (tree, mbc_tree, 0, new_idx); if (BE (new_idx != -1 && mbc_tree != NULL, 1)) ! return tree; } else { --- 3369,3381 ---- new_idx = re_dfa_add_node (dfa, br_token, 0); mbc_tree = create_tree (NULL, NULL, 0, new_idx); if (BE (new_idx == -1 || mbc_tree == NULL, 0)) ! goto build_word_op_espace; /* Then join them by ALT node. */ alt_token.type = OP_ALT; new_idx = re_dfa_add_node (dfa, alt_token, 0); tree = create_tree (tree, mbc_tree, 0, new_idx); if (BE (new_idx != -1 && mbc_tree != NULL, 1)) ! return tree; } else { *************** *** 3387,3397 **** *token = fetch_token (input, syntax); c = token->opr.c; if (BE (token->type == END_OF_RE, 0)) ! return -2; if (token->type == OP_CLOSE_DUP_NUM || c == ',') ! break; num = ((token->type != CHARACTER || c < '0' || '9' < c || num == -2) ! ? -2 : ((num == -1) ? c - '0' : num * 10 + c - '0')); num = (num > RE_DUP_MAX) ? -2 : num; } return num; --- 3413,3423 ---- *token = fetch_token (input, syntax); c = token->opr.c; if (BE (token->type == END_OF_RE, 0)) ! return -2; if (token->type == OP_CLOSE_DUP_NUM || c == ',') ! break; num = ((token->type != CHARACTER || c < '0' || '9' < c || num == -2) ! ? -2 : ((num == -1) ? c - '0' : num * 10 + c - '0')); num = (num > RE_DUP_MAX) ? -2 : num; } return num; *************** *** 3478,3484 **** { left = duplicate_tree (src->left, dfa); if (left == NULL) ! return NULL; } /* Secondaly, duplicate the right. */ --- 3504,3510 ---- { left = duplicate_tree (src->left, dfa); if (left == NULL) ! return NULL; } /* Secondaly, duplicate the right. */ *************** *** 3486,3495 **** { right = duplicate_tree (src->right, dfa); if (right == NULL) ! { ! free_bin_tree (left); ! return NULL; ! } } /* At last, duplicate itself. */ --- 3512,3521 ---- { right = duplicate_tree (src->right, dfa); if (right == NULL) ! { ! free_bin_tree (left); ! return NULL; ! } } /* At last, duplicate itself. 
*/ *************** *** 3498,3508 **** new_node_idx = re_dfa_add_node (dfa, dfa->nodes[src->node_idx], 0); dfa->nodes[new_node_idx].duplicated = 1; if (BE (new_node_idx == -1, 0)) ! { ! free_bin_tree (left); ! free_bin_tree (right); ! return NULL; ! } } else new_node_idx = src->type; --- 3524,3534 ---- new_node_idx = re_dfa_add_node (dfa, dfa->nodes[src->node_idx], 0); dfa->nodes[new_node_idx].duplicated = 1; if (BE (new_node_idx == -1, 0)) ! { ! free_bin_tree (left); ! free_bin_tree (right); ! return NULL; ! } } else new_node_idx = src->type; *************** *** 3515,3518 **** } return new_tree; } - --- 3541,3543 ---- diff -rNC3 sed-4.0.3/lib/regex.c sed-4.0.4/lib/regex.c *** sed-4.0.3/lib/regex.c Wed Oct 23 09:17:07 2002 --- sed-4.0.4/lib/regex.c Fri Nov 22 12:00:18 2002 *************** *** 41,46 **** --- 41,54 ---- # define re_compile_fastmap(bufp) __re_compile_fastmap (bufp) #endif + #ifdef HAVE_CONFIG_H + #include "config.h" + #endif + + #if (HAVE_WCTYPE_H && HAVE_WCHAR_H) || _LIBC + #define RE_ENABLE_I18N + #endif + #if _LIBC || __GNUC__ >= 3 # define BE(expr, val) __builtin_expect (expr, val) #else diff -rNC3 sed-4.0.3/lib/regex.h sed-4.0.4/lib/regex.h *** sed-4.0.3/lib/regex.h Wed Oct 23 09:17:07 2002 --- sed-4.0.4/lib/regex.h Fri Nov 22 12:01:59 2002 *************** *** 543,557 **** # endif #endif - /* Hacks. :-) */ - reg_errcode_t re_search_internal (const regex_t *preg, - const char *string, int length, - int start, int range, int stop, - size_t nmatch, regmatch_t pmatch[], - int eflags); - reg_errcode_t re_compile_internal (regex_t *preg, const char * pattern, - int length, reg_syntax_t syntax); - /* POSIX compatibility. 
*/ extern int regcomp _RE_ARGS ((regex_t *__restrict __preg, const char *__restrict __pattern, --- 543,548 ---- *************** *** 566,571 **** --- 557,571 ---- char *__errbuf, size_t __errbuf_size)); extern void regfree _RE_ARGS ((regex_t *__preg)); + + /* Internal entry points */ + extern reg_errcode_t re_compile_internal (regex_t *preg, const char * pattern, + int length, reg_syntax_t syntax); + extern reg_errcode_t re_search_internal (const regex_t *preg, + const char *string, int length, + int start, int range, int stop, + size_t nmatch, regmatch_t pmatch[], + int eflags); #ifdef __cplusplus diff -rNC3 sed-4.0.3/lib/regex_internal.c sed-4.0.4/lib/regex_internal.c *** sed-4.0.3/lib/regex_internal.c Wed Oct 23 17:51:51 2002 --- sed-4.0.4/lib/regex_internal.c Fri Nov 22 11:53:01 2002 *************** *** 63,87 **** #include "regex_internal.h" static void re_string_construct_common (const char *str, int len, ! re_string_t *pstr, ! RE_TRANSLATE_TYPE trans, int icase); #ifdef RE_ENABLE_I18N ! static int re_string_skip_chars (re_string_t *pstr, int new_raw_idx); #endif /* RE_ENABLE_I18N */ static re_dfastate_t *create_newstate_common (re_dfa_t *dfa, ! const re_node_set *nodes, ! unsigned int hash); static reg_errcode_t register_state (re_dfa_t *dfa, re_dfastate_t *newstate, ! unsigned int hash); static re_dfastate_t *create_ci_newstate (re_dfa_t *dfa, ! const re_node_set *nodes, ! unsigned int hash); static re_dfastate_t *create_cd_newstate (re_dfa_t *dfa, ! const re_node_set *nodes, ! unsigned int context, ! unsigned int hash); static unsigned int inline calc_state_hash (const re_node_set *nodes, ! unsigned int context); /* Functions for string operation. */ --- 63,88 ---- #include "regex_internal.h" static void re_string_construct_common (const char *str, int len, ! re_string_t *pstr, ! RE_TRANSLATE_TYPE trans, int icase); #ifdef RE_ENABLE_I18N ! static int re_string_skip_chars (re_string_t *pstr, int new_raw_idx, ! 
wint_t *last_wc); #endif /* RE_ENABLE_I18N */ static re_dfastate_t *create_newstate_common (re_dfa_t *dfa, ! const re_node_set *nodes, ! unsigned int hash); static reg_errcode_t register_state (re_dfa_t *dfa, re_dfastate_t *newstate, ! unsigned int hash); static re_dfastate_t *create_ci_newstate (re_dfa_t *dfa, ! const re_node_set *nodes, ! unsigned int hash); static re_dfastate_t *create_cd_newstate (re_dfa_t *dfa, ! const re_node_set *nodes, ! unsigned int context, ! unsigned int hash); static unsigned int inline calc_state_hash (const re_node_set *nodes, ! unsigned int context); /* Functions for string operation. */ *************** *** 105,114 **** return ret; pstr->mbs_case = (MBS_CASE_ALLOCATED (pstr) ? pstr->mbs_case ! : (unsigned char *) str); pstr->mbs = MBS_ALLOCATED (pstr) ? pstr->mbs : pstr->mbs_case; pstr->valid_len = (MBS_CASE_ALLOCATED (pstr) || MBS_ALLOCATED (pstr) ! || MB_CUR_MAX > 1) ? pstr->valid_len : len; return REG_NOERROR; } --- 106,115 ---- return ret; pstr->mbs_case = (MBS_CASE_ALLOCATED (pstr) ? pstr->mbs_case ! : (unsigned char *) str); pstr->mbs = MBS_ALLOCATED (pstr) ? pstr->mbs : pstr->mbs_case; pstr->valid_len = (MBS_CASE_ALLOCATED (pstr) || MBS_ALLOCATED (pstr) ! || MB_CUR_MAX > 1) ? pstr->valid_len : len; return REG_NOERROR; } *************** *** 131,164 **** { ret = re_string_realloc_buffers (pstr, len + 1); if (BE (ret != REG_NOERROR, 0)) ! return ret; } pstr->mbs_case = (MBS_CASE_ALLOCATED (pstr) ? pstr->mbs_case ! : (unsigned char *) str); pstr->mbs = MBS_ALLOCATED (pstr) ? pstr->mbs : pstr->mbs_case; if (icase) { #ifdef RE_ENABLE_I18N if (MB_CUR_MAX > 1) ! build_wcs_upper_buffer (pstr); else #endif /* RE_ENABLE_I18N */ ! build_upper_buffer (pstr); } else { #ifdef RE_ENABLE_I18N if (MB_CUR_MAX > 1) ! build_wcs_buffer (pstr); else #endif /* RE_ENABLE_I18N */ ! { ! if (trans != NULL) ! re_string_translate_buffer (pstr); ! else ! pstr->valid_len = len; ! } } /* Initialized whole buffers, then valid_len == bufs_len. 
*/ --- 132,165 ---- { ret = re_string_realloc_buffers (pstr, len + 1); if (BE (ret != REG_NOERROR, 0)) ! return ret; } pstr->mbs_case = (MBS_CASE_ALLOCATED (pstr) ? pstr->mbs_case ! : (unsigned char *) str); pstr->mbs = MBS_ALLOCATED (pstr) ? pstr->mbs : pstr->mbs_case; if (icase) { #ifdef RE_ENABLE_I18N if (MB_CUR_MAX > 1) ! build_wcs_upper_buffer (pstr); else #endif /* RE_ENABLE_I18N */ ! build_upper_buffer (pstr); } else { #ifdef RE_ENABLE_I18N if (MB_CUR_MAX > 1) ! build_wcs_buffer (pstr); else #endif /* RE_ENABLE_I18N */ ! { ! if (trans != NULL) ! re_string_translate_buffer (pstr); ! else ! pstr->valid_len = len; ! } } /* Initialized whole buffers, then valid_len == bufs_len. */ *************** *** 176,199 **** #ifdef RE_ENABLE_I18N if (MB_CUR_MAX > 1) { ! pstr->wcs = re_realloc (pstr->wcs, wint_t, new_buf_len); ! if (BE (pstr->wcs == NULL, 0)) ! return REG_ESPACE; } #endif /* RE_ENABLE_I18N */ if (MBS_ALLOCATED (pstr)) { ! pstr->mbs = re_realloc (pstr->mbs, unsigned char, new_buf_len); ! if (BE (pstr->mbs == NULL, 0)) ! return REG_ESPACE; } if (MBS_CASE_ALLOCATED (pstr)) { ! pstr->mbs_case = re_realloc (pstr->mbs_case, unsigned char, new_buf_len); ! if (BE (pstr->mbs_case == NULL, 0)) ! return REG_ESPACE; if (!MBS_ALLOCATED (pstr)) ! pstr->mbs = pstr->mbs_case; } pstr->bufs_len = new_buf_len; return REG_NOERROR; --- 177,205 ---- #ifdef RE_ENABLE_I18N if (MB_CUR_MAX > 1) { ! wint_t *new_array = re_realloc (pstr->wcs, wint_t, new_buf_len); ! if (BE (new_array == NULL, 0)) ! return REG_ESPACE; ! pstr->wcs = new_array; } #endif /* RE_ENABLE_I18N */ if (MBS_ALLOCATED (pstr)) { ! unsigned char *new_array = re_realloc (pstr->mbs, unsigned char, ! new_buf_len); ! if (BE (new_array == NULL, 0)) ! return REG_ESPACE; ! pstr->mbs = new_array; } if (MBS_CASE_ALLOCATED (pstr)) { ! unsigned char *new_array = re_realloc (pstr->mbs_case, unsigned char, ! new_buf_len); ! if (BE (new_array == NULL, 0)) ! return REG_ESPACE; ! 
pstr->mbs_case = new_array; if (!MBS_ALLOCATED (pstr)) ! pstr->mbs = pstr->mbs_case; } pstr->bufs_len = new_buf_len; return REG_NOERROR; *************** *** 243,274 **** remain_len = end_idx - byte_idx; prev_st = pstr->cur_state; mbclen = mbrtowc (&wc, ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx ! + byte_idx), remain_len, &pstr->cur_state); if (BE (mbclen == (size_t) -2, 0)) ! { ! /* The buffer doesn't have enough space, finish to build. */ ! pstr->cur_state = prev_st; ! break; ! } else if (BE (mbclen == (size_t) -1 || mbclen == 0, 0)) ! { ! /* We treat these cases as a singlebyte character. */ ! mbclen = 1; ! wc = (wchar_t) pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]; ! pstr->cur_state = prev_st; ! } /* Apply the translateion if we need. */ if (pstr->trans != NULL && mbclen == 1) ! { ! int ch = pstr->trans[pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]]; ! pstr->mbs_case[byte_idx] = ch; ! } /* Write wide character and padding. */ pstr->wcs[byte_idx++] = wc; /* Write paddings. */ for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;) ! pstr->wcs[byte_idx++] = WEOF; } pstr->valid_len = byte_idx; } --- 249,280 ---- remain_len = end_idx - byte_idx; prev_st = pstr->cur_state; mbclen = mbrtowc (&wc, ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx ! + byte_idx), remain_len, &pstr->cur_state); if (BE (mbclen == (size_t) -2, 0)) ! { ! /* The buffer doesn't have enough space, finish to build. */ ! pstr->cur_state = prev_st; ! break; ! } else if (BE (mbclen == (size_t) -1 || mbclen == 0, 0)) ! { ! /* We treat these cases as a singlebyte character. */ ! mbclen = 1; ! wc = (wchar_t) pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]; ! pstr->cur_state = prev_st; ! } /* Apply the translateion if we need. */ if (pstr->trans != NULL && mbclen == 1) ! { ! int ch = pstr->trans[pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]]; ! pstr->mbs_case[byte_idx] = ch; ! } /* Write wide character and padding. */ pstr->wcs[byte_idx++] = wc; /* Write paddings. 
*/ for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;) ! pstr->wcs[byte_idx++] = WEOF; } pstr->valid_len = byte_idx; } *************** *** 291,330 **** remain_len = end_idx - byte_idx; prev_st = pstr->cur_state; mbclen = mbrtowc (&wc, ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx ! + byte_idx), remain_len, &pstr->cur_state); if (BE (mbclen == (size_t) -2, 0)) ! { ! /* The buffer doesn't have enough space, finish to build. */ ! pstr->cur_state = prev_st; ! break; ! } else if (mbclen == 1 || mbclen == (size_t) -1 || mbclen == 0) ! { ! /* In case of a singlebyte character. */ ! int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]; ! /* Apply the translateion if we need. */ ! if (pstr->trans != NULL && mbclen == 1) ! { ! ch = pstr->trans[ch]; ! pstr->mbs_case[byte_idx] = ch; ! } ! pstr->wcs[byte_idx] = iswlower (wc) ? toupper (wc) : wc; ! pstr->mbs[byte_idx++] = islower (ch) ? toupper (ch) : ch; ! if (BE (mbclen == (size_t) -1, 0)) ! pstr->cur_state = prev_st; ! } else /* mbclen > 1 */ ! { ! if (iswlower (wc)) ! wcrtomb ((char *) pstr->mbs + byte_idx, towupper (wc), &prev_st); ! else ! memcpy (pstr->mbs + byte_idx, ! pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen); ! pstr->wcs[byte_idx++] = iswlower (wc) ? toupper (wc) : wc; ! /* Write paddings. */ ! for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;) ! pstr->wcs[byte_idx++] = WEOF; ! } } pstr->valid_len = byte_idx; } --- 297,336 ---- remain_len = end_idx - byte_idx; prev_st = pstr->cur_state; mbclen = mbrtowc (&wc, ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx ! + byte_idx), remain_len, &pstr->cur_state); if (BE (mbclen == (size_t) -2, 0)) ! { ! /* The buffer doesn't have enough space, finish to build. */ ! pstr->cur_state = prev_st; ! break; ! } else if (mbclen == 1 || mbclen == (size_t) -1 || mbclen == 0) ! { ! /* In case of a singlebyte character. */ ! int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]; ! /* Apply the translateion if we need. */ ! 
if (pstr->trans != NULL && mbclen == 1) ! { ! ch = pstr->trans[ch]; ! pstr->mbs_case[byte_idx] = ch; ! } ! pstr->wcs[byte_idx] = iswlower (wc) ? toupper (wc) : wc; ! pstr->mbs[byte_idx++] = islower (ch) ? toupper (ch) : ch; ! if (BE (mbclen == (size_t) -1, 0)) ! pstr->cur_state = prev_st; ! } else /* mbclen > 1 */ ! { ! if (iswlower (wc)) ! wcrtomb ((char *) pstr->mbs + byte_idx, towupper (wc), &prev_st); ! else ! memcpy (pstr->mbs + byte_idx, ! pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen); ! pstr->wcs[byte_idx++] = iswlower (wc) ? toupper (wc) : wc; ! /* Write paddings. */ ! for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;) ! pstr->wcs[byte_idx++] = WEOF; ! } } pstr->valid_len = byte_idx; } *************** *** 333,367 **** Return the index. */ static int ! re_string_skip_chars (pstr, new_raw_idx) re_string_t *pstr; int new_raw_idx; { mbstate_t prev_st; int rawbuf_idx, mbclen; /* Skip the characters which are not necessary to check. */ for (rawbuf_idx = pstr->raw_mbs_idx + pstr->valid_len; rawbuf_idx < new_raw_idx;) { ! int remain_len = pstr->len - rawbuf_idx; prev_st = pstr->cur_state; ! mbclen = mbrlen ((const char *) pstr->raw_mbs + rawbuf_idx, remain_len, ! &pstr->cur_state); if (BE (mbclen == (size_t) -2 || mbclen == (size_t) -1 || mbclen == 0, 0)) ! { ! /* We treat these cases as a singlebyte character. */ ! mbclen = 1; ! pstr->cur_state = prev_st; ! } /* Then proceed the next character. */ rawbuf_idx += mbclen; } return rawbuf_idx; } #endif /* RE_ENABLE_I18N */ ! /* Build the buffer PSTR->MBS, and apply the translation if we need. This function is used in case of REG_ICASE. */ static void --- 339,377 ---- Return the index. */ static int ! re_string_skip_chars (pstr, new_raw_idx, last_wc) re_string_t *pstr; int new_raw_idx; + wint_t *last_wc; { mbstate_t prev_st; int rawbuf_idx, mbclen; + wchar_t wc = 0; /* Skip the characters which are not necessary to check. 
*/ for (rawbuf_idx = pstr->raw_mbs_idx + pstr->valid_len; rawbuf_idx < new_raw_idx;) { ! int remain_len; ! remain_len = pstr->len - rawbuf_idx; prev_st = pstr->cur_state; ! mbclen = mbrtowc (&wc, (const char *) pstr->raw_mbs + rawbuf_idx, ! remain_len, &pstr->cur_state); if (BE (mbclen == (size_t) -2 || mbclen == (size_t) -1 || mbclen == 0, 0)) ! { ! /* We treat these cases as a singlebyte character. */ ! mbclen = 1; ! pstr->cur_state = prev_st; ! } /* Then proceed the next character. */ rawbuf_idx += mbclen; } + *last_wc = (wint_t) wc; return rawbuf_idx; } #endif /* RE_ENABLE_I18N */ ! /* Build the buffer PSTR->MBS, and apply the translation if we need. This function is used in case of REG_ICASE. */ static void *************** *** 375,388 **** { int ch = pstr->raw_mbs[pstr->raw_mbs_idx + char_idx]; if (pstr->trans != NULL) ! { ! ch = pstr->trans[ch]; ! pstr->mbs_case[char_idx] = ch; ! } if (islower (ch)) ! pstr->mbs[char_idx] = toupper (ch); else ! pstr->mbs[char_idx] = ch; } pstr->valid_len = char_idx; } --- 385,398 ---- { int ch = pstr->raw_mbs[pstr->raw_mbs_idx + char_idx]; if (pstr->trans != NULL) ! { ! ch = pstr->trans[ch]; ! pstr->mbs_case[char_idx] = ch; ! } if (islower (ch)) ! pstr->mbs[char_idx] = toupper (ch); else ! pstr->mbs[char_idx] = ch; } pstr->valid_len = char_idx; } *************** *** 420,484 **** /* Reset buffer. */ #ifdef RE_ENABLE_I18N if (MB_CUR_MAX > 1) ! memset (&pstr->cur_state, '\0', sizeof (mbstate_t)); #endif /* RE_ENABLE_I18N */ pstr->len += pstr->raw_mbs_idx; pstr->stop += pstr->raw_mbs_idx; pstr->valid_len = pstr->raw_mbs_idx = 0; pstr->tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF ! : CONTEXT_NEWLINE | CONTEXT_BEGBUF); if (!MBS_CASE_ALLOCATED (pstr)) ! pstr->mbs_case = (unsigned char *) pstr->raw_mbs; if (!MBS_ALLOCATED (pstr) && !MBS_CASE_ALLOCATED (pstr)) ! 
pstr->mbs = (unsigned char *) pstr->raw_mbs; offset = idx; } if (offset != 0) { - pstr->tip_context = re_string_context_at (pstr, offset - 1, eflags, - newline); /* Are the characters which are already checked remain? */ if (offset < pstr->valid_len) ! { ! /* Yes, move them to the front of the buffer. */ #ifdef RE_ENABLE_I18N ! if (MB_CUR_MAX > 1) ! memmove (pstr->wcs, pstr->wcs + offset, ! (pstr->valid_len - offset) * sizeof (wint_t)); #endif /* RE_ENABLE_I18N */ ! if (MBS_ALLOCATED (pstr)) ! memmove (pstr->mbs, pstr->mbs + offset, ! pstr->valid_len - offset); ! if (MBS_CASE_ALLOCATED (pstr)) ! memmove (pstr->mbs_case, pstr->mbs_case + offset, ! pstr->valid_len - offset); ! pstr->valid_len -= offset; #if DEBUG ! assert (pstr->valid_len > 0); #endif ! } else ! { ! /* No, skip all characters until IDX. */ ! pstr->valid_len = 0; #ifdef RE_ENABLE_I18N ! if (MB_CUR_MAX > 1) ! { ! int wcs_idx; ! pstr->valid_len = re_string_skip_chars (pstr, idx) - idx; ! for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx) ! pstr->wcs[wcs_idx] = WEOF; ! } #endif /* RE_ENABLE_I18N */ ! } if (!MBS_CASE_ALLOCATED (pstr)) ! { ! pstr->mbs_case += offset; ! /* In case of !MBS_ALLOCATED && !MBS_CASE_ALLOCATED. */ ! if (!MBS_ALLOCATED (pstr)) ! pstr->mbs += offset; ! } } pstr->raw_mbs_idx = idx; pstr->len -= offset; --- 430,509 ---- /* Reset buffer. */ #ifdef RE_ENABLE_I18N if (MB_CUR_MAX > 1) ! memset (&pstr->cur_state, '\0', sizeof (mbstate_t)); #endif /* RE_ENABLE_I18N */ pstr->len += pstr->raw_mbs_idx; pstr->stop += pstr->raw_mbs_idx; pstr->valid_len = pstr->raw_mbs_idx = 0; pstr->tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF ! : CONTEXT_NEWLINE | CONTEXT_BEGBUF); if (!MBS_CASE_ALLOCATED (pstr)) ! pstr->mbs_case = (unsigned char *) pstr->raw_mbs; if (!MBS_ALLOCATED (pstr) && !MBS_CASE_ALLOCATED (pstr)) ! pstr->mbs = (unsigned char *) pstr->raw_mbs; offset = idx; } if (offset != 0) { /* Are the characters which are already checked remain? */ if (offset < pstr->valid_len) ! { ! 
/* Yes, move them to the front of the buffer. */ ! pstr->tip_context = re_string_context_at (pstr, offset - 1, eflags, ! newline); #ifdef RE_ENABLE_I18N ! if (MB_CUR_MAX > 1) ! memmove (pstr->wcs, pstr->wcs + offset, ! (pstr->valid_len - offset) * sizeof (wint_t)); #endif /* RE_ENABLE_I18N */ ! if (MBS_ALLOCATED (pstr)) ! memmove (pstr->mbs, pstr->mbs + offset, ! pstr->valid_len - offset); ! if (MBS_CASE_ALLOCATED (pstr)) ! memmove (pstr->mbs_case, pstr->mbs_case + offset, ! pstr->valid_len - offset); ! pstr->valid_len -= offset; #if DEBUG ! assert (pstr->valid_len > 0); #endif ! } else ! { ! /* No, skip all characters until IDX. */ ! pstr->valid_len = 0; #ifdef RE_ENABLE_I18N ! if (MB_CUR_MAX > 1) ! { ! int wcs_idx; ! wint_t wc; ! pstr->valid_len = re_string_skip_chars (pstr, idx, &wc) - idx; ! for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx) ! pstr->wcs[wcs_idx] = WEOF; ! if (pstr->trans && wc <= 0xff) ! wc = pstr->trans[wc]; ! pstr->tip_context = (IS_WIDE_WORD_CHAR (wc) ? CONTEXT_WORD ! : ((newline && IS_WIDE_NEWLINE (wc)) ! ? CONTEXT_NEWLINE : 0)); ! } ! else #endif /* RE_ENABLE_I18N */ ! { ! int c = pstr->raw_mbs[pstr->raw_mbs_idx + offset - 1]; ! if (pstr->trans) ! c = pstr->trans[c]; ! pstr->tip_context = (IS_WORD_CHAR (c) ? CONTEXT_WORD ! : ((newline && IS_NEWLINE (c)) ! ? CONTEXT_NEWLINE : 0)); ! } ! } if (!MBS_CASE_ALLOCATED (pstr)) ! { ! pstr->mbs_case += offset; ! /* In case of !MBS_ALLOCATED && !MBS_CASE_ALLOCATED. */ ! if (!MBS_ALLOCATED (pstr)) ! pstr->mbs += offset; ! } } pstr->raw_mbs_idx = idx; pstr->len -= offset; *************** *** 489,505 **** if (MB_CUR_MAX > 1) { if (pstr->icase) ! build_wcs_upper_buffer (pstr); else ! build_wcs_buffer (pstr); } else #endif /* RE_ENABLE_I18N */ { if (pstr->icase) ! build_upper_buffer (pstr); else if (pstr->trans != NULL) ! re_string_translate_buffer (pstr); } pstr->cur_idx = 0; --- 514,530 ---- if (MB_CUR_MAX > 1) { if (pstr->icase) ! build_wcs_upper_buffer (pstr); else ! 
build_wcs_buffer (pstr); } else #endif /* RE_ENABLE_I18N */ { if (pstr->icase) ! build_upper_buffer (pstr); else if (pstr->trans != NULL) ! re_string_translate_buffer (pstr); } pstr->cur_idx = 0; *************** *** 530,546 **** if (idx < 0 || idx == input->len) { if (idx < 0) ! /* In this case, we use the value stored in input->tip_context, ! since we can't know the character in input->mbs[-1] here. */ ! return input->tip_context; else /* (idx == input->len) */ ! return ((eflags & REG_NOTEOL) ? CONTEXT_ENDBUF ! : CONTEXT_NEWLINE | CONTEXT_ENDBUF); } - c = re_string_byte_at (input, idx); - if (IS_WORD_CHAR (c)) - return CONTEXT_WORD; - return (newline_anchor && IS_NEWLINE (c)) ? CONTEXT_NEWLINE : 0; } /* Functions for set operation. */ --- 555,593 ---- if (idx < 0 || idx == input->len) { if (idx < 0) ! /* In this case, we use the value stored in input->tip_context, ! since we can't know the character in input->mbs[-1] here. */ ! return input->tip_context; else /* (idx == input->len) */ ! return ((eflags & REG_NOTEOL) ? CONTEXT_ENDBUF ! : CONTEXT_NEWLINE | CONTEXT_ENDBUF); ! } ! if (MB_CUR_MAX == 1) ! { ! c = re_string_byte_at (input, idx); ! if (IS_WORD_CHAR (c)) ! return CONTEXT_WORD; ! return (newline_anchor && IS_NEWLINE (c)) ? CONTEXT_NEWLINE : 0; ! } ! else ! { ! wint_t wc; ! int wc_idx = idx; ! while(input->wcs[wc_idx] == WEOF) ! { ! #ifdef DEBUG ! /* It must not happen. */ ! assert (wc_idx >= 0); ! #endif ! --wc_idx; ! if (wc_idx < 0) ! return input->tip_context; ! } ! wc = input->wcs[wc_idx]; ! if (IS_WIDE_WORD_CHAR (wc)) ! return CONTEXT_WORD; ! return (newline_anchor && IS_WIDE_NEWLINE (wc)) ? CONTEXT_NEWLINE : 0; } } /* Functions for set operation. */ *************** *** 590,604 **** { set->nelem = 2; if (elem1 < elem2) ! { ! set->elems[0] = elem1; ! set->elems[1] = elem2; ! } else ! { ! set->elems[0] = elem2; ! set->elems[1] = elem1; ! } } return REG_NOERROR; } --- 637,651 ---- { set->nelem = 2; if (elem1 < elem2) ! { ! set->elems[0] = elem1; ! 
set->elems[1] = elem2; ! } else ! { ! set->elems[0] = elem2; ! set->elems[1] = elem1; ! } } return REG_NOERROR; } *************** *** 614,620 **** dest->alloc = dest->nelem; dest->elems = re_malloc (int, dest->alloc); if (BE (dest->elems == NULL, 0)) ! return REG_ESPACE; memcpy (dest->elems, src->elems, src->nelem * sizeof (int)); } else --- 661,667 ---- dest->alloc = dest->nelem; dest->elems = re_malloc (int, dest->alloc); if (BE (dest->elems == NULL, 0)) ! return REG_ESPACE; memcpy (dest->elems, src->elems, src->nelem * sizeof (int)); } else *************** *** 635,646 **** if (src1->nelem > 0 && src2->nelem > 0) { if (src1->nelem + src2->nelem + dest->nelem > dest->alloc) ! { ! dest->alloc = src1->nelem + src2->nelem + dest->nelem; ! dest->elems = re_realloc (dest->elems, int, dest->alloc); ! if (BE (dest->elems == NULL, 0)) ! return REG_ESPACE; ! } } else return REG_NOERROR; --- 682,693 ---- if (src1->nelem > 0 && src2->nelem > 0) { if (src1->nelem + src2->nelem + dest->nelem > dest->alloc) ! { ! dest->alloc = src1->nelem + src2->nelem + dest->nelem; ! dest->elems = re_realloc (dest->elems, int, dest->alloc); ! if (BE (dest->elems == NULL, 0)) ! return REG_ESPACE; ! } } else return REG_NOERROR; *************** *** 648,671 **** for (i1 = i2 = id = 0 ; i1 < src1->nelem && i2 < src2->nelem ;) { if (src1->elems[i1] > src2->elems[i2]) ! { ! ++i2; ! continue; ! } if (src1->elems[i1] == src2->elems[i2]) ! { ! while (id < dest->nelem && dest->elems[id] < src2->elems[i2]) ! ++id; ! if (id < dest->nelem && dest->elems[id] == src2->elems[i2]) ! ++id; ! else ! { ! memmove (dest->elems + id + 1, dest->elems + id, ! sizeof (int) * (dest->nelem - id)); ! dest->elems[id++] = src2->elems[i2++]; ! ++dest->nelem; ! } ! } ++i1; } return REG_NOERROR; --- 695,718 ---- for (i1 = i2 = id = 0 ; i1 < src1->nelem && i2 < src2->nelem ;) { if (src1->elems[i1] > src2->elems[i2]) ! { ! ++i2; ! continue; ! } if (src1->elems[i1] == src2->elems[i2]) ! { ! 
while (id < dest->nelem && dest->elems[id] < src2->elems[i2]) ! ++id; ! if (id < dest->nelem && dest->elems[id] == src2->elems[i2]) ! ++id; ! else ! { ! memmove (dest->elems + id + 1, dest->elems + id, ! sizeof (int) * (dest->nelem - id)); ! dest->elems[id++] = src2->elems[i2++]; ! ++dest->nelem; ! } ! } ++i1; } return REG_NOERROR; *************** *** 685,723 **** dest->alloc = src1->nelem + src2->nelem; dest->elems = re_malloc (int, dest->alloc); if (BE (dest->elems == NULL, 0)) ! return REG_ESPACE; } else { if (src1 != NULL && src1->nelem > 0) ! return re_node_set_init_copy (dest, src1); else if (src2 != NULL && src2->nelem > 0) ! return re_node_set_init_copy (dest, src2); else ! re_node_set_init_empty (dest); return REG_NOERROR; } for (i1 = i2 = id = 0 ; i1 < src1->nelem && i2 < src2->nelem ;) { if (src1->elems[i1] > src2->elems[i2]) ! { ! dest->elems[id++] = src2->elems[i2++]; ! continue; ! } if (src1->elems[i1] == src2->elems[i2]) ! ++i2; dest->elems[id++] = src1->elems[i1++]; } if (i1 < src1->nelem) { memcpy (dest->elems + id, src1->elems + i1, ! (src1->nelem - i1) * sizeof (int)); id += src1->nelem - i1; } else if (i2 < src2->nelem) { memcpy (dest->elems + id, src2->elems + i2, ! (src2->nelem - i2) * sizeof (int)); id += src2->nelem - i2; } dest->nelem = id; --- 732,770 ---- dest->alloc = src1->nelem + src2->nelem; dest->elems = re_malloc (int, dest->alloc); if (BE (dest->elems == NULL, 0)) ! return REG_ESPACE; } else { if (src1 != NULL && src1->nelem > 0) ! return re_node_set_init_copy (dest, src1); else if (src2 != NULL && src2->nelem > 0) ! return re_node_set_init_copy (dest, src2); else ! re_node_set_init_empty (dest); return REG_NOERROR; } for (i1 = i2 = id = 0 ; i1 < src1->nelem && i2 < src2->nelem ;) { if (src1->elems[i1] > src2->elems[i2]) ! { ! dest->elems[id++] = src2->elems[i2++]; ! continue; ! } if (src1->elems[i1] == src2->elems[i2]) ! 
++i2; dest->elems[id++] = src1->elems[i1++]; } if (i1 < src1->nelem) { memcpy (dest->elems + id, src1->elems + i1, ! (src1->nelem - i1) * sizeof (int)); id += src1->nelem - i1; } else if (i2 < src2->nelem) { memcpy (dest->elems + id, src2->elems + i2, ! (src2->nelem - i2) * sizeof (int)); id += src2->nelem - i2; } dest->nelem = id; *************** *** 737,746 **** return REG_NOERROR; if (dest->alloc < src->nelem + dest->nelem) { dest->alloc = 2 * (src->nelem + dest->alloc); ! dest->elems = re_realloc (dest->elems, int, dest->alloc); ! if (BE (dest->elems == NULL, 0)) ! return REG_ESPACE; } for (si = 0, di = 0 ; si < src->nelem && di < dest->nelem ;) --- 784,795 ---- return REG_NOERROR; if (dest->alloc < src->nelem + dest->nelem) { + int *new_buffer; dest->alloc = 2 * (src->nelem + dest->alloc); ! new_buffer = re_realloc (dest->elems, int, dest->alloc); ! if (BE (new_buffer == NULL, 0)) ! return REG_ESPACE; ! dest->elems = new_buffer; } for (si = 0, di = 0 ; si < src->nelem && di < dest->nelem ;) *************** *** 749,782 **** /* Binary search the spot we will add the new element. */ right = dest->nelem; while (di < right) ! { ! mid = (di + right) / 2; ! if (dest->elems[mid] < src_elem) ! di = mid + 1; ! else ! right = mid; ! } if (di >= dest->nelem) ! break; if (dest->elems[di] == src_elem) ! { ! /* Skip since, DEST already has the element. */ ! ++di; ! ++si; ! continue; ! } /* Skip the src elements which are less than dest->elems[di]. */ cp_from = si; while (si < src->nelem && src->elems[si] < dest->elems[di]) ! ++si; /* Copy these src elements. */ ncp = si - cp_from; memmove (dest->elems + di + ncp, dest->elems + di, ! sizeof (int) * (dest->nelem - di)); memcpy (dest->elems + di, src->elems + cp_from, ! sizeof (int) * ncp); /* Update counters. */ di += ncp; dest->nelem += ncp; --- 798,831 ---- /* Binary search the spot we will add the new element. */ right = dest->nelem; while (di < right) ! { ! mid = (di + right) / 2; ! if (dest->elems[mid] < src_elem) ! 
di = mid + 1; ! else ! right = mid; ! } if (di >= dest->nelem) ! break; if (dest->elems[di] == src_elem) ! { ! /* Skip since, DEST already has the element. */ ! ++di; ! ++si; ! continue; ! } /* Skip the src elements which are less than dest->elems[di]. */ cp_from = si; while (si < src->nelem && src->elems[si] < dest->elems[di]) ! ++si; /* Copy these src elements. */ ncp = si - cp_from; memmove (dest->elems + di + ncp, dest->elems + di, ! sizeof (int) * (dest->nelem - di)); memcpy (dest->elems + di, src->elems + cp_from, ! sizeof (int) * ncp); /* Update counters. */ di += ncp; dest->nelem += ncp; *************** *** 786,792 **** if (si < src->nelem) { memcpy (dest->elems + di, src->elems + si, ! sizeof (int) * (src->nelem - si)); dest->nelem += src->nelem - si; } return REG_NOERROR; --- 835,841 ---- if (si < src->nelem) { memcpy (dest->elems + di, src->elems + si, ! sizeof (int) * (src->nelem - si)); dest->nelem += src->nelem - si; } return REG_NOERROR; *************** *** 806,814 **** if (set->elems == NULL || set->alloc == 0) { if (BE (re_node_set_init_1 (set, elem) == REG_NOERROR, 1)) ! return 1; else ! return -1; } /* Binary search the spot we will add the new element. */ --- 855,863 ---- if (set->elems == NULL || set->alloc == 0) { if (BE (re_node_set_init_1 (set, elem) == REG_NOERROR, 1)) ! return 1; else ! return -1; } /* Binary search the spot we will add the new element. */ *************** *** 818,826 **** { mid = (idx + right) / 2; if (set->elems[mid] < elem) ! idx = mid + 1; else ! right = mid; } /* Realloc if we need. */ --- 867,875 ---- { mid = (idx + right) / 2; if (set->elems[mid] < elem) ! idx = mid + 1; else ! right = mid; } /* Realloc if we need. */ *************** *** 830,843 **** set->alloc = set->alloc * 2; new_array = re_malloc (int, set->alloc); if (BE (new_array == NULL, 0)) ! return -1; /* Copy the elements they are followed by the new element. */ if (idx > 0) ! 
memcpy (new_array, set->elems, sizeof (int) * (idx)); /* Copy the elements which follows the new element. */ if (set->nelem - idx > 0) ! memcpy (new_array + idx + 1, set->elems + idx, ! sizeof (int) * (set->nelem - idx)); re_free (set->elems); set->elems = new_array; } --- 879,892 ---- set->alloc = set->alloc * 2; new_array = re_malloc (int, set->alloc); if (BE (new_array == NULL, 0)) ! return -1; /* Copy the elements they are followed by the new element. */ if (idx > 0) ! memcpy (new_array, set->elems, sizeof (int) * (idx)); /* Copy the elements which follows the new element. */ if (set->nelem - idx > 0) ! memcpy (new_array + idx + 1, set->elems + idx, ! sizeof (int) * (set->nelem - idx)); re_free (set->elems); set->elems = new_array; } *************** *** 845,852 **** { /* Move the elements which follows the new element. */ if (set->nelem - idx > 0) ! memmove (set->elems + idx + 1, set->elems + idx, ! sizeof (int) * (set->nelem - idx)); } /* Insert the new element. */ set->elems[idx] = elem; --- 894,901 ---- { /* Move the elements which follows the new element. */ if (set->nelem - idx > 0) ! memmove (set->elems + idx + 1, set->elems + idx, ! sizeof (int) * (set->nelem - idx)); } /* Insert the new element. */ set->elems[idx] = elem; *************** *** 888,896 **** { mid = (idx + right) / 2; if (set->elems[mid] < elem) ! idx = mid + 1; else ! right = mid; } return set->elems[idx] == elem ? idx + 1 : 0; } --- 937,945 ---- { mid = (idx + right) / 2; if (set->elems[mid] < elem) ! idx = mid + 1; else ! right = mid; } return set->elems[idx] == elem ? idx + 1 : 0; } *************** *** 904,910 **** return; if (idx < set->nelem - 1) memmove (set->elems + idx, set->elems + idx + 1, ! sizeof (int) * (set->nelem - idx - 1)); --set->nelem; } --- 953,959 ---- return; if (idx < set->nelem - 1) memmove (set->elems + idx, set->elems + idx + 1, ! 
sizeof (int) * (set->nelem - idx - 1)); --set->nelem; } *************** *** 924,951 **** dfa->nodes_alloc *= 2; new_array = re_realloc (dfa->nodes, re_token_t, dfa->nodes_alloc); if (BE (new_array == NULL, 0)) ! return -1; else ! dfa->nodes = new_array; if (mode) ! { ! int *new_nexts; ! re_node_set *new_edests, *new_eclosures, *new_inveclosures; ! ! new_nexts = re_realloc (dfa->nexts, int, dfa->nodes_alloc); ! new_edests = re_realloc (dfa->edests, re_node_set, dfa->nodes_alloc); ! new_eclosures = re_realloc (dfa->eclosures, re_node_set, ! dfa->nodes_alloc); ! new_inveclosures = re_realloc (dfa->inveclosures, re_node_set, ! dfa->nodes_alloc); ! if (BE (new_nexts == NULL || new_edests == NULL ! || new_eclosures == NULL || new_inveclosures == NULL, 0)) ! return -1; ! dfa->nexts = new_nexts; ! dfa->edests = new_edests; ! dfa->eclosures = new_eclosures; ! dfa->inveclosures = new_inveclosures; ! } } dfa->nodes[dfa->nodes_len] = token; dfa->nodes[dfa->nodes_len].duplicated = 0; --- 973,1000 ---- dfa->nodes_alloc *= 2; new_array = re_realloc (dfa->nodes, re_token_t, dfa->nodes_alloc); if (BE (new_array == NULL, 0)) ! return -1; else ! dfa->nodes = new_array; if (mode) ! { ! int *new_nexts; ! re_node_set *new_edests, *new_eclosures, *new_inveclosures; ! ! new_nexts = re_realloc (dfa->nexts, int, dfa->nodes_alloc); ! new_edests = re_realloc (dfa->edests, re_node_set, dfa->nodes_alloc); ! new_eclosures = re_realloc (dfa->eclosures, re_node_set, ! dfa->nodes_alloc); ! new_inveclosures = re_realloc (dfa->inveclosures, re_node_set, ! dfa->nodes_alloc); ! if (BE (new_nexts == NULL || new_edests == NULL ! || new_eclosures == NULL || new_inveclosures == NULL, 0)) ! return -1; ! dfa->nexts = new_nexts; ! dfa->edests = new_edests; ! dfa->eclosures = new_eclosures; ! dfa->inveclosures = new_inveclosures; ! } } dfa->nodes[dfa->nodes_len] = token; dfa->nodes[dfa->nodes_len].duplicated = 0; *************** *** 970,978 **** Otherwise create the new one and return it. 
In case of an error return NULL and set the error code in ERR. Note: - We assume NULL as the invalid state, then it is possible that ! return value is NULL and ERR is REG_NOERROR. ! - We never return non-NULL value in case of any errors, it is for ! optimization. */ static re_dfastate_t* re_acquire_state (err, dfa, nodes) --- 1019,1027 ---- Otherwise create the new one and return it. In case of an error return NULL and set the error code in ERR. Note: - We assume NULL as the invalid state, then it is possible that ! return value is NULL and ERR is REG_NOERROR. ! - We never return non-NULL value in case of any errors, it is for ! optimization. */ static re_dfastate_t* re_acquire_state (err, dfa, nodes) *************** *** 996,1004 **** { re_dfastate_t *state = spot->array[i]; if (hash != state->hash) ! continue; if (re_node_set_compare (&state->nodes, nodes)) ! return state; } /* There are no appropriate state in the dfa, create the new one. */ --- 1045,1053 ---- { re_dfastate_t *state = spot->array[i]; if (hash != state->hash) ! continue; if (re_node_set_compare (&state->nodes, nodes)) ! return state; } /* There are no appropriate state in the dfa, create the new one. */ *************** *** 1018,1026 **** Otherwise create the new one and return it. In case of an error return NULL and set the error code in ERR. Note: - We assume NULL as the invalid state, then it is possible that ! return value is NULL and ERR is REG_NOERROR. ! - We never return non-NULL value in case of any errors, it is for ! optimization. */ static re_dfastate_t* re_acquire_state_context (err, dfa, nodes, context) --- 1067,1075 ---- Otherwise create the new one and return it. In case of an error return NULL and set the error code in ERR. Note: - We assume NULL as the invalid state, then it is possible that ! return value is NULL and ERR is REG_NOERROR. ! - We never return non-NULL value in case of any errors, it is for ! optimization. 
*/ static re_dfastate_t* re_acquire_state_context (err, dfa, nodes, context) *************** *** 1045,1054 **** { re_dfastate_t *state = spot->array[i]; if (hash != state->hash) ! continue; if (re_node_set_compare (state->entrance_nodes, nodes) ! && state->context == context) ! return state; } /* There are no appropriate state in `dfa', create the new one. */ new_state = create_cd_newstate (dfa, nodes, context, hash); --- 1094,1103 ---- { re_dfastate_t *state = spot->array[i]; if (hash != state->hash) ! continue; if (re_node_set_compare (state->entrance_nodes, nodes) ! && state->context == context) ! return state; } /* There are no appropriate state in `dfa', create the new one. */ new_state = create_cd_newstate (dfa, nodes, context, hash); *************** *** 1071,1080 **** unsigned int hash; { re_dfastate_t *newstate; newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1); if (BE (newstate == NULL, 0)) return NULL; ! re_node_set_init_copy (&newstate->nodes, nodes); newstate->trtable = NULL; newstate->trtable_search = NULL; newstate->hash = hash; --- 1120,1135 ---- unsigned int hash; { re_dfastate_t *newstate; + reg_errcode_t err; newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1); if (BE (newstate == NULL, 0)) return NULL; ! err = re_node_set_init_copy (&newstate->nodes, nodes); ! if (BE (err != REG_NOERROR, 0)) ! { ! re_free (newstate); ! return NULL; ! } newstate->trtable = NULL; newstate->trtable_search = NULL; newstate->hash = hash; *************** *** 1095,1104 **** if (spot->alloc <= spot->num) { spot->alloc = 2 * spot->num + 2; ! spot->array = re_realloc (spot->array, re_dfastate_t *, spot->alloc); ! if (BE (spot->array == NULL, 0)) ! return REG_ESPACE; } spot->array[spot->num++] = newstate; return REG_NOERROR; --- 1150,1161 ---- if (spot->alloc <= spot->num) { + re_dfastate_t **new_array; spot->alloc = 2 * spot->num + 2; ! new_array = re_realloc (spot->array, re_dfastate_t *, spot->alloc); ! if (BE (new_array == NULL, 0)) ! 
return REG_ESPACE; ! spot->array = new_array; } spot->array[spot->num++] = newstate; return REG_NOERROR; *************** *** 1126,1148 **** re_token_t *node = dfa->nodes + nodes->elems[i]; re_token_type_t type = node->type; if (type == CHARACTER && !node->constraint) ! continue; /* If the state has the halt node, the state is a halt state. */ else if (type == END_OF_RE) ! newstate->halt = 1; #ifdef RE_ENABLE_I18N else if (type == COMPLEX_BRACKET ! || (type == OP_PERIOD && MB_CUR_MAX > 1)) ! newstate->accept_mb = 1; #endif /* RE_ENABLE_I18N */ else if (type == OP_BACK_REF) ! newstate->has_backref = 1; else if (type == ANCHOR || node->constraint) ! newstate->has_constraint = 1; } err = register_state (dfa, newstate, hash); ! return (err != REG_NOERROR) ? NULL : newstate; } /* Create the new state which is depend on the context CONTEXT. --- 1183,1210 ---- re_token_t *node = dfa->nodes + nodes->elems[i]; re_token_type_t type = node->type; if (type == CHARACTER && !node->constraint) ! continue; /* If the state has the halt node, the state is a halt state. */ else if (type == END_OF_RE) ! newstate->halt = 1; #ifdef RE_ENABLE_I18N else if (type == COMPLEX_BRACKET ! || (type == OP_PERIOD && MB_CUR_MAX > 1)) ! newstate->accept_mb = 1; #endif /* RE_ENABLE_I18N */ else if (type == OP_BACK_REF) ! newstate->has_backref = 1; else if (type == ANCHOR || node->constraint) ! newstate->has_constraint = 1; } err = register_state (dfa, newstate, hash); ! if (BE (err != REG_NOERROR, 0)) ! { ! free_state (newstate); ! newstate = NULL; ! } ! return newstate; } /* Create the new state which is depend on the context CONTEXT. *************** *** 1170,1212 **** re_token_t *node = dfa->nodes + nodes->elems[i]; re_token_type_t type = node->type; if (node->constraint) ! constraint = node->constraint; if (type == CHARACTER && !constraint) ! continue; /* If the state has the halt node, the state is a halt state. */ else if (type == END_OF_RE) ! 
newstate->halt = 1; #ifdef RE_ENABLE_I18N else if (type == COMPLEX_BRACKET ! || (type == OP_PERIOD && MB_CUR_MAX > 1)) ! newstate->accept_mb = 1; #endif /* RE_ENABLE_I18N */ else if (type == OP_BACK_REF) ! newstate->has_backref = 1; else if (type == ANCHOR) ! constraint = node->opr.ctx_type; if (constraint) ! { ! if (newstate->entrance_nodes == &newstate->nodes) ! { ! newstate->entrance_nodes = re_malloc (re_node_set, 1); ! if (BE (newstate->entrance_nodes == NULL, 0)) ! return NULL; ! re_node_set_init_copy (newstate->entrance_nodes, nodes); ! nctx_nodes = 0; ! newstate->has_constraint = 1; ! } ! ! if (NOT_SATISFY_PREV_CONSTRAINT (constraint,context)) ! { ! re_node_set_remove_at (&newstate->nodes, i - nctx_nodes); ! ++nctx_nodes; ! } ! } } err = register_state (dfa, newstate, hash); ! return (err != REG_NOERROR) ? NULL : newstate; } --- 1232,1296 ---- re_token_t *node = dfa->nodes + nodes->elems[i]; re_token_type_t type = node->type; if (node->constraint) ! constraint = node->constraint; if (type == CHARACTER && !constraint) ! continue; /* If the state has the halt node, the state is a halt state. */ else if (type == END_OF_RE) ! newstate->halt = 1; #ifdef RE_ENABLE_I18N else if (type == COMPLEX_BRACKET ! || (type == OP_PERIOD && MB_CUR_MAX > 1)) ! newstate->accept_mb = 1; #endif /* RE_ENABLE_I18N */ else if (type == OP_BACK_REF) ! newstate->has_backref = 1; else if (type == ANCHOR) ! constraint = node->opr.ctx_type; if (constraint) ! { ! if (newstate->entrance_nodes == &newstate->nodes) ! { ! newstate->entrance_nodes = re_malloc (re_node_set, 1); ! if (BE (newstate->entrance_nodes == NULL, 0)) ! { ! free_state (newstate); ! return NULL; ! } ! re_node_set_init_copy (newstate->entrance_nodes, nodes); ! nctx_nodes = 0; ! newstate->has_constraint = 1; ! } ! ! if (NOT_SATISFY_PREV_CONSTRAINT (constraint,context)) ! { ! re_node_set_remove_at (&newstate->nodes, i - nctx_nodes); ! ++nctx_nodes; ! } ! } } err = register_state (dfa, newstate, hash); ! 
if (BE (err != REG_NOERROR, 0)) ! { ! free_state (newstate); ! newstate = NULL; ! } ! return newstate; } + static void + free_state (state) + re_dfastate_t *state; + { + if (state->entrance_nodes != &state->nodes) + { + re_node_set_free (state->entrance_nodes); + re_free (state->entrance_nodes); + } + re_node_set_free (&state->nodes); + re_free (state->trtable); + re_free (state->trtable_search); + re_free (state); + } diff -rNC3 sed-4.0.3/lib/regex_internal.h sed-4.0.4/lib/regex_internal.h *** sed-4.0.3/lib/regex_internal.h Wed Oct 23 17:42:16 2002 --- sed-4.0.4/lib/regex_internal.h Fri Nov 22 12:18:11 2002 *************** *** 30,35 **** --- 30,36 ---- /* The character which represents newline. */ #define NEWLINE_CHAR '\n' + #define WIDE_NEWLINE_CHAR L'\n' /* Rename to standard API for using out of glibc. */ #ifndef _LIBC *************** *** 190,209 **** { union { ! unsigned char c; /* for CHARACTER */ ! re_bitset_ptr_t sbcset; /* for SIMPLE_BRACKET */ #ifdef RE_ENABLE_I18N ! re_charset_t *mbcset; /* for COMPLEX_BRACKET */ #endif /* RE_ENABLE_I18N */ ! int idx; /* for BACK_REF */ ! re_context_type ctx_type; /* for ANCHOR */ } opr; #if __GNUC__ >= 2 re_token_type_t type : 8; #else re_token_type_t type; #endif ! unsigned int constraint : 10; /* context constraint */ unsigned int duplicated : 1; #ifdef RE_ENABLE_I18N unsigned int mb_partial : 1; --- 191,210 ---- { union { ! unsigned char c; /* for CHARACTER */ ! re_bitset_ptr_t sbcset; /* for SIMPLE_BRACKET */ #ifdef RE_ENABLE_I18N ! re_charset_t *mbcset; /* for COMPLEX_BRACKET */ #endif /* RE_ENABLE_I18N */ ! int idx; /* for BACK_REF */ ! re_context_type ctx_type; /* for ANCHOR */ } opr; #if __GNUC__ >= 2 re_token_type_t type : 8; #else re_token_type_t type; #endif ! unsigned int constraint : 10; /* context constraint */ unsigned int duplicated : 1; #ifdef RE_ENABLE_I18N unsigned int mb_partial : 1; *************** *** 211,219 **** } re_token_t; #define IS_EPSILON_NODE(type) \ ! 
((1 << (type)) & ((1 << OP_ALT) | (1 << OP_DUP_ASTERISK) | (1 << OP_DUP_PLUS) \ ! | (1 << OP_DUP_QUESTION) | (1 << ANCHOR) \ ! | (1 << OP_OPEN_SUBEXP) | (1 << OP_CLOSE_SUBEXP))) #define ACCEPT_MB_NODE(type) \ ((type) == COMPLEX_BRACKET || (type) == OP_PERIOD) --- 212,220 ---- } re_token_t; #define IS_EPSILON_NODE(type) \ ! ((type) == OP_ALT || (type) == OP_DUP_ASTERISK || (type) == OP_DUP_PLUS \ ! || (type) == OP_DUP_QUESTION || (type) == ANCHOR \ ! || (type) == OP_OPEN_SUBEXP || (type) == OP_CLOSE_SUBEXP) #define ACCEPT_MB_NODE(type) \ ((type) == COMPLEX_BRACKET || (type) == OP_PERIOD) *************** *** 270,284 **** static reg_errcode_t re_string_allocate (re_string_t *pstr, const char *str, ! int len, int init_len, ! RE_TRANSLATE_TYPE trans, int icase); static reg_errcode_t re_string_construct (re_string_t *pstr, const char *str, ! int len, RE_TRANSLATE_TYPE trans, ! int icase); static reg_errcode_t re_string_reconstruct (re_string_t *pstr, int idx, ! int eflags, int newline); static reg_errcode_t re_string_realloc_buffers (re_string_t *pstr, ! int new_buf_len); #ifdef RE_ENABLE_I18N static void build_wcs_buffer (re_string_t *pstr); static void build_wcs_upper_buffer (re_string_t *pstr); --- 271,285 ---- static reg_errcode_t re_string_allocate (re_string_t *pstr, const char *str, ! int len, int init_len, ! RE_TRANSLATE_TYPE trans, int icase); static reg_errcode_t re_string_construct (re_string_t *pstr, const char *str, ! int len, RE_TRANSLATE_TYPE trans, ! int icase); static reg_errcode_t re_string_reconstruct (re_string_t *pstr, int idx, ! int eflags, int newline); static reg_errcode_t re_string_realloc_buffers (re_string_t *pstr, ! 
int new_buf_len); #ifdef RE_ENABLE_I18N static void build_wcs_buffer (re_string_t *pstr); static void build_wcs_upper_buffer (re_string_t *pstr); *************** *** 292,298 **** static inline wint_t re_string_wchar_at (const re_string_t *pstr, int idx); #endif /* RE_ENABLE_I18N */ static unsigned int re_string_context_at (const re_string_t *input, int idx, ! int eflags, int newline_anchor); #define re_string_peek_byte(pstr, offset) \ ((pstr)->mbs[(pstr)->cur_idx + offset]) #define re_string_peek_byte_case(pstr, offset) \ --- 293,299 ---- static inline wint_t re_string_wchar_at (const re_string_t *pstr, int idx); #endif /* RE_ENABLE_I18N */ static unsigned int re_string_context_at (const re_string_t *input, int idx, ! int eflags, int newline_anchor); #define re_string_peek_byte(pstr, offset) \ ((pstr)->mbs[(pstr)->cur_idx + offset]) #define re_string_peek_byte_case(pstr, offset) \ *************** *** 305,311 **** ((idx) == (pstr)->len || (pstr)->wcs[idx] != WEOF) #define re_string_is_single_byte_char(pstr, idx) \ ((pstr)->wcs[idx] != WEOF && ((pstr)->len == (idx) \ ! || (pstr)->wcs[(idx) + 1] != WEOF)) #define re_string_eoi(pstr) ((pstr)->stop <= (pstr)->cur_idx) #define re_string_cur_idx(pstr) ((pstr)->cur_idx) #define re_string_get_buffer(pstr) ((pstr)->mbs) --- 306,312 ---- ((idx) == (pstr)->len || (pstr)->wcs[idx] != WEOF) #define re_string_is_single_byte_char(pstr, idx) \ ((pstr)->wcs[idx] != WEOF && ((pstr)->len == (idx) \ ! 
|| (pstr)->wcs[(idx) + 1] != WEOF)) #define re_string_eoi(pstr) ((pstr)->stop <= (pstr)->cur_idx) #define re_string_cur_idx(pstr) ((pstr)->cur_idx) #define re_string_get_buffer(pstr) ((pstr)->mbs) *************** *** 349,354 **** --- 350,357 ---- #define IS_WORD_CHAR(ch) (isalnum (ch) || (ch) == '_') #define IS_NEWLINE(ch) ((ch) == NEWLINE_CHAR) + #define IS_WIDE_WORD_CHAR(ch) (iswalnum (ch) || (ch) == L'_') + #define IS_WIDE_NEWLINE(ch) ((ch) == WIDE_NEWLINE_CHAR) #define NOT_SATISFY_PREV_CONSTRAINT(constraint,context) \ ((((constraint) & PREV_WORD_CONSTRAINT) && !IS_WORD_CONTEXT (context)) \ *************** *** 398,411 **** re_dfastate_t **array; }; struct re_backref_cache_entry { ! int node, str_idx, hash; ! int dst_node; int subexp_from; int subexp_to; - int subexp_len; int flag; }; typedef struct --- 401,442 ---- re_dfastate_t **array; }; + typedef struct + { + int next_idx; + int alloc; + re_dfastate_t **array; + } state_array_t; + + typedef struct + { + int node; + int str_idx; + state_array_t path; + re_node_set *limits; + } re_sub_match_last_t; + + typedef struct + { + int str_idx; + int node; + int next_last_offset; + int alasts; + int nlasts; + re_sub_match_last_t **lasts; + state_array_t *path; + re_node_set *limits; + } re_sub_match_top_t; + struct re_backref_cache_entry { ! int node; ! int str_idx; int subexp_from; int subexp_to; int flag; + re_sub_match_top_t *top; + re_sub_match_last_t *last; }; typedef struct *************** *** 425,432 **** int abkref_ents; struct re_backref_cache_entry *bkref_ents; int max_mb_elem_len; ! /* Cache the last lookup in the back reference cache. */ ! int cache_node, cache_str_idx, cache_first_idx, cache_last_idx; } re_match_context_t; typedef struct --- 456,464 ---- int abkref_ents; struct re_backref_cache_entry *bkref_ents; int max_mb_elem_len; ! int nsub_tops; ! int asub_tops; ! 
re_sub_match_top_t **sub_tops; } re_match_context_t; typedef struct *************** *** 498,518 **** static reg_errcode_t re_node_set_alloc (re_node_set *set, int size); static reg_errcode_t re_node_set_init_1 (re_node_set *set, int elem); static reg_errcode_t re_node_set_init_2 (re_node_set *set, int elem1, ! int elem2); #define re_node_set_init_empty(set) memset (set, '\0', sizeof (re_node_set)) static reg_errcode_t re_node_set_init_copy (re_node_set *dest, ! const re_node_set *src); static reg_errcode_t re_node_set_add_intersect (re_node_set *dest, ! const re_node_set *src1, ! const re_node_set *src2); static reg_errcode_t re_node_set_init_union (re_node_set *dest, ! const re_node_set *src1, ! const re_node_set *src2); static reg_errcode_t re_node_set_merge (re_node_set *dest, ! const re_node_set *src); static int re_node_set_insert (re_node_set *set, int elem); static int re_node_set_compare (const re_node_set *set1, ! const re_node_set *set2); static int re_node_set_contains (const re_node_set *set, int elem); static void re_node_set_remove_at (re_node_set *set, int idx); #define re_node_set_remove(set,id) \ --- 530,550 ---- static reg_errcode_t re_node_set_alloc (re_node_set *set, int size); static reg_errcode_t re_node_set_init_1 (re_node_set *set, int elem); static reg_errcode_t re_node_set_init_2 (re_node_set *set, int elem1, ! int elem2); #define re_node_set_init_empty(set) memset (set, '\0', sizeof (re_node_set)) static reg_errcode_t re_node_set_init_copy (re_node_set *dest, ! const re_node_set *src); static reg_errcode_t re_node_set_add_intersect (re_node_set *dest, ! const re_node_set *src1, ! const re_node_set *src2); static reg_errcode_t re_node_set_init_union (re_node_set *dest, ! const re_node_set *src1, ! const re_node_set *src2); static reg_errcode_t re_node_set_merge (re_node_set *dest, ! const re_node_set *src); static int re_node_set_insert (re_node_set *set, int elem); static int re_node_set_compare (const re_node_set *set1, ! 
const re_node_set *set2); static int re_node_set_contains (const re_node_set *set, int elem); static void re_node_set_remove_at (re_node_set *set, int idx); #define re_node_set_remove(set,id) \ *************** *** 521,531 **** #define re_node_set_free(set) re_free ((set)->elems) static int re_dfa_add_node (re_dfa_t *dfa, re_token_t token, int mode); static re_dfastate_t *re_acquire_state (reg_errcode_t *err, re_dfa_t *dfa, ! const re_node_set *nodes); static re_dfastate_t *re_acquire_state_context (reg_errcode_t *err, ! re_dfa_t *dfa, ! const re_node_set *nodes, ! unsigned int context); typedef enum --- 553,564 ---- #define re_node_set_free(set) re_free ((set)->elems) static int re_dfa_add_node (re_dfa_t *dfa, re_token_t token, int mode); static re_dfastate_t *re_acquire_state (reg_errcode_t *err, re_dfa_t *dfa, ! const re_node_set *nodes); static re_dfastate_t *re_acquire_state_context (reg_errcode_t *err, ! re_dfa_t *dfa, ! const re_node_set *nodes, ! unsigned int context); ! static void free_state (re_dfastate_t *state); typedef enum *************** *** 621,629 **** { table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB); extra = (const unsigned char *) ! _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB); indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE, ! _NL_COLLATE_INDIRECTMB); p = pstr->mbs + idx; tmp = findidx (&p); return p - pstr->mbs - idx; --- 654,662 ---- { table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB); extra = (const unsigned char *) ! _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB); indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE, ! 
_NL_COLLATE_INDIRECTMB); p = pstr->mbs + idx; tmp = findidx (&p); return p - pstr->mbs - idx; *************** *** 635,638 **** #endif /* RE_ENABLE_I18N */ #endif /* _REGEX_INTERNAL_H */ - --- 668,670 ---- diff -rNC3 sed-4.0.3/lib/regexec.c sed-4.0.4/lib/regexec.c *** sed-4.0.3/lib/regexec.c Wed Oct 23 13:52:23 2002 --- sed-4.0.4/lib/regexec.c Fri Nov 22 12:19:43 2002 *************** *** 44,181 **** #include "regex_internal.h" static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags, ! re_string_t *input, int n); static void match_ctx_free (re_match_context_t *cache); ! static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, re_dfa_t *dfa, ! int node, int str_idx, int from, int to); static void match_ctx_clear_flag (re_match_context_t *mctx); static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts, ! re_dfastate_t **limited_sts, int last_node, ! int last_str_idx, int check_subexp); static int re_search_2_stub (struct re_pattern_buffer *bufp, ! const char *string1, int length1, ! const char *string2, int length2, ! int start, int range, struct re_registers *regs, ! int stop, int ret_len); static int re_search_stub (struct re_pattern_buffer *bufp, ! const char *string, int length, int start, ! int range, int stop, struct re_registers *regs, ! int ret_len); static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, ! int nregs, int regs_allocated); static inline re_dfastate_t *acquire_init_state_context (reg_errcode_t *err, ! const regex_t *preg, ! const re_match_context_t *mctx, ! int idx); static int check_matching (const regex_t *preg, re_match_context_t *mctx, ! int fl_search, int fl_longest_match); static int check_halt_node_context (const re_dfa_t *dfa, int node, ! unsigned int context); static int check_halt_state_context (const regex_t *preg, ! const re_dfastate_t *state, ! const re_match_context_t *mctx, int idx); static void update_regs (re_dfa_t *dfa, regmatch_t *pmatch, int cur_node, ! 
int cur_idx, int nmatch); static int proceed_next_node (const regex_t *preg, int nregs, regmatch_t *regs, ! const re_match_context_t *mctx, ! int *pidx, int node, re_node_set *eps_via_nodes, ! struct re_fail_stack_t *fs); ! static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs, ! int str_idx, int *dests, int nregs, ! regmatch_t *regs, ! re_node_set *eps_via_nodes); static int pop_fail_stack (struct re_fail_stack_t *fs, int *pidx, int nregs, ! regmatch_t *regs, re_node_set *eps_via_nodes); static reg_errcode_t set_regs (const regex_t *preg, ! const re_match_context_t *mctx, ! size_t nmatch, regmatch_t *pmatch, ! int fl_backtrack); static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs); #ifdef RE_ENABLE_I18N static int sift_states_iter_mb (const regex_t *preg, ! const re_match_context_t *mctx, ! re_sift_context_t *sctx, ! int node_idx, int str_idx, int max_str_idx); #endif /* RE_ENABLE_I18N */ static reg_errcode_t sift_states_backward (const regex_t *preg, ! re_match_context_t *mctx, ! re_sift_context_t *sctx); static reg_errcode_t update_cur_sifted_state (const regex_t *preg, ! re_match_context_t *mctx, ! re_sift_context_t *sctx, ! int str_idx, ! re_node_set *dest_nodes); static reg_errcode_t add_epsilon_src_nodes (re_dfa_t *dfa, ! re_node_set *dest_nodes, ! const re_node_set *candidates); static reg_errcode_t sub_epsilon_src_nodes (re_dfa_t *dfa, int node, ! re_node_set *dest_nodes, ! const re_node_set *and_nodes); static int check_dst_limits (re_dfa_t *dfa, re_node_set *limits, ! re_match_context_t *mctx, int dst_node, ! int dst_idx, int src_node, int src_idx); static int check_dst_limits_calc_pos (re_dfa_t *dfa, re_match_context_t *mctx, ! int limit, re_node_set *eclosures, ! int subexp_idx, int node, int str_idx); static reg_errcode_t check_subexp_limits (re_dfa_t *dfa, ! re_node_set *dest_nodes, ! const re_node_set *candidates, ! re_node_set *limits, ! struct re_backref_cache_entry *bkref_ents, ! int str_idx); ! 
static reg_errcode_t search_subexp (const regex_t *preg, ! re_match_context_t *mctx, ! re_sift_context_t *sctx, int str_idx, ! re_node_set *dest_nodes); static reg_errcode_t sift_states_bkref (const regex_t *preg, ! re_match_context_t *mctx, ! re_sift_context_t *sctx, ! int str_idx, re_node_set *dest_nodes); static reg_errcode_t clean_state_log_if_need (re_match_context_t *mctx, ! int next_state_log_idx); static reg_errcode_t merge_state_array (re_dfa_t *dfa, re_dfastate_t **dst, ! re_dfastate_t **src, int num); static re_dfastate_t *transit_state (reg_errcode_t *err, const regex_t *preg, ! re_match_context_t *mctx, ! re_dfastate_t *state, int fl_search); static re_dfastate_t *transit_state_sb (reg_errcode_t *err, const regex_t *preg, ! re_dfastate_t *pstate, ! int fl_search, ! re_match_context_t *mctx); #ifdef RE_ENABLE_I18N static reg_errcode_t transit_state_mb (const regex_t *preg, ! re_dfastate_t *pstate, ! re_match_context_t *mctx); #endif /* RE_ENABLE_I18N */ static reg_errcode_t transit_state_bkref (const regex_t *preg, ! re_dfastate_t *pstate, ! re_match_context_t *mctx); static reg_errcode_t transit_state_bkref_loop (const regex_t *preg, ! re_node_set *nodes, ! re_dfastate_t **work_state_log, ! re_match_context_t *mctx); static re_dfastate_t **build_trtable (const regex_t *dfa, ! const re_dfastate_t *state, ! int fl_search); #ifdef RE_ENABLE_I18N static int check_node_accept_bytes (const regex_t *preg, int node_idx, ! const re_string_t *input, int idx); # ifdef _LIBC static unsigned int find_collation_sequence_value (const unsigned char *mbs, ! size_t name_len); # endif /* _LIBC */ #endif /* RE_ENABLE_I18N */ static int group_nodes_into_DFAstates (const regex_t *dfa, ! const re_dfastate_t *state, ! re_node_set *states_node, ! bitset *states_ch); static int check_node_accept (const regex_t *preg, const re_token_t *node, ! 
const re_match_context_t *mctx, int idx); static reg_errcode_t extend_buffers (re_match_context_t *mctx); static inline int my_memcmp (char *s1, char *s2, unsigned int l); - /* Entry point for POSIX code. */ /* regexec searches for a given pattern, specified by PREG, in the --- 44,215 ---- #include "regex_internal.h" static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags, ! re_string_t *input, int n); static void match_ctx_free (re_match_context_t *cache); ! static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, int node, ! int str_idx, int from, int to, ! re_sub_match_top_t *top, ! re_sub_match_last_t *last); ! static int search_cur_bkref_entry (re_match_context_t *mctx, int str_idx); static void match_ctx_clear_flag (re_match_context_t *mctx); + static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, int node, + int str_idx); + static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop, + int node, int str_idx); static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts, ! re_dfastate_t **limited_sts, int last_node, ! int last_str_idx, int check_subexp); static int re_search_2_stub (struct re_pattern_buffer *bufp, ! const char *string1, int length1, ! const char *string2, int length2, ! int start, int range, struct re_registers *regs, ! int stop, int ret_len); static int re_search_stub (struct re_pattern_buffer *bufp, ! const char *string, int length, int start, ! int range, int stop, struct re_registers *regs, ! int ret_len); static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, ! int nregs, int regs_allocated); static inline re_dfastate_t *acquire_init_state_context (reg_errcode_t *err, ! const regex_t *preg, ! const re_match_context_t *mctx, ! int idx); ! static reg_errcode_t prune_impossible_nodes (const regex_t *preg, ! re_match_context_t *mctx); static int check_matching (const regex_t *preg, re_match_context_t *mctx, ! 
int fl_search, int fl_longest_match); static int check_halt_node_context (const re_dfa_t *dfa, int node, ! unsigned int context); static int check_halt_state_context (const regex_t *preg, ! const re_dfastate_t *state, ! const re_match_context_t *mctx, int idx); static void update_regs (re_dfa_t *dfa, regmatch_t *pmatch, int cur_node, ! int cur_idx, int nmatch); static int proceed_next_node (const regex_t *preg, int nregs, regmatch_t *regs, ! const re_match_context_t *mctx, ! int *pidx, int node, re_node_set *eps_via_nodes, ! struct re_fail_stack_t *fs); ! static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs, ! int str_idx, int *dests, int nregs, ! regmatch_t *regs, ! re_node_set *eps_via_nodes); static int pop_fail_stack (struct re_fail_stack_t *fs, int *pidx, int nregs, ! regmatch_t *regs, re_node_set *eps_via_nodes); static reg_errcode_t set_regs (const regex_t *preg, ! const re_match_context_t *mctx, ! size_t nmatch, regmatch_t *pmatch, ! int fl_backtrack); static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs); #ifdef RE_ENABLE_I18N static int sift_states_iter_mb (const regex_t *preg, ! const re_match_context_t *mctx, ! re_sift_context_t *sctx, ! int node_idx, int str_idx, int max_str_idx); #endif /* RE_ENABLE_I18N */ static reg_errcode_t sift_states_backward (const regex_t *preg, ! re_match_context_t *mctx, ! re_sift_context_t *sctx); static reg_errcode_t update_cur_sifted_state (const regex_t *preg, ! re_match_context_t *mctx, ! re_sift_context_t *sctx, ! int str_idx, ! re_node_set *dest_nodes); static reg_errcode_t add_epsilon_src_nodes (re_dfa_t *dfa, ! re_node_set *dest_nodes, ! const re_node_set *candidates); static reg_errcode_t sub_epsilon_src_nodes (re_dfa_t *dfa, int node, ! re_node_set *dest_nodes, ! const re_node_set *and_nodes); static int check_dst_limits (re_dfa_t *dfa, re_node_set *limits, ! re_match_context_t *mctx, int dst_node, ! 
int dst_idx, int src_node, int src_idx); static int check_dst_limits_calc_pos (re_dfa_t *dfa, re_match_context_t *mctx, ! int limit, re_node_set *eclosures, ! int subexp_idx, int node, int str_idx); static reg_errcode_t check_subexp_limits (re_dfa_t *dfa, ! re_node_set *dest_nodes, ! const re_node_set *candidates, ! re_node_set *limits, ! struct re_backref_cache_entry *bkref_ents, ! int str_idx); static reg_errcode_t sift_states_bkref (const regex_t *preg, ! re_match_context_t *mctx, ! re_sift_context_t *sctx, ! int str_idx, re_node_set *dest_nodes); static reg_errcode_t clean_state_log_if_need (re_match_context_t *mctx, ! int next_state_log_idx); static reg_errcode_t merge_state_array (re_dfa_t *dfa, re_dfastate_t **dst, ! re_dfastate_t **src, int num); static re_dfastate_t *transit_state (reg_errcode_t *err, const regex_t *preg, ! re_match_context_t *mctx, ! re_dfastate_t *state, int fl_search); ! static reg_errcode_t check_subexp_matching_top (re_dfa_t *dfa, ! re_match_context_t *mctx, ! re_node_set *cur_nodes, ! int str_idx); static re_dfastate_t *transit_state_sb (reg_errcode_t *err, const regex_t *preg, ! re_dfastate_t *pstate, ! int fl_search, ! re_match_context_t *mctx); #ifdef RE_ENABLE_I18N static reg_errcode_t transit_state_mb (const regex_t *preg, ! re_dfastate_t *pstate, ! re_match_context_t *mctx); #endif /* RE_ENABLE_I18N */ static reg_errcode_t transit_state_bkref (const regex_t *preg, ! re_dfastate_t *pstate, ! re_match_context_t *mctx); static reg_errcode_t transit_state_bkref_loop (const regex_t *preg, ! re_node_set *nodes, ! re_match_context_t *mctx); ! static reg_errcode_t get_subexp (const regex_t *preg, re_match_context_t *mctx, ! int bkref_node, ! int bkref_str_idx, int subexp_idx); ! static reg_errcode_t get_subexp_sub (const regex_t *preg, ! re_match_context_t *mctx, ! re_sub_match_top_t *sub_top, ! re_sub_match_last_t *sub_last, ! int bkref_node, int bkref_str, ! int subexp_idx); ! 
static int find_subexp_node (re_dfa_t *dfa, re_node_set *nodes, ! int subexp_idx, int fl_open); ! static reg_errcode_t check_arrival (const regex_t *preg, ! re_match_context_t *mctx, ! re_sub_match_top_t *sub_top, ! re_sub_match_last_t *sub_last, ! int bkref_node, int bkref_str); ! static reg_errcode_t expand_eclosures (re_dfa_t *dfa, re_node_set *cur_nodes, ! int ex_subexp, int fl_open); ! static reg_errcode_t expand_eclosures_sub (re_dfa_t *dfa, re_node_set *dst_nodes, ! int target, int ex_subexp, int fl_open); ! static reg_errcode_t expand_bkref_cache (const regex_t *preg, ! re_match_context_t *mctx, ! re_sub_match_top_t *sub_top, ! re_sub_match_last_t *sub_last, ! re_node_set *cur_nodes, int cur_str, ! int last_str, int ex_subexp, ! int fl_open); static re_dfastate_t **build_trtable (const regex_t *dfa, ! const re_dfastate_t *state, ! int fl_search); #ifdef RE_ENABLE_I18N static int check_node_accept_bytes (const regex_t *preg, int node_idx, ! const re_string_t *input, int idx); # ifdef _LIBC static unsigned int find_collation_sequence_value (const unsigned char *mbs, ! size_t name_len); # endif /* _LIBC */ #endif /* RE_ENABLE_I18N */ static int group_nodes_into_DFAstates (const regex_t *dfa, ! const re_dfastate_t *state, ! re_node_set *states_node, ! bitset *states_ch); static int check_node_accept (const regex_t *preg, const re_token_t *node, ! const re_match_context_t *mctx, int idx); static reg_errcode_t extend_buffers (re_match_context_t *mctx); static inline int my_memcmp (char *s1, char *s2, unsigned int l); /* Entry point for POSIX code. */ /* regexec searches for a given pattern, specified by PREG, in the *************** *** 204,213 **** int length = strlen (string); if (preg->no_sub) err = re_search_internal (preg, string, length, 0, length, length, 0, ! NULL, eflags); else err = re_search_internal (preg, string, length, 0, length, length, nmatch, ! 
pmatch, eflags); return err != REG_NOERROR; } #ifdef _LIBC --- 238,247 ---- int length = strlen (string); if (preg->no_sub) err = re_search_internal (preg, string, length, 0, length, length, 0, ! NULL, eflags); else err = re_search_internal (preg, string, length, 0, length, length, nmatch, ! pmatch, eflags); return err != REG_NOERROR; } #ifdef _LIBC *************** *** 277,283 **** struct re_registers *regs; { return re_search_2_stub (bufp, string1, length1, string2, length2, ! start, 0, regs, stop, 1); } #ifdef _LIBC weak_alias (__re_match_2, re_match_2) --- 311,317 ---- struct re_registers *regs; { return re_search_2_stub (bufp, string1, length1, string2, length2, ! start, 0, regs, stop, 1); } #ifdef _LIBC weak_alias (__re_match_2, re_match_2) *************** *** 291,297 **** struct re_registers *regs; { return re_search_2_stub (bufp, string1, length1, string2, length2, ! start, range, regs, stop, 0); } #ifdef _LIBC weak_alias (__re_search_2, re_search_2) --- 325,331 ---- struct re_registers *regs; { return re_search_2_stub (bufp, string1, length1, string2, length2, ! start, range, regs, stop, 0); } #ifdef _LIBC weak_alias (__re_search_2, re_search_2) *************** *** 299,305 **** static int re_search_2_stub (bufp, string1, length1, string2, length2, start, range, regs, ! stop, ret_len) struct re_pattern_buffer *bufp; const char *string1, *string2; int length1, length2, start, range, stop, ret_len; --- 333,339 ---- static int re_search_2_stub (bufp, string1, length1, string2, length2, start, range, regs, ! stop, ret_len) struct re_pattern_buffer *bufp; const char *string1, *string2; int length1, length2, start, range, stop, ret_len; *************** *** 317,330 **** if (length2 > 0) if (length1 > 0) { ! char *s = re_malloc (char, len); ! if (BE (s == NULL, 0)) ! return -2; ! memcpy (s, string1, length1); ! memcpy (s + length1, string2, length2); ! str = s; ! free_str = 1; } else str = string2; --- 351,364 ---- if (length2 > 0) if (length1 > 0) { ! 
char *s = re_malloc (char, len); ! if (BE (s == NULL, 0)) ! return -2; ! memcpy (s, string1, length1); ! memcpy (s + length1, string2, length2); ! str = s; ! free_str = 1; } else str = string2; *************** *** 332,340 **** str = string1; rval = re_search_stub (bufp, str, len, start, range, stop, regs, ! ret_len); if (free_str) ! re_free ((char *) str); return rval; } --- 366,374 ---- str = string1; rval = re_search_stub (bufp, str, len, start, range, stop, regs, ! ret_len); if (free_str) ! re_free ((char *) str); return rval; } *************** *** 377,391 **** if (regs == NULL) nregs = 1; else if (BE (bufp->regs_allocated == REGS_FIXED && ! regs->num_regs < bufp->re_nsub + 1, 0)) { nregs = regs->num_regs; if (BE (nregs < 1, 0)) ! { ! /* Nothing can be copied to regs. */ ! regs = NULL; ! nregs = 1; ! } } else nregs = bufp->re_nsub + 1; --- 411,425 ---- if (regs == NULL) nregs = 1; else if (BE (bufp->regs_allocated == REGS_FIXED && ! regs->num_regs < bufp->re_nsub + 1, 0)) { nregs = regs->num_regs; if (BE (nregs < 1, 0)) ! { ! /* Nothing can be copied to regs. */ ! regs = NULL; ! nregs = 1; ! } } else nregs = bufp->re_nsub + 1; *************** *** 394,400 **** return -2; result = re_search_internal (bufp, string, length, start, range, stop, ! nregs, pmatch, eflags); rval = 0; --- 428,434 ---- return -2; result = re_search_internal (bufp, string, length, start, range, stop, ! nregs, pmatch, eflags); rval = 0; *************** *** 405,424 **** { /* If caller wants register contents data back, copy them. */ bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs, ! bufp->regs_allocated); if (BE (bufp->regs_allocated == REGS_UNALLOCATED, 0)) ! rval = -2; } if (BE (rval == 0, 1)) { if (ret_len) ! { ! assert (pmatch[0].rm_so == start); ! rval = pmatch[0].rm_eo - start; ! } else ! rval = pmatch[0].rm_so; } re_free (pmatch); return rval; --- 439,458 ---- { /* If caller wants register contents data back, copy them. 
*/ bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs, ! bufp->regs_allocated); if (BE (bufp->regs_allocated == REGS_UNALLOCATED, 0)) ! rval = -2; } if (BE (rval == 0, 1)) { if (ret_len) ! { ! assert (pmatch[0].rm_so == start); ! rval = pmatch[0].rm_eo - start; ! } else ! rval = pmatch[0].rm_so; } re_free (pmatch); return rval; *************** *** 441,476 **** { /* No. So allocate them with malloc. */ regs->start = re_malloc (regoff_t, need_regs); if (BE (regs->start == NULL, 0)) ! return REGS_UNALLOCATED; regs->end = re_malloc (regoff_t, need_regs); if (BE (regs->end == NULL, 0)) ! { ! re_free (regs->start); ! return REGS_UNALLOCATED; ! } regs->num_regs = need_regs; } else if (regs_allocated == REGS_REALLOCATE) { /* Yes. If we need more elements than were already ! allocated, reallocate them. If we need fewer, just ! leave it alone. */ if (need_regs > regs->num_regs) ! { ! regs->start = re_realloc (regs->start, regoff_t, need_regs); ! if (BE (regs->start == NULL, 0)) ! { ! if (regs->end != NULL) ! re_free (regs->end); ! return REGS_UNALLOCATED; ! } ! regs->end = re_realloc (regs->end, regoff_t, need_regs); ! if (BE (regs->end == NULL, 0)) ! { ! re_free (regs->start); ! return REGS_UNALLOCATED; ! } ! regs->num_regs = need_regs; ! } } else { --- 475,510 ---- { /* No. So allocate them with malloc. */ regs->start = re_malloc (regoff_t, need_regs); if (BE (regs->start == NULL, 0)) ! return REGS_UNALLOCATED; regs->end = re_malloc (regoff_t, need_regs); if (BE (regs->end == NULL, 0)) ! { ! re_free (regs->start); ! return REGS_UNALLOCATED; ! } regs->num_regs = need_regs; } else if (regs_allocated == REGS_REALLOCATE) { /* Yes. If we need more elements than were already ! allocated, reallocate them. If we need fewer, just ! leave it alone. */ if (need_regs > regs->num_regs) ! { ! regs->start = re_realloc (regs->start, regoff_t, need_regs); ! if (BE (regs->start == NULL, 0)) ! { ! if (regs->end != NULL) ! re_free (regs->end); ! return REGS_UNALLOCATED; ! } ! 
regs->end = re_realloc (regs->end, regoff_t, need_regs); ! if (BE (regs->end == NULL, 0)) ! { ! re_free (regs->start); ! return REGS_UNALLOCATED; ! } ! regs->num_regs = need_regs; ! } } else { *************** *** 560,566 **** reg_errcode_t re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch, ! eflags) const regex_t *preg; const char *string; int length, start, range, stop, eflags; --- 594,600 ---- reg_errcode_t re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch, ! eflags) const regex_t *preg; const char *string; int length, start, range, stop, eflags; *************** *** 572,601 **** re_string_t input; int left_lim, right_lim, incr; int fl_longest_match, match_first, match_last = -1; re_match_context_t mctx; ! char *fastmap = ((preg->fastmap != NULL && preg->fastmap_accurate) ! ? preg->fastmap : NULL); /* Check if the DFA haven't been compiled. */ if (BE (preg->used == 0 || dfa->init_state == NULL ! || dfa->init_state_word == NULL || dfa->init_state_nl == NULL ! || dfa->init_state_begbuf == NULL, 0)) return REG_NOMATCH; re_node_set_init_empty (&empty_set); /* We must check the longest matching, if nmatch > 0. */ ! fl_longest_match = (nmatch != 0); err = re_string_allocate (&input, string, length, dfa->nodes_len + 1, ! preg->translate, preg->syntax & RE_ICASE); if (BE (err != REG_NOERROR, 0)) ! return err; input.stop = stop; err = match_ctx_init (&mctx, eflags, &input, dfa->nbackref * 2); if (BE (err != REG_NOERROR, 0)) ! return err; /* We will log all the DFA states through which the dfa pass, if nmatch > 1, or this dfa has "multibyte node", which is a --- 606,637 ---- re_string_t input; int left_lim, right_lim, incr; int fl_longest_match, match_first, match_last = -1; + int fast_translate, sb; re_match_context_t mctx; ! char *fastmap = ((preg->fastmap != NULL && preg->fastmap_accurate ! && range && !preg->can_be_null) ? preg->fastmap : NULL); /* Check if the DFA haven't been compiled. 
*/ if (BE (preg->used == 0 || dfa->init_state == NULL ! || dfa->init_state_word == NULL || dfa->init_state_nl == NULL ! || dfa->init_state_begbuf == NULL, 0)) return REG_NOMATCH; re_node_set_init_empty (&empty_set); + memset (&mctx, '\0', sizeof (re_match_context_t)); /* We must check the longest matching, if nmatch > 0. */ ! fl_longest_match = (nmatch != 0 || dfa->nbackref); err = re_string_allocate (&input, string, length, dfa->nodes_len + 1, ! preg->translate, preg->syntax & RE_ICASE); if (BE (err != REG_NOERROR, 0)) ! goto free_return; input.stop = stop; err = match_ctx_init (&mctx, eflags, &input, dfa->nbackref * 2); if (BE (err != REG_NOERROR, 0)) ! goto free_return; /* We will log all the DFA states through which the dfa pass, if nmatch > 1, or this dfa has "multibyte node", which is a *************** *** 605,611 **** { mctx.state_log = re_malloc (re_dfastate_t *, dfa->nodes_len + 1); if (BE (mctx.state_log == NULL, 0)) ! return REG_ESPACE; } else mctx.state_log = NULL; --- 641,650 ---- { mctx.state_log = re_malloc (re_dfastate_t *, dfa->nodes_len + 1); if (BE (mctx.state_log == NULL, 0)) ! { ! err = REG_ESPACE; ! goto free_return; ! } } else mctx.state_log = NULL; *************** *** 617,691 **** match_first = start; input.tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF ! : CONTEXT_NEWLINE | CONTEXT_BEGBUF); /* Check incrementally whether of not the input string match. */ incr = (range < 0) ? -1 : 1; left_lim = (range < 0) ? start + range : start; right_lim = (range < 0) ? start : start + range; for (;;) { /* At first get the current byte from input string. */ ! int ch; ! if (MB_CUR_MAX > 1 && (preg->syntax & RE_ICASE || preg->translate)) ! { ! /* In this case, we can't determin easily the current byte, ! since it might be a component byte of a multibyte character. ! Then we use the constructed buffer instead. */ ! /* If MATCH_FIRST is out of the valid range, reconstruct the ! buffers. */ ! if (input.raw_mbs_idx + input.valid_len <= match_first) ! 
re_string_reconstruct (&input, match_first, eflags, ! preg->newline_anchor); ! /* If MATCH_FIRST is out of the buffer, leave it as '\0'. ! Note that MATCH_FIRST must not be smaller than 0. */ ! ch = ((match_first >= length) ? 0 ! : re_string_byte_at (&input, match_first - input.raw_mbs_idx)); ! } ! else ! { ! /* We apply translate/conversion manually, since it is trivial ! in this case. */ ! /* If MATCH_FIRST is out of the buffer, leave it as '\0'. ! Note that MATCH_FIRST must not be smaller than 0. */ ! ch = (match_first < length) ? (unsigned char)string[match_first] : 0; ! /* Apply translation if we need. */ ! ch = preg->translate ? preg->translate[ch] : ch; ! /* In case of case insensitive mode, convert to upper case. */ ! ch = ((preg->syntax & RE_ICASE) && islower (ch)) ? toupper (ch) : ch; ! } ! ! /* Eliminate inappropriate one by fastmap. */ ! if (preg->can_be_null || fastmap == NULL || fastmap[ch]) ! { ! /* Reconstruct the buffers so that the matcher can assume that ! the matching starts from the begining of the buffer. */ ! re_string_reconstruct (&input, match_first, eflags, ! preg->newline_anchor); #ifdef RE_ENABLE_I18N ! /* Eliminate it when it is a component of a multibyte character ! and isn't the head of a multibyte character. */ ! if (MB_CUR_MAX == 1 || re_string_first_byte (&input, 0)) ! #endif ! { ! /* It seems to be appropriate one, then use the matcher. */ ! /* We assume that the matching starts from 0. */ ! mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0; ! match_last = check_matching (preg, &mctx, 0, fl_longest_match); ! if (match_last != -1) ! { ! if (BE (match_last == -2, 0)) ! return REG_ESPACE; ! else ! break; /* We found a matching. */ ! } ! } ! } /* Update counter. */ match_first += incr; if (match_first < left_lim || right_lim < match_first) ! break; } /* Set pmatch[] if we need. */ --- 656,800 ---- match_first = start; input.tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF ! 
: CONTEXT_NEWLINE | CONTEXT_BEGBUF); /* Check incrementally whether of not the input string match. */ incr = (range < 0) ? -1 : 1; left_lim = (range < 0) ? start + range : start; right_lim = (range < 0) ? start : start + range; + sb = MB_CUR_MAX == 1; + fast_translate = sb || !(preg->syntax & RE_ICASE || preg->translate); for (;;) { /* At first get the current byte from input string. */ ! if (fastmap) ! { ! if (BE (fast_translate, 1)) ! { ! unsigned RE_TRANSLATE_TYPE t ! = (unsigned RE_TRANSLATE_TYPE) preg->translate; ! if (BE (range >= 0, 1)) ! { ! if (BE (t != NULL, 0)) ! { ! while (BE (match_first < right_lim, 1) ! && !fastmap[t[(unsigned char) string[match_first]]]) ! ++match_first; ! } ! else ! { ! while (BE (match_first < right_lim, 1) ! && !fastmap[(unsigned char) string[match_first]]) ! ++match_first; ! } ! if (BE (match_first == right_lim, 0)) ! { ! int ch = match_first >= length ! ? 0 : (unsigned char) string[match_first]; ! if (!fastmap[t ? t[ch] : ch]) ! break; ! } ! } ! else ! { ! while (match_first >= left_lim) ! { ! int ch = match_first >= length ! ? 0 : (unsigned char) string[match_first]; ! if (fastmap[t ? t[ch] : ch]) ! break; ! --match_first; ! } ! if (match_first < left_lim) ! break; ! } ! } ! else ! { ! int ch; ! ! do ! { ! /* In this case, we can't determine easily the current byte, ! since it might be a component byte of a multibyte ! character. Then we use the constructed buffer ! instead. */ ! /* If MATCH_FIRST is out of the valid range, reconstruct the ! buffers. */ ! if (input.raw_mbs_idx + input.valid_len <= match_first ! || match_first < input.raw_mbs_idx) ! { ! err = re_string_reconstruct (&input, match_first, eflags, ! preg->newline_anchor); ! if (BE (err != REG_NOERROR, 0)) ! goto free_return; ! } ! /* If MATCH_FIRST is out of the buffer, leave it as '\0'. ! Note that MATCH_FIRST must not be smaller than 0. */ ! ch = ((match_first >= length) ? 0 ! : re_string_byte_at (&input, ! match_first - input.raw_mbs_idx)); ! if (fastmap[ch]) ! 
break; ! match_first += incr; ! } ! while (match_first >= left_lim && match_first <= right_lim); ! if (! fastmap[ch]) ! break; ! } ! } ! ! /* Reconstruct the buffers so that the matcher can assume that ! the matching starts from the begining of the buffer. */ ! err = re_string_reconstruct (&input, match_first, eflags, ! preg->newline_anchor); ! if (BE (err != REG_NOERROR, 0)) ! goto free_return; #ifdef RE_ENABLE_I18N ! /* Eliminate it when it is a component of a multibyte character ! and isn't the head of a multibyte character. */ ! if (sb || re_string_first_byte (&input, 0)) ! #endif ! { ! /* It seems to be appropriate one, then use the matcher. */ ! /* We assume that the matching starts from 0. */ ! mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0; ! match_last = check_matching (preg, &mctx, 0, fl_longest_match); ! if (match_last != -1) ! { ! if (BE (match_last == -2, 0)) ! { ! err = REG_ESPACE; ! goto free_return; ! } ! else ! { ! mctx.match_last = match_last; ! if ((!preg->no_sub && nmatch > 1) || dfa->nbackref) ! { ! re_dfastate_t *pstate = mctx.state_log[match_last]; ! mctx.last_node = check_halt_state_context (preg, pstate, ! &mctx, match_last); ! } ! if ((!preg->no_sub && nmatch > 1 && dfa->has_plural_match) ! || dfa->nbackref) ! { ! err = prune_impossible_nodes (preg, &mctx); ! if (err == REG_NOERROR) ! break; ! if (BE (err != REG_NOMATCH, 0)) ! goto free_return; ! } ! else ! break; /* We found a matching. */ ! } ! } ! } /* Update counter. */ match_first += incr; if (match_first < left_lim || right_lim < match_first) ! break; } /* Set pmatch[] if we need. */ *************** *** 695,772 **** /* Initialize registers. */ for (reg_idx = 0; reg_idx < nmatch; ++reg_idx) ! pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1; /* Set the points where matching start/end. */ pmatch[0].rm_so = 0; ! mctx.match_last = pmatch[0].rm_eo = match_last; if (!preg->no_sub && nmatch > 1) ! { ! /* We need the ranges of all the subexpressions. */ ! int halt_node; ! 
re_dfastate_t **sifted_states; ! re_dfastate_t **lim_states = NULL; ! re_dfastate_t *pstate = mctx.state_log[match_last]; ! re_sift_context_t sctx; ! #ifdef DEBUG ! assert (mctx.state_log != NULL); ! #endif ! halt_node = check_halt_state_context (preg, pstate, &mctx, ! match_last); ! if (dfa->has_plural_match) ! { ! match_ctx_clear_flag (&mctx); ! sifted_states = re_malloc (re_dfastate_t *, match_last + 1); ! if (BE (sifted_states == NULL, 0)) ! return REG_ESPACE; ! if (dfa->nbackref) ! { ! lim_states = calloc (sizeof (re_dfastate_t *), ! match_last + 1); ! if (BE (lim_states == NULL, 0)) ! return REG_ESPACE; ! } ! sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, ! mctx.match_last, 0); ! err = sift_states_backward (preg, &mctx, &sctx); ! if (BE (err != REG_NOERROR, 0)) ! return err; ! if (lim_states != NULL) ! { ! err = merge_state_array (dfa, sifted_states, lim_states, ! match_last + 1); ! if (BE (err != REG_NOERROR, 0)) ! return err; ! re_free (lim_states); ! } ! re_node_set_free (&sctx.limits); ! re_free (mctx.state_log); ! mctx.state_log = sifted_states; ! } ! mctx.last_node = halt_node; ! err = set_regs (preg, &mctx, nmatch, pmatch, ! dfa->has_plural_match && dfa->nbackref > 0); ! if (BE (err != REG_NOERROR, 0)) ! return err; ! } /* At last, add the offset to the each registers, since we slided ! the buffers so that We can assume that the matching starts from 0. */ for (reg_idx = 0; reg_idx < nmatch; ++reg_idx) ! if (pmatch[reg_idx].rm_so != -1) ! { ! pmatch[reg_idx].rm_so += match_first; ! pmatch[reg_idx].rm_eo += match_first; ! } } ! re_free (mctx.state_log); if (dfa->nbackref) match_ctx_free (&mctx); re_string_destruct (&input); ! return (match_last == -1) ? REG_NOMATCH : REG_NOERROR; } /* Acquire an initial state and return it. --- 804,923 ---- /* Initialize registers. */ for (reg_idx = 0; reg_idx < nmatch; ++reg_idx) ! pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1; /* Set the points where matching start/end. */ pmatch[0].rm_so = 0; ! 
pmatch[0].rm_eo = mctx.match_last; if (!preg->no_sub && nmatch > 1) ! { ! err = set_regs (preg, &mctx, nmatch, pmatch, ! dfa->has_plural_match && dfa->nbackref > 0); ! if (BE (err != REG_NOERROR, 0)) ! goto free_return; ! } /* At last, add the offset to the each registers, since we slided ! the buffers so that We can assume that the matching starts from 0. */ for (reg_idx = 0; reg_idx < nmatch; ++reg_idx) ! if (pmatch[reg_idx].rm_so != -1) ! { ! pmatch[reg_idx].rm_so += match_first; ! pmatch[reg_idx].rm_eo += match_first; ! } } ! err = (match_last == -1) ? REG_NOMATCH : REG_NOERROR; ! free_return: re_free (mctx.state_log); if (dfa->nbackref) match_ctx_free (&mctx); re_string_destruct (&input); + return err; + } ! static reg_errcode_t ! prune_impossible_nodes (preg, mctx) ! const regex_t *preg; ! re_match_context_t *mctx; ! { ! int halt_node, match_last; ! reg_errcode_t ret; ! re_dfa_t *dfa = (re_dfa_t *)preg->buffer; ! re_dfastate_t **sifted_states; ! re_dfastate_t **lim_states = NULL; ! re_sift_context_t sctx; ! #ifdef DEBUG ! assert (mctx->state_log != NULL); ! #endif ! match_last = mctx->match_last; ! halt_node = mctx->last_node; ! sifted_states = re_malloc (re_dfastate_t *, match_last + 1); ! if (BE (sifted_states == NULL, 0)) ! { ! ret = REG_ESPACE; ! goto free_return; ! } ! if (dfa->nbackref) ! { ! lim_states = re_malloc (re_dfastate_t *, match_last + 1); ! if (BE (lim_states == NULL, 0)) ! { ! ret = REG_ESPACE; ! goto free_return; ! } ! while (1) ! { ! memset (lim_states, '\0', ! sizeof (re_dfastate_t *) * (match_last + 1)); ! match_ctx_clear_flag (mctx); ! sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, ! match_last, 0); ! ret = sift_states_backward (preg, mctx, &sctx); ! re_node_set_free (&sctx.limits); ! if (BE (ret != REG_NOERROR, 0)) ! goto free_return; ! if (sifted_states[0] != NULL || lim_states[0] != NULL) ! break; ! do ! { ! --match_last; ! if (match_last < 0) ! { ! ret = REG_NOMATCH; ! goto free_return; ! } ! 
} while (!mctx->state_log[match_last]->halt); ! halt_node = check_halt_state_context (preg, ! mctx->state_log[match_last], ! mctx, match_last); ! } ! ret = merge_state_array (dfa, sifted_states, lim_states, ! match_last + 1); ! re_free (lim_states); ! lim_states = NULL; ! if (BE (ret != REG_NOERROR, 0)) ! goto free_return; ! } ! else ! { ! sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, ! match_last, 0); ! ret = sift_states_backward (preg, mctx, &sctx); ! re_node_set_free (&sctx.limits); ! if (BE (ret != REG_NOERROR, 0)) ! goto free_return; ! } ! re_free (mctx->state_log); ! mctx->state_log = sifted_states; ! sifted_states = NULL; ! mctx->last_node = halt_node; ! mctx->match_last = match_last; ! ret = REG_NOERROR; ! free_return: ! re_free (sifted_states); ! re_free (lim_states); ! return ret; } /* Acquire an initial state and return it. *************** *** 787,811 **** { unsigned int context; context = re_string_context_at (mctx->input, idx - 1, mctx->eflags, ! preg->newline_anchor); if (IS_WORD_CONTEXT (context)) ! return dfa->init_state_word; else if (IS_ORDINARY_CONTEXT (context)) ! return dfa->init_state; else if (IS_BEGBUF_CONTEXT (context) && IS_NEWLINE_CONTEXT (context)) ! return dfa->init_state_begbuf; else if (IS_NEWLINE_CONTEXT (context)) ! return dfa->init_state_nl; else if (IS_BEGBUF_CONTEXT (context)) ! { ! /* It is relatively rare case, then calculate on demand. */ ! return re_acquire_state_context (err, dfa, ! dfa->init_state->entrance_nodes, ! context); ! } else ! /* Must not happen? */ ! return dfa->init_state; } else return dfa->init_state; --- 938,962 ---- { unsigned int context; context = re_string_context_at (mctx->input, idx - 1, mctx->eflags, ! preg->newline_anchor); if (IS_WORD_CONTEXT (context)) ! return dfa->init_state_word; else if (IS_ORDINARY_CONTEXT (context)) ! return dfa->init_state; else if (IS_BEGBUF_CONTEXT (context) && IS_NEWLINE_CONTEXT (context)) ! 
return dfa->init_state_begbuf; else if (IS_NEWLINE_CONTEXT (context)) ! return dfa->init_state_nl; else if (IS_BEGBUF_CONTEXT (context)) ! { ! /* It is relatively rare case, then calculate on demand. */ ! return re_acquire_state_context (err, dfa, ! dfa->init_state->entrance_nodes, ! context); ! } else ! /* Must not happen? */ ! return dfa->init_state; } else return dfa->init_state; *************** *** 825,830 **** --- 976,982 ---- re_match_context_t *mctx; int fl_search, fl_longest_match; { + re_dfa_t *dfa = (re_dfa_t *) preg->buffer; reg_errcode_t err; int match = 0; int match_last = -1; *************** *** 838,945 **** if (mctx->state_log != NULL) mctx->state_log[cur_str_idx] = cur_state; if (cur_state->has_backref) { ! int i; ! re_dfa_t *dfa = (re_dfa_t *) preg->buffer; ! for (i = 0; i < cur_state->nodes.nelem; ++i) ! { ! int node = cur_state->nodes.elems[i]; ! re_token_type_t type = dfa->nodes[node].type; ! if (type == OP_BACK_REF) ! { ! int clexp_idx; ! for (clexp_idx = 0; clexp_idx < cur_state->nodes.nelem; ! ++clexp_idx) ! { ! re_token_t *clexp_node; ! clexp_node = dfa->nodes + cur_state->nodes.elems[clexp_idx]; ! if (clexp_node->type == OP_CLOSE_SUBEXP ! && clexp_node->opr.idx + 1== dfa->nodes[node].opr.idx) ! { ! err = match_ctx_add_entry (mctx, dfa, node, 0, 0, 0); ! if (BE (err != REG_NOERROR, 0)) ! return -2; ! break; ! } ! } ! } ! } } /* If the RE accepts NULL string. */ if (cur_state->halt) { if (!cur_state->has_constraint ! || check_halt_state_context (preg, cur_state, mctx, cur_str_idx)) ! { ! if (!fl_longest_match) ! return cur_str_idx; ! else ! { ! match_last = cur_str_idx; ! match = 1; ! } ! } } while (!re_string_eoi (mctx->input)) { cur_state = transit_state (&err, preg, mctx, cur_state, ! fl_search && !match); if (cur_state == NULL) /* Reached at the invalid state or an error. */ ! { ! cur_str_idx = re_string_cur_idx (mctx->input); ! if (BE (err != REG_NOERROR, 0)) ! return -2; ! if (fl_search && !match) ! { ! 
/* Restart from initial state, since we are searching ! the point from where matching start. */ #ifdef RE_ENABLE_I18N ! if (MB_CUR_MAX == 1 ! || re_string_first_byte (mctx->input, cur_str_idx)) #endif /* RE_ENABLE_I18N */ ! cur_state = acquire_init_state_context (&err, preg, mctx, ! cur_str_idx); ! if (BE (cur_state == NULL && err != REG_NOERROR, 0)) ! return -2; ! if (mctx->state_log != NULL) ! mctx->state_log[cur_str_idx] = cur_state; ! } ! else if (!fl_longest_match && match) ! break; ! else /* (fl_longest_match && match) || (!fl_search && !match) */ ! { ! if (mctx->state_log == NULL) ! break; ! else ! { ! int max = mctx->state_log_top; ! for (; cur_str_idx <= max; ++cur_str_idx) ! if (mctx->state_log[cur_str_idx] != NULL) ! break; ! if (cur_str_idx > max) ! break; ! } ! } ! } if (cur_state != NULL && cur_state->halt) ! { ! /* Reached at a halt state. ! Check the halt state can satisfy the current context. */ ! if (!cur_state->has_constraint ! || check_halt_state_context (preg, cur_state, mctx, ! re_string_cur_idx (mctx->input))) ! { ! /* We found an appropriate halt state. */ ! match_last = re_string_cur_idx (mctx->input); ! match = 1; ! if (!fl_longest_match) ! break; ! } ! } } return match_last; } --- 990,1084 ---- if (mctx->state_log != NULL) mctx->state_log[cur_str_idx] = cur_state; + /* Check OP_OPEN_SUBEXP in the initial state in case that we use them + later. E.g. Processing back references. */ + if (dfa->nbackref) + { + err = check_subexp_matching_top (dfa, mctx, &cur_state->nodes, 0); + if (BE (err != REG_NOERROR, 0)) + return err; + } + if (cur_state->has_backref) { ! err = transit_state_bkref (preg, cur_state, mctx); ! if (BE (err != REG_NOERROR, 0)) ! return err; } /* If the RE accepts NULL string. */ if (cur_state->halt) { if (!cur_state->has_constraint ! || check_halt_state_context (preg, cur_state, mctx, cur_str_idx)) ! { ! if (!fl_longest_match) ! return cur_str_idx; ! else ! { ! match_last = cur_str_idx; ! match = 1; ! } ! 
} } while (!re_string_eoi (mctx->input)) { cur_state = transit_state (&err, preg, mctx, cur_state, ! fl_search && !match); if (cur_state == NULL) /* Reached at the invalid state or an error. */ ! { ! cur_str_idx = re_string_cur_idx (mctx->input); ! if (BE (err != REG_NOERROR, 0)) ! return -2; ! if (fl_search && !match) ! { ! /* Restart from initial state, since we are searching ! the point from where matching start. */ #ifdef RE_ENABLE_I18N ! if (MB_CUR_MAX == 1 ! || re_string_first_byte (mctx->input, cur_str_idx)) #endif /* RE_ENABLE_I18N */ ! cur_state = acquire_init_state_context (&err, preg, mctx, ! cur_str_idx); ! if (BE (cur_state == NULL && err != REG_NOERROR, 0)) ! return -2; ! if (mctx->state_log != NULL) ! mctx->state_log[cur_str_idx] = cur_state; ! } ! else if (!fl_longest_match && match) ! break; ! else /* (fl_longest_match && match) || (!fl_search && !match) */ ! { ! if (mctx->state_log == NULL) ! break; ! else ! { ! int max = mctx->state_log_top; ! for (; cur_str_idx <= max; ++cur_str_idx) ! if (mctx->state_log[cur_str_idx] != NULL) ! break; ! if (cur_str_idx > max) ! break; ! } ! } ! } if (cur_state != NULL && cur_state->halt) ! { ! /* Reached at a halt state. ! Check the halt state can satisfy the current context. */ ! if (!cur_state->has_constraint ! || check_halt_state_context (preg, cur_state, mctx, ! re_string_cur_idx (mctx->input))) ! { ! /* We found an appropriate halt state. */ ! match_last = re_string_cur_idx (mctx->input); ! match = 1; ! if (!fl_longest_match) ! break; ! } ! } } return match_last; } *************** *** 980,986 **** assert (state->halt); #endif context = re_string_context_at (mctx->input, idx, mctx->eflags, ! preg->newline_anchor); for (i = 0; i < state->nodes.nelem; ++i) if (check_halt_node_context (dfa, state->nodes.elems[i], context)) return state->nodes.elems[i]; --- 1119,1125 ---- assert (state->halt); #endif context = re_string_context_at (mctx->input, idx, mctx->eflags, ! 
preg->newline_anchor); for (i = 0; i < state->nodes.nelem; ++i) if (check_halt_node_context (dfa, state->nodes.elems[i], context)) return state->nodes.elems[i]; *************** *** 1010,1033 **** int ndest, dest_nodes[2]; err = re_node_set_insert (eps_via_nodes, node); if (BE (err < 0, 0)) ! return -1; /* Pick up valid destinations. */ for (ndest = 0, i = 0; i < dfa->edests[node].nelem; ++i) ! { ! int candidate = dfa->edests[node].elems[i]; ! if (!re_node_set_contains (cur_nodes, candidate)) ! continue; ! dest_nodes[0] = (ndest == 0) ? candidate : dest_nodes[0]; ! dest_nodes[1] = (ndest == 1) ? candidate : dest_nodes[1]; ! ++ndest; ! } if (ndest <= 1) ! return ndest == 0 ? -1 : (ndest == 1 ? dest_nodes[0] : 0); /* In order to avoid infinite loop like "(a*)*". */ if (re_node_set_contains (eps_via_nodes, dest_nodes[0])) ! return dest_nodes[1]; if (fs != NULL) ! push_fail_stack (fs, *pidx, dest_nodes, nregs, regs, eps_via_nodes); return dest_nodes[0]; } else --- 1149,1172 ---- int ndest, dest_nodes[2]; err = re_node_set_insert (eps_via_nodes, node); if (BE (err < 0, 0)) ! return -1; /* Pick up valid destinations. */ for (ndest = 0, i = 0; i < dfa->edests[node].nelem; ++i) ! { ! int candidate = dfa->edests[node].elems[i]; ! if (!re_node_set_contains (cur_nodes, candidate)) ! continue; ! dest_nodes[0] = (ndest == 0) ? candidate : dest_nodes[0]; ! dest_nodes[1] = (ndest == 1) ? candidate : dest_nodes[1]; ! ++ndest; ! } if (ndest <= 1) ! return ndest == 0 ? -1 : (ndest == 1 ? dest_nodes[0] : 0); /* In order to avoid infinite loop like "(a*)*". */ if (re_node_set_contains (eps_via_nodes, dest_nodes[0])) ! return dest_nodes[1]; if (fs != NULL) ! push_fail_stack (fs, *pidx, dest_nodes, nregs, regs, eps_via_nodes); return dest_nodes[0]; } else *************** *** 1037,1086 **** #ifdef RE_ENABLE_I18N if (ACCEPT_MB_NODE (type)) ! naccepted = check_node_accept_bytes (preg, node, mctx->input, *pidx); else #endif /* RE_ENABLE_I18N */ if (type == OP_BACK_REF) ! { ! 
int subexp_idx = dfa->nodes[node].opr.idx; ! naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so; ! if (fs != NULL) ! { ! if (regs[subexp_idx].rm_so == -1 || regs[subexp_idx].rm_eo == -1) ! return -1; ! else if (naccepted) ! { ! char *buf = re_string_get_buffer (mctx->input); ! if (my_memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx, ! naccepted) != 0) ! return -1; ! } ! } ! ! if (naccepted == 0) ! { ! err = re_node_set_insert (eps_via_nodes, node); ! if (BE (err < 0, 0)) ! return -2; ! dest_node = dfa->edests[node].elems[0]; ! if (re_node_set_contains (&mctx->state_log[*pidx]->nodes, ! dest_node)) ! return dest_node; ! } ! } if (naccepted != 0 ! || check_node_accept (preg, dfa->nodes + node, mctx, *pidx)) ! { ! dest_node = dfa->nexts[node]; ! *pidx = (naccepted == 0) ? *pidx + 1 : *pidx + naccepted; ! if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL ! || !re_node_set_contains (&mctx->state_log[*pidx]->nodes, ! dest_node))) ! return -1; ! re_node_set_empty (eps_via_nodes); ! return dest_node; ! } } return -1; } --- 1176,1225 ---- #ifdef RE_ENABLE_I18N if (ACCEPT_MB_NODE (type)) ! naccepted = check_node_accept_bytes (preg, node, mctx->input, *pidx); else #endif /* RE_ENABLE_I18N */ if (type == OP_BACK_REF) ! { ! int subexp_idx = dfa->nodes[node].opr.idx; ! naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so; ! if (fs != NULL) ! { ! if (regs[subexp_idx].rm_so == -1 || regs[subexp_idx].rm_eo == -1) ! return -1; ! else if (naccepted) ! { ! char *buf = re_string_get_buffer (mctx->input); ! if (my_memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx, ! naccepted) != 0) ! return -1; ! } ! } ! ! if (naccepted == 0) ! { ! err = re_node_set_insert (eps_via_nodes, node); ! if (BE (err < 0, 0)) ! return -2; ! dest_node = dfa->edests[node].elems[0]; ! if (re_node_set_contains (&mctx->state_log[*pidx]->nodes, ! dest_node)) ! return dest_node; ! } ! } if (naccepted != 0 ! || check_node_accept (preg, dfa->nodes + node, mctx, *pidx)) ! { ! 
dest_node = dfa->nexts[node]; ! *pidx = (naccepted == 0) ? *pidx + 1 : *pidx + naccepted; ! if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL ! || !re_node_set_contains (&mctx->state_log[*pidx]->nodes, ! dest_node))) ! return -1; ! re_node_set_empty (eps_via_nodes); ! return dest_node; ! } } return -1; } *************** *** 1096,1106 **** int num = fs->num++; if (fs->num == fs->alloc) { fs->alloc *= 2; ! fs->stack = realloc (fs->stack, (sizeof (struct re_fail_stack_ent_t) ! * fs->alloc)); ! if (fs->stack == NULL) ! return REG_ESPACE; } fs->stack[num].idx = str_idx; fs->stack[num].node = dests[1]; --- 1235,1247 ---- int num = fs->num++; if (fs->num == fs->alloc) { + struct re_fail_stack_ent_t *new_array; fs->alloc *= 2; ! new_array = realloc (fs->stack, (sizeof (struct re_fail_stack_ent_t) ! * fs->alloc)); ! if (new_array == NULL) ! return REG_ESPACE; ! fs->stack = new_array; } fs->stack[num].idx = str_idx; fs->stack[num].node = dests[1]; *************** *** 1109,1115 **** err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes); return err; } ! static int pop_fail_stack (fs, pidx, nregs, regs, eps_via_nodes) struct re_fail_stack_t *fs; --- 1250,1256 ---- err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes); return err; } ! static int pop_fail_stack (fs, pidx, nregs, regs, eps_via_nodes) struct re_fail_stack_t *fs; *************** *** 1163,1207 **** { update_regs (dfa, pmatch, cur_node, idx, real_nmatch); if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node) ! { ! int reg_idx; ! if (fs) ! { ! for (reg_idx = 0; reg_idx < nmatch; ++reg_idx) ! if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1) ! break; ! if (reg_idx == nmatch) ! { ! re_node_set_free (&eps_via_nodes); ! return free_fail_stack_return (fs); ! } ! cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch, ! &eps_via_nodes); ! } ! else ! { ! re_node_set_free (&eps_via_nodes); ! return REG_NOERROR; ! } ! } /* Proceed to next node. 
*/ cur_node = proceed_next_node (preg, nmatch, pmatch, mctx, &idx, cur_node, ! &eps_via_nodes, fs); if (BE (cur_node < 0, 0)) ! { ! if (cur_node == -2) ! return REG_ESPACE; ! if (fs) ! cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch, ! &eps_via_nodes); ! else ! { ! re_node_set_free (&eps_via_nodes); ! return REG_NOMATCH; ! } ! } } re_node_set_free (&eps_via_nodes); return free_fail_stack_return (fs); --- 1304,1348 ---- { update_regs (dfa, pmatch, cur_node, idx, real_nmatch); if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node) ! { ! int reg_idx; ! if (fs) ! { ! for (reg_idx = 0; reg_idx < nmatch; ++reg_idx) ! if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1) ! break; ! if (reg_idx == nmatch) ! { ! re_node_set_free (&eps_via_nodes); ! return free_fail_stack_return (fs); ! } ! cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch, ! &eps_via_nodes); ! } ! else ! { ! re_node_set_free (&eps_via_nodes); ! return REG_NOERROR; ! } ! } /* Proceed to next node. */ cur_node = proceed_next_node (preg, nmatch, pmatch, mctx, &idx, cur_node, ! &eps_via_nodes, fs); if (BE (cur_node < 0, 0)) ! { ! if (cur_node == -2) ! return REG_ESPACE; ! if (fs) ! cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch, ! &eps_via_nodes); ! else ! { ! re_node_set_free (&eps_via_nodes); ! return REG_NOMATCH; ! } ! } } re_node_set_free (&eps_via_nodes); return free_fail_stack_return (fs); *************** *** 1215,1224 **** { int fs_idx; for (fs_idx = 0; fs_idx < fs->num; ++fs_idx) ! { ! re_node_set_free (&fs->stack[fs_idx].eps_via_nodes); ! re_free (fs->stack[fs_idx].regs); ! } re_free (fs->stack); } return REG_NOERROR; --- 1356,1365 ---- { int fs_idx; for (fs_idx = 0; fs_idx < fs->num; ++fs_idx) ! { ! re_node_set_free (&fs->stack[fs_idx].eps_via_nodes); ! re_free (fs->stack[fs_idx].regs); ! } re_free (fs->stack); } return REG_NOERROR; *************** *** 1256,1274 **** Rules: We throw away the Node `a' in the STATE_LOG[STR_IDX] if... 1. 
When STR_IDX == MATCH_LAST(the last index in the state_log): ! If `a' isn't the LAST_NODE and `a' can't epsilon transit to ! the LAST_NODE, we throw away the node `a'. 2. When 0 <= STR_IDX < MATCH_LAST and `a' accepts ! string `s' and transit to `b': ! i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw ! away the node `a'. ! ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is ! throwed away, we throw away the node `a'. 3. When 0 <= STR_IDX < n and 'a' epsilon transit to 'b': ! i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the ! node `a'. ! ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is throwed away, ! we throw away the node `a'. */ #define STATE_NODE_CONTAINS(state,node) \ ((state) != NULL && re_node_set_contains (&(state)->nodes, node)) --- 1397,1415 ---- Rules: We throw away the Node `a' in the STATE_LOG[STR_IDX] if... 1. When STR_IDX == MATCH_LAST(the last index in the state_log): ! If `a' isn't the LAST_NODE and `a' can't epsilon transit to ! the LAST_NODE, we throw away the node `a'. 2. When 0 <= STR_IDX < MATCH_LAST and `a' accepts ! string `s' and transit to `b': ! i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw ! away the node `a'. ! ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is ! throwed away, we throw away the node `a'. 3. When 0 <= STR_IDX < n and 'a' epsilon transit to 'b': ! i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the ! node `a'. ! ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is throwed away, ! we throw away the node `a'. */ #define STATE_NODE_CONTAINS(state,node) \ ((state) != NULL && re_node_set_contains (&(state)->nodes, node)) *************** *** 1298,1304 **** return err; err = update_cur_sifted_state (preg, mctx, sctx, str_idx, &cur_dest); if (BE (err != REG_NOERROR, 0)) ! return err; /* Then check each states in the state_log. 
*/ while (str_idx > 0) --- 1439,1445 ---- return err; err = update_cur_sifted_state (preg, mctx, sctx, str_idx, &cur_dest); if (BE (err != REG_NOERROR, 0)) ! goto free_return; /* Then check each states in the state_log. */ while (str_idx > 0) *************** *** 1307,1381 **** /* Update counters. */ null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + 1 : 0; if (null_cnt > mctx->max_mb_elem_len) ! { ! memset (sctx->sifted_states, '\0', ! sizeof (re_dfastate_t *) * str_idx); ! re_node_set_free (&cur_dest); ! return REG_NOERROR; ! } re_node_set_empty (&cur_dest); --str_idx; cur_src = ((mctx->state_log[str_idx] == NULL) ? &empty_set ! : &mctx->state_log[str_idx]->nodes); /* Then build the next sifted state. ! We build the next sifted state on `cur_dest', and update ! `sifted_states[str_idx]' with `cur_dest'. ! Note: ! `cur_dest' is the sifted state from `state_log[str_idx + 1]'. ! `cur_src' points the node_set of the old `state_log[str_idx]'. */ for (i = 0; i < cur_src->nelem; i++) ! { ! int prev_node = cur_src->elems[i]; ! int naccepted = 0; ! re_token_type_t type = dfa->nodes[prev_node].type; ! if (IS_EPSILON_NODE(type)) ! continue; #ifdef RE_ENABLE_I18N ! /* If the node may accept `multi byte'. */ ! if (ACCEPT_MB_NODE (type)) ! naccepted = sift_states_iter_mb (preg, mctx, sctx, prev_node, ! str_idx, sctx->last_str_idx); #endif /* RE_ENABLE_I18N */ ! /* We don't check backreferences here. ! See update_cur_sifted_state(). */ ! if (!naccepted ! && check_node_accept (preg, dfa->nodes + prev_node, mctx, ! str_idx) ! && STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1], ! dfa->nexts[prev_node])) ! naccepted = 1; ! ! if (naccepted == 0) ! continue; ! ! if (sctx->limits.nelem) ! { ! int to_idx = str_idx + naccepted; ! if (check_dst_limits (dfa, &sctx->limits, mctx, ! dfa->nexts[prev_node], to_idx, ! prev_node, str_idx)) ! continue; ! } ! ret = re_node_set_insert (&cur_dest, prev_node); ! if (BE (ret == -1, 0)) ! return err; ! 
} /* Add all the nodes which satisfy the following conditions: ! - It can epsilon transit to a node in CUR_DEST. ! - It is in CUR_SRC. ! And update state_log. */ err = update_cur_sifted_state (preg, mctx, sctx, str_idx, &cur_dest); if (BE (err != REG_NOERROR, 0)) ! return err; } ! re_node_set_free (&cur_dest); ! return REG_NOERROR; } /* Helper functions. */ --- 1448,1526 ---- /* Update counters. */ null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + 1 : 0; if (null_cnt > mctx->max_mb_elem_len) ! { ! memset (sctx->sifted_states, '\0', ! sizeof (re_dfastate_t *) * str_idx); ! re_node_set_free (&cur_dest); ! return REG_NOERROR; ! } re_node_set_empty (&cur_dest); --str_idx; cur_src = ((mctx->state_log[str_idx] == NULL) ? &empty_set ! : &mctx->state_log[str_idx]->nodes); /* Then build the next sifted state. ! We build the next sifted state on `cur_dest', and update ! `sifted_states[str_idx]' with `cur_dest'. ! Note: ! `cur_dest' is the sifted state from `state_log[str_idx + 1]'. ! `cur_src' points the node_set of the old `state_log[str_idx]'. */ for (i = 0; i < cur_src->nelem; i++) ! { ! int prev_node = cur_src->elems[i]; ! int naccepted = 0; ! re_token_type_t type = dfa->nodes[prev_node].type; ! if (IS_EPSILON_NODE(type)) ! continue; #ifdef RE_ENABLE_I18N ! /* If the node may accept `multi byte'. */ ! if (ACCEPT_MB_NODE (type)) ! naccepted = sift_states_iter_mb (preg, mctx, sctx, prev_node, ! str_idx, sctx->last_str_idx); #endif /* RE_ENABLE_I18N */ ! /* We don't check backreferences here. ! See update_cur_sifted_state(). */ ! ! if (!naccepted ! && check_node_accept (preg, dfa->nodes + prev_node, mctx, ! str_idx) ! && STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1], ! dfa->nexts[prev_node])) ! naccepted = 1; ! if (naccepted == 0) ! continue; ! ! if (sctx->limits.nelem) ! { ! int to_idx = str_idx + naccepted; ! if (check_dst_limits (dfa, &sctx->limits, mctx, ! dfa->nexts[prev_node], to_idx, ! prev_node, str_idx)) ! continue; ! } ! 
ret = re_node_set_insert (&cur_dest, prev_node); ! if (BE (ret == -1, 0)) ! { ! err = REG_ESPACE; ! goto free_return; ! } ! } /* Add all the nodes which satisfy the following conditions: ! - It can epsilon transit to a node in CUR_DEST. ! - It is in CUR_SRC. ! And update state_log. */ err = update_cur_sifted_state (preg, mctx, sctx, str_idx, &cur_dest); if (BE (err != REG_NOERROR, 0)) ! goto free_return; } ! err = REG_NOERROR; ! free_return: re_node_set_free (&cur_dest); ! return err; } /* Helper functions. */ *************** *** 1389,1412 **** if (next_state_log_idx >= mctx->input->bufs_len || (next_state_log_idx >= mctx->input->valid_len ! && mctx->input->valid_len < mctx->input->len)) { reg_errcode_t err; err = extend_buffers (mctx); if (BE (err != REG_NOERROR, 0)) ! return err; } if (top < next_state_log_idx) { memset (mctx->state_log + top + 1, '\0', ! sizeof (re_dfastate_t *) * (next_state_log_idx - top)); mctx->state_log_top = next_state_log_idx; } return REG_NOERROR; } ! static reg_errcode_t merge_state_array (dfa, dst, src, num) re_dfa_t *dfa; re_dfastate_t **dst; re_dfastate_t **src; --- 1534,1558 ---- if (next_state_log_idx >= mctx->input->bufs_len || (next_state_log_idx >= mctx->input->valid_len ! && mctx->input->valid_len < mctx->input->len)) { reg_errcode_t err; err = extend_buffers (mctx); if (BE (err != REG_NOERROR, 0)) ! return err; } if (top < next_state_log_idx) { memset (mctx->state_log + top + 1, '\0', ! sizeof (re_dfastate_t *) * (next_state_log_idx - top)); mctx->state_log_top = next_state_log_idx; } return REG_NOERROR; } ! static reg_errcode_t ! merge_state_array (dfa, dst, src, num) re_dfa_t *dfa; re_dfastate_t **dst; re_dfastate_t **src; *************** *** 1417,1435 **** for (st_idx = 0; st_idx < num; ++st_idx) { if (dst[st_idx] == NULL) ! dst[st_idx] = src[st_idx]; else if (src[st_idx] != NULL) ! { ! re_node_set merged_set; ! err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes, ! &src[st_idx]->nodes); ! 
if (BE (err != REG_NOERROR, 0)) ! return err; ! dst[st_idx] = re_acquire_state (&err, dfa, &merged_set); ! if (BE (err != REG_NOERROR, 0)) ! return err; ! re_node_set_free (&merged_set); ! } } return REG_NOERROR; } --- 1563,1581 ---- for (st_idx = 0; st_idx < num; ++st_idx) { if (dst[st_idx] == NULL) ! dst[st_idx] = src[st_idx]; else if (src[st_idx] != NULL) ! { ! re_node_set merged_set; ! err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes, ! &src[st_idx]->nodes); ! if (BE (err != REG_NOERROR, 0)) ! return err; ! dst[st_idx] = re_acquire_state (&err, dfa, &merged_set); ! re_node_set_free (&merged_set); ! if (BE (err != REG_NOERROR, 0)) ! return err; ! } } return REG_NOERROR; } *************** *** 1446,1452 **** re_dfa_t *dfa = (re_dfa_t *)preg->buffer; const re_node_set *candidates; candidates = ((mctx->state_log[str_idx] == NULL) ? &empty_set ! : &mctx->state_log[str_idx]->nodes); /* At first, add the nodes which can epsilon transit to a node in DEST_NODE. */ --- 1592,1598 ---- re_dfa_t *dfa = (re_dfa_t *)preg->buffer; const re_node_set *candidates; candidates = ((mctx->state_log[str_idx] == NULL) ? &empty_set ! : &mctx->state_log[str_idx]->nodes); /* At first, add the nodes which can epsilon transit to a node in DEST_NODE. */ *************** *** 1454,1469 **** { err = add_epsilon_src_nodes (dfa, dest_nodes, candidates); if (BE (err != REG_NOERROR, 0)) ! return err; } /* Then, check the limitations in the current sift_context. */ if (dest_nodes->nelem && sctx->limits.nelem) { err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits, ! mctx->bkref_ents, str_idx); if (BE (err != REG_NOERROR, 0)) ! return err; } /* Update state_log. */ --- 1600,1615 ---- { err = add_epsilon_src_nodes (dfa, dest_nodes, candidates); if (BE (err != REG_NOERROR, 0)) ! return err; } /* Then, check the limitations in the current sift_context. */ if (dest_nodes->nelem && sctx->limits.nelem) { err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits, ! 
mctx->bkref_ents, str_idx); if (BE (err != REG_NOERROR, 0)) ! return err; } /* Update state_log. */ *************** *** 1471,1491 **** if (BE (sctx->sifted_states[str_idx] == NULL && err != REG_NOERROR, 0)) return err; - /* If we are searching for the subexpression candidates. - Note that we were from transit_state_bkref_loop() in this case. */ - if (dest_nodes->nelem && sctx->check_subexp) - { - err = search_subexp (preg, mctx, sctx, str_idx, dest_nodes); - if (BE (err != REG_NOERROR, 0)) - return err; - } - if ((mctx->state_log[str_idx] != NULL && mctx->state_log[str_idx]->has_backref)) { err = sift_states_bkref (preg, mctx, sctx, str_idx, dest_nodes); if (BE (err != REG_NOERROR, 0)) ! return err; } return REG_NOERROR; } --- 1617,1628 ---- if (BE (sctx->sifted_states[str_idx] == NULL && err != REG_NOERROR, 0)) return err; if ((mctx->state_log[str_idx] != NULL && mctx->state_log[str_idx]->has_backref)) { err = sift_states_bkref (preg, mctx, sctx, str_idx, dest_nodes); if (BE (err != REG_NOERROR, 0)) ! return err; } return REG_NOERROR; } *************** *** 1506,1515 **** for (src_idx = 0; src_idx < src_copy.nelem; ++src_idx) { err = re_node_set_add_intersect (dest_nodes, candidates, ! dfa->inveclosures ! + src_copy.elems[src_idx]); if (BE (err != REG_NOERROR, 0)) ! return err; } re_node_set_free (&src_copy); return REG_NOERROR; --- 1643,1655 ---- for (src_idx = 0; src_idx < src_copy.nelem; ++src_idx) { err = re_node_set_add_intersect (dest_nodes, candidates, ! dfa->inveclosures ! + src_copy.elems[src_idx]); if (BE (err != REG_NOERROR, 0)) ! { ! re_node_set_free (&src_copy); ! return err; ! } } re_node_set_free (&src_copy); return REG_NOERROR; *************** *** 1529,1563 **** re_node_set_init_empty (&except_nodes); for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx) { ! int cur_node = inv_eclosure->elems[ecl_idx]; ! if (cur_node == node) ! continue; ! if (IS_EPSILON_NODE (dfa->nodes[cur_node].type)) ! { ! int edst1 = dfa->edests[cur_node].elems[0]; ! 
int edst2 = ((dfa->edests[cur_node].nelem > 1) ! ? dfa->edests[cur_node].elems[1] : -1); ! if ((!re_node_set_contains (inv_eclosure, edst1) ! && re_node_set_contains (dest_nodes, edst1)) ! || (edst2 > 0 ! && !re_node_set_contains (inv_eclosure, edst2) ! && re_node_set_contains (dest_nodes, edst2))) ! { ! err = re_node_set_add_intersect (&except_nodes, candidates, ! dfa->inveclosures + cur_node); ! if (BE (err != REG_NOERROR, 0)) ! return err; ! } ! } } for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx) { ! int cur_node = inv_eclosure->elems[ecl_idx]; ! if (!re_node_set_contains (&except_nodes, cur_node)) ! { ! int idx = re_node_set_contains (dest_nodes, cur_node) - 1; ! re_node_set_remove_at (dest_nodes, idx); ! } } re_node_set_free (&except_nodes); return REG_NOERROR; --- 1669,1706 ---- re_node_set_init_empty (&except_nodes); for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx) { ! int cur_node = inv_eclosure->elems[ecl_idx]; ! if (cur_node == node) ! continue; ! if (IS_EPSILON_NODE (dfa->nodes[cur_node].type)) ! { ! int edst1 = dfa->edests[cur_node].elems[0]; ! int edst2 = ((dfa->edests[cur_node].nelem > 1) ! ? dfa->edests[cur_node].elems[1] : -1); ! if ((!re_node_set_contains (inv_eclosure, edst1) ! && re_node_set_contains (dest_nodes, edst1)) ! || (edst2 > 0 ! && !re_node_set_contains (inv_eclosure, edst2) ! && re_node_set_contains (dest_nodes, edst2))) ! { ! err = re_node_set_add_intersect (&except_nodes, candidates, ! dfa->inveclosures + cur_node); ! if (BE (err != REG_NOERROR, 0)) ! { ! re_node_set_free (&except_nodes); ! return err; ! } ! } ! } } for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx) { ! int cur_node = inv_eclosure->elems[ecl_idx]; ! if (!re_node_set_contains (&except_nodes, cur_node)) ! { ! int idx = re_node_set_contains (dest_nodes, cur_node) - 1; ! re_node_set_remove_at (dest_nodes, idx); ! 
} } re_node_set_free (&except_nodes); return REG_NOERROR; *************** *** 1580,1606 **** subexp_idx = dfa->nodes[ent->node].opr.idx - 1; dst_pos = check_dst_limits_calc_pos (dfa, mctx, limits->elems[lim_idx], ! dfa->eclosures + dst_node, ! subexp_idx, dst_node, dst_idx); src_pos = check_dst_limits_calc_pos (dfa, mctx, limits->elems[lim_idx], ! dfa->eclosures + src_node, ! subexp_idx, src_node, src_idx); /* In case of: ! ( ) ! ( ) ! ( ) */ if (src_pos == dst_pos) ! continue; /* This is unrelated limitation. */ else ! return 1; } return 0; } static int check_dst_limits_calc_pos (dfa, mctx, limit, eclosures, subexp_idx, node, ! str_idx) re_dfa_t *dfa; re_match_context_t *mctx; re_node_set *eclosures; --- 1723,1749 ---- subexp_idx = dfa->nodes[ent->node].opr.idx - 1; dst_pos = check_dst_limits_calc_pos (dfa, mctx, limits->elems[lim_idx], ! dfa->eclosures + dst_node, ! subexp_idx, dst_node, dst_idx); src_pos = check_dst_limits_calc_pos (dfa, mctx, limits->elems[lim_idx], ! dfa->eclosures + src_node, ! subexp_idx, src_node, src_idx); /* In case of: ! ( ) ! ( ) ! ( ) */ if (src_pos == dst_pos) ! continue; /* This is unrelated limitation. */ else ! return 1; } return 0; } static int check_dst_limits_calc_pos (dfa, mctx, limit, eclosures, subexp_idx, node, ! str_idx) re_dfa_t *dfa; re_match_context_t *mctx; re_node_set *eclosures; *************** *** 1608,1655 **** { struct re_backref_cache_entry *lim = mctx->bkref_ents + limit; int pos = (str_idx < lim->subexp_from ? -1 ! : (lim->subexp_to < str_idx ? 1 : 0)); if (pos == 0 && (str_idx == lim->subexp_from || str_idx == lim->subexp_to)) { int node_idx; for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx) ! { ! int node = eclosures->elems[node_idx]; ! re_token_type_t type= dfa->nodes[node].type; ! if (type == OP_BACK_REF) ! { ! int bi; ! for (bi = 0; bi < mctx->nbkref_ents; ++bi) ! { ! struct re_backref_cache_entry *ent = mctx->bkref_ents + bi; ! if (ent->node == node && ent->subexp_from == ent->subexp_to ! 
&& ent->str_idx == str_idx) ! { ! int cpos, dst; ! dst = dfa->edests[node].elems[0]; ! cpos = check_dst_limits_calc_pos (dfa, mctx, limit, ! dfa->eclosures + dst, ! subexp_idx, dst, ! str_idx); ! if ((str_idx == lim->subexp_from && cpos == -1) ! || (str_idx == lim->subexp_to && cpos == 0)) ! return cpos; ! } ! } ! } ! if (type == OP_OPEN_SUBEXP && subexp_idx == dfa->nodes[node].opr.idx ! && str_idx == lim->subexp_from) ! { ! pos = -1; ! break; ! } ! if (type == OP_CLOSE_SUBEXP && subexp_idx == dfa->nodes[node].opr.idx ! && str_idx == lim->subexp_to) ! break; ! } if (node_idx == eclosures->nelem && str_idx == lim->subexp_to) ! pos = 1; } return pos; } --- 1751,1799 ---- { struct re_backref_cache_entry *lim = mctx->bkref_ents + limit; int pos = (str_idx < lim->subexp_from ? -1 ! : (lim->subexp_to < str_idx ? 1 : 0)); if (pos == 0 && (str_idx == lim->subexp_from || str_idx == lim->subexp_to)) { int node_idx; for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx) ! { ! int node = eclosures->elems[node_idx]; ! re_token_type_t type= dfa->nodes[node].type; ! if (type == OP_BACK_REF) ! { ! int bi = search_cur_bkref_entry (mctx, str_idx); ! for (; bi < mctx->nbkref_ents; ++bi) ! { ! struct re_backref_cache_entry *ent = mctx->bkref_ents + bi; ! if (ent->str_idx > str_idx) ! break; ! if (ent->node == node && ent->subexp_from == ent->subexp_to) ! { ! int cpos, dst; ! dst = dfa->edests[node].elems[0]; ! cpos = check_dst_limits_calc_pos (dfa, mctx, limit, ! dfa->eclosures + dst, ! subexp_idx, dst, ! str_idx); ! if ((str_idx == lim->subexp_from && cpos == -1) ! || (str_idx == lim->subexp_to && cpos == 0)) ! return cpos; ! } ! } ! } ! if (type == OP_OPEN_SUBEXP && subexp_idx == dfa->nodes[node].opr.idx ! && str_idx == lim->subexp_from) ! { ! pos = -1; ! break; ! } ! if (type == OP_CLOSE_SUBEXP && subexp_idx == dfa->nodes[node].opr.idx ! && str_idx == lim->subexp_to) ! break; ! } if (node_idx == eclosures->nelem && str_idx == lim->subexp_to) ! 
pos = 1; } return pos; } *************** *** 1676,1895 **** ent = bkref_ents + limits->elems[lim_idx]; if (str_idx <= ent->subexp_from || ent->str_idx < str_idx) ! continue; /* This is unrelated limitation. */ subexp_idx = dfa->nodes[ent->node].opr.idx - 1; if (ent->subexp_to == str_idx) ! { ! int ops_node = -1; ! int cls_node = -1; ! for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx) ! { ! int node = dest_nodes->elems[node_idx]; ! re_token_type_t type= dfa->nodes[node].type; ! if (type == OP_OPEN_SUBEXP ! && subexp_idx == dfa->nodes[node].opr.idx) ! ops_node = node; ! else if (type == OP_CLOSE_SUBEXP ! && subexp_idx == dfa->nodes[node].opr.idx) ! cls_node = node; ! } ! ! /* Check the limitation of the open subexpression. */ ! /* Note that (ent->subexp_to = str_idx != ent->subexp_from). */ ! if (ops_node >= 0) ! { ! err = sub_epsilon_src_nodes(dfa, ops_node, dest_nodes, ! candidates); ! if (BE (err != REG_NOERROR, 0)) ! return err; ! } ! /* Check the limitation of the close subexpression. */ ! for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx) ! { ! int node = dest_nodes->elems[node_idx]; ! if (!re_node_set_contains (dfa->inveclosures + node, cls_node) ! && !re_node_set_contains (dfa->eclosures + node, cls_node)) ! { ! /* It is against this limitation. ! Remove it form the current sifted state. */ ! err = sub_epsilon_src_nodes(dfa, node, dest_nodes, ! candidates); ! if (BE (err != REG_NOERROR, 0)) ! return err; ! --node_idx; ! } ! } ! } ! else /* (ent->subexp_to != str_idx) */ ! { ! for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx) ! { ! int node = dest_nodes->elems[node_idx]; ! re_token_type_t type= dfa->nodes[node].type; ! if (type == OP_CLOSE_SUBEXP || type == OP_OPEN_SUBEXP) ! { ! if (subexp_idx != dfa->nodes[node].opr.idx) ! continue; ! if ((type == OP_CLOSE_SUBEXP && ent->subexp_to != str_idx) ! || (type == OP_OPEN_SUBEXP)) ! { ! /* It is against this limitation. ! Remove it form the current sifted state. */ ! 
err = sub_epsilon_src_nodes(dfa, node, dest_nodes, ! candidates); ! if (BE (err != REG_NOERROR, 0)) ! return err; ! } ! } ! } ! } ! } ! return REG_NOERROR; ! } ! ! /* Search for the top (in case of sctx->check_subexp < 0) or the ! bottom (in case of sctx->check_subexp > 0) of the subexpressions ! which the backreference sctx->cur_bkref can match. */ ! ! static reg_errcode_t ! search_subexp (preg, mctx, sctx, str_idx, dest_nodes) ! const regex_t *preg; ! re_match_context_t *mctx; ! re_sift_context_t *sctx; ! int str_idx; ! re_node_set *dest_nodes; ! { ! reg_errcode_t err; ! re_dfa_t *dfa = (re_dfa_t *)preg->buffer; ! re_sift_context_t local_sctx; ! int node_idx, node; ! const re_node_set *candidates; ! re_dfastate_t **lim_states = NULL; ! candidates = ((mctx->state_log[str_idx] == NULL) ? &empty_set ! : &mctx->state_log[str_idx]->nodes); ! local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized. */ ! ! for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx) ! { ! re_token_type_t type; ! node = dest_nodes->elems[node_idx]; ! type = dfa->nodes[node].type; ! ! if (type == OP_CLOSE_SUBEXP ! && sctx->check_subexp == dfa->nodes[node].opr.idx + 1) ! { ! re_dfastate_t *cur_state; ! /* Found the bottom of the subexpression, then search for the ! top of it. */ ! if (local_sctx.sifted_states == NULL) ! { ! /* It hasn't been initialized yet, initialize it now. */ ! local_sctx = *sctx; ! err = re_node_set_init_copy (&local_sctx.limits, &sctx->limits); ! if (BE (err != REG_NOERROR, 0)) ! return err; ! } ! local_sctx.check_subexp = -sctx->check_subexp; ! local_sctx.limited_states = sctx->limited_states; ! local_sctx.last_node = node; ! local_sctx.last_str_idx = local_sctx.cls_subexp_idx = str_idx; ! cur_state = local_sctx.sifted_states[str_idx]; ! err = sift_states_backward (preg, mctx, &local_sctx); ! local_sctx.sifted_states[str_idx] = cur_state; ! if (BE (err != REG_NOERROR, 0)) ! return err; ! /* There must not 2 same node in a node set. */ ! break; ! } ! 
else if (type == OP_OPEN_SUBEXP ! && -sctx->check_subexp == dfa->nodes[node].opr.idx + 1) ! { ! /* Found the top of the subexpression, check that the ! backreference can match the input string. */ ! char *buf; ! int dest_str_idx; ! int bkref_str_idx = re_string_cur_idx (mctx->input); ! int subexp_len = sctx->cls_subexp_idx - str_idx; ! if (subexp_len < 0 || bkref_str_idx + subexp_len > mctx->input->len) ! break; ! ! if (bkref_str_idx + subexp_len > mctx->input->valid_len ! && mctx->input->valid_len < mctx->input->len) ! { ! reg_errcode_t err; ! err = extend_buffers (mctx); ! if (BE (err != REG_NOERROR, 0)) ! return err; ! } ! buf = (char *) re_string_get_buffer (mctx->input); ! if (my_memcmp (buf + str_idx, buf + bkref_str_idx, subexp_len) != 0) ! break; ! ! if (sctx->limits.nelem && str_idx > 0) ! { ! re_dfastate_t *cur_state = sctx->sifted_states[str_idx]; ! if (lim_states == NULL) ! { ! lim_states = re_malloc (re_dfastate_t *, str_idx + 1); ! } ! if (local_sctx.sifted_states == NULL) ! { ! /* It hasn't been initialized yet, initialize it now. */ ! local_sctx = *sctx; ! if (BE (lim_states == NULL, 0)) ! return REG_ESPACE; ! err = re_node_set_init_copy (&local_sctx.limits, ! &sctx->limits); ! if (BE (err != REG_NOERROR, 0)) ! return err; ! } ! local_sctx.check_subexp = 0; ! local_sctx.last_node = node; ! local_sctx.last_str_idx = str_idx; ! local_sctx.limited_states = lim_states; ! memset (lim_states, '\0', ! sizeof (re_dfastate_t*) * (str_idx + 1)); ! err = sift_states_backward (preg, mctx, &local_sctx); ! if (BE (err != REG_NOERROR, 0)) ! return err; ! if (local_sctx.sifted_states[0] == NULL ! && local_sctx.limited_states[0] == NULL) ! { ! sctx->sifted_states[str_idx] = cur_state; ! break; ! } ! sctx->sifted_states[str_idx] = cur_state; ! } ! /* Successfully matched, add a new cache entry. */ ! dest_str_idx = bkref_str_idx + subexp_len; ! err = match_ctx_add_entry (mctx, dfa, sctx->cur_bkref, bkref_str_idx, ! str_idx, sctx->cls_subexp_idx); ! 
if (BE (err != REG_NOERROR, 0)) ! return err; ! err = clean_state_log_if_need (mctx, dest_str_idx); ! if (BE (err != REG_NOERROR, 0)) ! return err; ! break; ! } ! } ! /* Remove the top/bottom of the sub expression we processed. */ ! if (node_idx < dest_nodes->nelem) ! { ! err = sub_epsilon_src_nodes(dfa, node, dest_nodes, candidates); ! if (BE (err != REG_NOERROR, 0)) ! return err; ! /* Update state_log. */ ! sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes); ! if (BE (err != REG_NOERROR, 0)) ! return err; } - - if (local_sctx.sifted_states != NULL) - re_node_set_free (&local_sctx.limits); - if (lim_states != NULL) - re_free (lim_states); return REG_NOERROR; } --- 1820,1894 ---- ent = bkref_ents + limits->elems[lim_idx]; if (str_idx <= ent->subexp_from || ent->str_idx < str_idx) ! continue; /* This is unrelated limitation. */ subexp_idx = dfa->nodes[ent->node].opr.idx - 1; if (ent->subexp_to == str_idx) ! { ! int ops_node = -1; ! int cls_node = -1; ! for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx) ! { ! int node = dest_nodes->elems[node_idx]; ! re_token_type_t type= dfa->nodes[node].type; ! if (type == OP_OPEN_SUBEXP ! && subexp_idx == dfa->nodes[node].opr.idx) ! ops_node = node; ! else if (type == OP_CLOSE_SUBEXP ! && subexp_idx == dfa->nodes[node].opr.idx) ! cls_node = node; ! } ! /* Check the limitation of the open subexpression. */ ! /* Note that (ent->subexp_to = str_idx != ent->subexp_from). */ ! if (ops_node >= 0) ! { ! err = sub_epsilon_src_nodes(dfa, ops_node, dest_nodes, ! candidates); ! if (BE (err != REG_NOERROR, 0)) ! return err; ! } ! /* Check the limitation of the close subexpression. */ ! for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx) ! { ! int node = dest_nodes->elems[node_idx]; ! if (!re_node_set_contains (dfa->inveclosures + node, cls_node) ! && !re_node_set_contains (dfa->eclosures + node, cls_node)) ! { ! /* It is against this limitation. ! Remove it form the current sifted state. */ ! 
err = sub_epsilon_src_nodes(dfa, node, dest_nodes, ! candidates); ! if (BE (err != REG_NOERROR, 0)) ! return err; ! --node_idx; ! } ! } ! } ! else /* (ent->subexp_to != str_idx) */ ! { ! for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx) ! { ! int node = dest_nodes->elems[node_idx]; ! re_token_type_t type= dfa->nodes[node].type; ! if (type == OP_CLOSE_SUBEXP || type == OP_OPEN_SUBEXP) ! { ! if (subexp_idx != dfa->nodes[node].opr.idx) ! continue; ! if ((type == OP_CLOSE_SUBEXP && ent->subexp_to != str_idx) ! || (type == OP_OPEN_SUBEXP)) ! { ! /* It is against this limitation. ! Remove it form the current sifted state. */ ! err = sub_epsilon_src_nodes(dfa, node, dest_nodes, ! candidates); ! if (BE (err != REG_NOERROR, 0)) ! return err; ! } ! } ! } ! } } return REG_NOERROR; } *************** *** 1903,1913 **** { reg_errcode_t err; re_dfa_t *dfa = (re_dfa_t *)preg->buffer; ! int node_idx, node, hash; re_sift_context_t local_sctx; const re_node_set *candidates; candidates = ((mctx->state_log[str_idx] == NULL) ? &empty_set ! : &mctx->state_log[str_idx]->nodes); local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized. */ for (node_idx = 0; node_idx < candidates->nelem; ++node_idx) --- 1902,1912 ---- { reg_errcode_t err; re_dfa_t *dfa = (re_dfa_t *)preg->buffer; ! int node_idx, node; re_sift_context_t local_sctx; const re_node_set *candidates; candidates = ((mctx->state_log[str_idx] == NULL) ? &empty_set ! : &mctx->state_log[str_idx]->nodes); local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized. */ for (node_idx = 0; node_idx < candidates->nelem; ++node_idx) *************** *** 1915,2069 **** int cur_bkref_idx = re_string_cur_idx (mctx->input); re_token_type_t type; node = candidates->elems[node_idx]; if (node == sctx->cur_bkref && str_idx == cur_bkref_idx) ! continue; /* Avoid infinite loop for the REs like "()\1+". */ if (node == sctx->last_node && str_idx == sctx->last_str_idx) ! continue; ! ! 
type = dfa->nodes[node].type; ! hash = str_idx + (node << 16); if (type == OP_BACK_REF) ! { ! int enabled_idx; ! if (node == mctx->cache_node && str_idx == mctx->cache_str_idx) ! enabled_idx = mctx->cache_first_idx; ! else { ! for (enabled_idx = mctx->nbkref_ents; enabled_idx--; ) ! { ! struct re_backref_cache_entry *entry; ! entry = mctx->bkref_ents + enabled_idx; ! if (BE (entry->hash == hash, 0) ! && BE (entry->str_idx == str_idx, 1) ! && BE(entry->node == node, 1)) { ! mctx->cache_node = node; ! mctx->cache_str_idx = str_idx; ! mctx->cache_last_idx = enabled_idx; ! break; } ! } ! for (enabled_idx = 0; enabled_idx <= mctx->cache_last_idx; ++enabled_idx) ! { ! struct re_backref_cache_entry *entry; ! entry = mctx->bkref_ents + enabled_idx; ! if (BE (entry->hash == hash, 0) ! && BE (entry->str_idx == str_idx, 1) ! && BE(entry->node == node, 1)) { ! mctx->cache_first_idx = enabled_idx; ! break; } ! } } ! ! for (; enabled_idx <= mctx->cache_last_idx; ++enabled_idx) ! { ! int disabled_idx, subexp_len, to_idx, dst_node; ! struct re_backref_cache_entry *entry; ! entry = mctx->bkref_ents + enabled_idx; ! if (BE(entry->hash != hash, 0) ! || BE(entry->node != node, 0) ! || BE(entry->str_idx != str_idx, 0)) ! continue; ! ! subexp_len = entry->subexp_len; ! to_idx = str_idx + subexp_len; ! if (to_idx > sctx->last_str_idx ! || sctx->sifted_states[to_idx] == NULL) ! continue; ! ! dst_node = entry->dst_node; ! if (!STATE_NODE_CONTAINS (sctx->sifted_states[to_idx], dst_node)) ! continue; ! ! if (check_dst_limits (dfa, &sctx->limits, mctx, node, ! str_idx, dst_node, to_idx)) ! continue; ! if (sctx->check_subexp == dfa->nodes[node].opr.idx) ! { ! char *buf; ! buf = (char *) re_string_get_buffer (mctx->input); ! if (my_memcmp (buf + entry->subexp_from, ! buf + cur_bkref_idx, subexp_len) != 0) ! continue; ! err = match_ctx_add_entry (mctx, dfa, sctx->cur_bkref, ! cur_bkref_idx, entry->subexp_from, ! entry->subexp_to); ! if (BE (err != REG_NOERROR, 0)) ! return err; ! 
err = clean_state_log_if_need (mctx, cur_bkref_idx ! + subexp_len); ! if (BE (err != REG_NOERROR, 0)) ! return err; ! } ! else ! { ! re_dfastate_t *cur_state; ! entry->flag = 0; ! for (disabled_idx = enabled_idx + 1; ! disabled_idx <= mctx->cache_last_idx; ++disabled_idx) ! { ! struct re_backref_cache_entry *entry2; ! entry2 = mctx->bkref_ents + disabled_idx; ! if (BE(entry2->hash != hash, 0) ! || BE(entry2->node != node, 0) ! || BE(entry2->str_idx != str_idx, 0)) ! continue; ! entry2->flag = 1; ! } ! ! if (local_sctx.sifted_states == NULL) ! { ! local_sctx = *sctx; ! err = re_node_set_init_copy (&local_sctx.limits, ! &sctx->limits); ! if (BE (err != REG_NOERROR, 0)) ! return err; ! } ! local_sctx.last_node = node; ! local_sctx.last_str_idx = str_idx; ! err = re_node_set_insert (&local_sctx.limits, enabled_idx); ! if (BE (err < 0, 0)) ! return REG_ESPACE; ! cur_state = local_sctx.sifted_states[str_idx]; ! err = sift_states_backward (preg, mctx, &local_sctx); ! if (BE (err != REG_NOERROR, 0)) ! return err; ! if (sctx->limited_states != NULL) ! { ! err = merge_state_array (dfa, sctx->limited_states, ! local_sctx.sifted_states, ! str_idx + 1); ! if (BE (err != REG_NOERROR, 0)) ! return err; ! } ! local_sctx.sifted_states[str_idx] = cur_state; ! re_node_set_remove (&local_sctx.limits, enabled_idx); ! /* We must not use the variable entry here, since ! mctx->bkref_ents might be realloced. */ ! mctx->bkref_ents[enabled_idx].flag = 1; ! } ! } ! for (enabled_idx = mctx->cache_first_idx; ! enabled_idx <= mctx->cache_last_idx; ++enabled_idx) ! { ! struct re_backref_cache_entry *entry; ! entry = mctx->bkref_ents + enabled_idx; ! if (BE(entry->hash == hash, 1) ! && BE(entry->node == node, 1) ! && BE(entry->str_idx == str_idx, 1)) ! entry->flag = 0; ! } ! } } if (local_sctx.sifted_states != NULL) { re_node_set_free (&local_sctx.limits); } ! 
return REG_NOERROR; } --- 1914,2017 ---- int cur_bkref_idx = re_string_cur_idx (mctx->input); re_token_type_t type; node = candidates->elems[node_idx]; + type = dfa->nodes[node].type; if (node == sctx->cur_bkref && str_idx == cur_bkref_idx) ! continue; /* Avoid infinite loop for the REs like "()\1+". */ if (node == sctx->last_node && str_idx == sctx->last_str_idx) ! continue; if (type == OP_BACK_REF) ! { ! int enabled_idx = search_cur_bkref_entry (mctx, str_idx); ! for (; enabled_idx < mctx->nbkref_ents; ++enabled_idx) { ! int disabled_idx, subexp_len, to_idx, dst_node; ! struct re_backref_cache_entry *entry; ! entry = mctx->bkref_ents + enabled_idx; ! if (entry->str_idx > str_idx) ! break; ! if (entry->node != node) ! continue; ! subexp_len = entry->subexp_to - entry->subexp_from; ! to_idx = str_idx + subexp_len; ! dst_node = (subexp_len ? dfa->nexts[node] ! : dfa->edests[node].elems[0]); ! ! if (to_idx > sctx->last_str_idx ! || sctx->sifted_states[to_idx] == NULL ! || !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx], ! dst_node) ! || check_dst_limits (dfa, &sctx->limits, mctx, node, ! str_idx, dst_node, to_idx)) ! continue; ! { ! re_dfastate_t *cur_state; ! entry->flag = 0; ! for (disabled_idx = enabled_idx + 1; ! disabled_idx < mctx->nbkref_ents; ++disabled_idx) { ! struct re_backref_cache_entry *entry2; ! entry2 = mctx->bkref_ents + disabled_idx; ! if (entry2->str_idx > str_idx) ! break; ! entry2->flag = (entry2->node == node) ? 1 : entry2->flag; } ! ! if (local_sctx.sifted_states == NULL) { ! local_sctx = *sctx; ! err = re_node_set_init_copy (&local_sctx.limits, ! &sctx->limits); ! if (BE (err != REG_NOERROR, 0)) ! goto free_return; ! } ! local_sctx.last_node = node; ! local_sctx.last_str_idx = str_idx; ! err = re_node_set_insert (&local_sctx.limits, enabled_idx); ! if (BE (err < 0, 0)) ! { ! err = REG_ESPACE; ! goto free_return; ! } ! cur_state = local_sctx.sifted_states[str_idx]; ! err = sift_states_backward (preg, mctx, &local_sctx); ! 
if (BE (err != REG_NOERROR, 0)) ! goto free_return; ! if (sctx->limited_states != NULL) ! { ! err = merge_state_array (dfa, sctx->limited_states, ! local_sctx.sifted_states, ! str_idx + 1); ! if (BE (err != REG_NOERROR, 0)) ! goto free_return; } ! local_sctx.sifted_states[str_idx] = cur_state; ! re_node_set_remove (&local_sctx.limits, enabled_idx); ! /* We must not use the variable entry here, since ! mctx->bkref_ents might be realloced. */ ! mctx->bkref_ents[enabled_idx].flag = 1; ! } } ! enabled_idx = search_cur_bkref_entry (mctx, str_idx); ! for (; enabled_idx < mctx->nbkref_ents; ++enabled_idx) ! { ! struct re_backref_cache_entry *entry; ! entry = mctx->bkref_ents + enabled_idx; ! if (entry->str_idx > str_idx) ! break; ! if (entry->node == node) ! entry->flag = 0; ! } ! } } + err = REG_NOERROR; + free_return: if (local_sctx.sifted_states != NULL) { re_node_set_free (&local_sctx.limits); } ! return err; } *************** *** 2081,2087 **** naccepted = check_node_accept_bytes (preg, node_idx, mctx->input, str_idx); if (naccepted > 0 && str_idx + naccepted <= max_str_idx && !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted], ! dfa->nexts[node_idx])) /* The node can't accept the `multi byte', or the destination was already throwed away, then the node could't accept the current input `multi byte'. */ --- 2029,2035 ---- naccepted = check_node_accept_bytes (preg, node_idx, mctx->input, str_idx); if (naccepted > 0 && str_idx + naccepted <= max_str_idx && !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted], ! dfa->nexts[node_idx])) /* The node can't accept the `multi byte', or the destination was already throwed away, then the node could't accept the current input `multi byte'. */ *************** *** 2111,2124 **** re_dfa_t *dfa = (re_dfa_t *) preg->buffer; re_dfastate_t **trtable, *next_state; unsigned char ch; if (re_string_cur_idx (mctx->input) + 1 >= mctx->input->bufs_len || (re_string_cur_idx (mctx->input) + 1 >= mctx->input->valid_len ! 
&& mctx->input->valid_len < mctx->input->len)) { *err = extend_buffers (mctx); if (BE (*err != REG_NOERROR, 0)) ! return NULL; } *err = REG_NOERROR; --- 2059,2073 ---- re_dfa_t *dfa = (re_dfa_t *) preg->buffer; re_dfastate_t **trtable, *next_state; unsigned char ch; + int cur_idx; if (re_string_cur_idx (mctx->input) + 1 >= mctx->input->bufs_len || (re_string_cur_idx (mctx->input) + 1 >= mctx->input->valid_len ! && mctx->input->valid_len < mctx->input->len)) { *err = extend_buffers (mctx); if (BE (*err != REG_NOERROR, 0)) ! return NULL; } *err = REG_NOERROR; *************** *** 2132,2231 **** #ifdef RE_ENABLE_I18N /* If the current state can accept multibyte. */ if (state->accept_mb) ! { ! *err = transit_state_mb (preg, state, mctx); ! if (BE (*err != REG_NOERROR, 0)) ! return NULL; ! } #endif /* RE_ENABLE_I18N */ /* Then decide the next state with the single byte. */ if (1) ! { ! /* Use transition table */ ! ch = re_string_fetch_byte (mctx->input); ! trtable = fl_search ? state->trtable_search : state->trtable; ! if (trtable == NULL) ! { ! trtable = build_trtable (preg, state, fl_search); ! if (fl_search) ! state->trtable_search = trtable; ! else ! state->trtable = trtable; ! } ! next_state = trtable[ch]; ! } else ! { ! /* don't use transition table */ ! next_state = transit_state_sb (err, preg, state, fl_search, mctx); ! if (BE (next_state == NULL && err != REG_NOERROR, 0)) ! return NULL; ! } } /* Update the state_log if we need. */ if (mctx->state_log != NULL) { - int cur_idx = re_string_cur_idx (mctx->input); if (cur_idx > mctx->state_log_top) ! { ! mctx->state_log[cur_idx] = next_state; ! mctx->state_log_top = cur_idx; ! } else if (mctx->state_log[cur_idx] == 0) ! { ! mctx->state_log[cur_idx] = next_state; ! } else ! { ! re_dfastate_t *pstate; ! unsigned int context; ! re_node_set next_nodes, *log_nodes, *table_nodes = NULL; ! /* If (state_log[cur_idx] != 0), it implies that cur_idx is ! the destination of a multibyte char/collating element/ ! back reference. 
Then the next state is the union set of ! these destinations and the results of the transition table. */ ! pstate = mctx->state_log[cur_idx]; ! log_nodes = pstate->entrance_nodes; ! if (next_state != NULL) ! { ! table_nodes = next_state->entrance_nodes; ! *err = re_node_set_init_union (&next_nodes, table_nodes, ! log_nodes); ! if (BE (*err != REG_NOERROR, 0)) ! return NULL; ! } ! else ! next_nodes = *log_nodes; ! /* Note: We already add the nodes of the initial state, ! then we don't need to add them here. */ ! ! context = re_string_context_at (mctx->input, ! re_string_cur_idx (mctx->input) - 1, ! mctx->eflags, preg->newline_anchor); ! next_state = mctx->state_log[cur_idx] ! = re_acquire_state_context (err, dfa, &next_nodes, context); ! /* We don't need to check errors here, since the return value of ! this function is next_state and ERR is already set. */ ! ! if (table_nodes != NULL) ! re_node_set_free (&next_nodes); ! } ! /* If the next state has back references. */ ! if (next_state != NULL && next_state->has_backref) ! { ! *err = transit_state_bkref (preg, next_state, mctx); ! if (BE (*err != REG_NOERROR, 0)) ! return NULL; ! next_state = mctx->state_log[cur_idx]; ! } } return next_state; } /* Helper functions for transit_state. */ /* Return the next state to which the current state STATE will transit by accepting the current input byte. */ --- 2081,2215 ---- #ifdef RE_ENABLE_I18N /* If the current state can accept multibyte. */ if (state->accept_mb) ! { ! *err = transit_state_mb (preg, state, mctx); ! if (BE (*err != REG_NOERROR, 0)) ! return NULL; ! } #endif /* RE_ENABLE_I18N */ /* Then decide the next state with the single byte. */ if (1) ! { ! /* Use transition table */ ! ch = re_string_fetch_byte (mctx->input); ! trtable = fl_search ? state->trtable_search : state->trtable; ! if (trtable == NULL) ! { ! trtable = build_trtable (preg, state, fl_search); ! if (fl_search) ! state->trtable_search = trtable; ! else ! state->trtable = trtable; ! } ! 
next_state = trtable[ch]; ! } else ! { ! /* don't use transition table */ ! next_state = transit_state_sb (err, preg, state, fl_search, mctx); ! if (BE (next_state == NULL && err != REG_NOERROR, 0)) ! return NULL; ! } } + cur_idx = re_string_cur_idx (mctx->input); /* Update the state_log if we need. */ if (mctx->state_log != NULL) { if (cur_idx > mctx->state_log_top) ! { ! mctx->state_log[cur_idx] = next_state; ! mctx->state_log_top = cur_idx; ! } else if (mctx->state_log[cur_idx] == 0) ! { ! mctx->state_log[cur_idx] = next_state; ! } else ! { ! re_dfastate_t *pstate; ! unsigned int context; ! re_node_set next_nodes, *log_nodes, *table_nodes = NULL; ! /* If (state_log[cur_idx] != 0), it implies that cur_idx is ! the destination of a multibyte char/collating element/ ! back reference. Then the next state is the union set of ! these destinations and the results of the transition table. */ ! pstate = mctx->state_log[cur_idx]; ! log_nodes = pstate->entrance_nodes; ! if (next_state != NULL) ! { ! table_nodes = next_state->entrance_nodes; ! *err = re_node_set_init_union (&next_nodes, table_nodes, ! log_nodes); ! if (BE (*err != REG_NOERROR, 0)) ! return NULL; ! } ! else ! next_nodes = *log_nodes; ! /* Note: We already add the nodes of the initial state, ! then we don't need to add them here. */ ! ! context = re_string_context_at (mctx->input, ! re_string_cur_idx (mctx->input) - 1, ! mctx->eflags, preg->newline_anchor); ! next_state = mctx->state_log[cur_idx] ! = re_acquire_state_context (err, dfa, &next_nodes, context); ! /* We don't need to check errors here, since the return value of ! this function is next_state and ERR is already set. */ ! ! if (table_nodes != NULL) ! re_node_set_free (&next_nodes); ! } ! } ! ! /* Check OP_OPEN_SUBEXP in the current state in case that we use them ! later. We must check them here, since the back references in the ! next state might use them. */ ! if (dfa->nbackref && next_state/* && fl_process_bkref */) ! { ! 
*err = check_subexp_matching_top (dfa, mctx, &next_state->nodes, ! cur_idx); ! if (BE (*err != REG_NOERROR, 0)) ! return NULL; ! } ! ! /* If the next state has back references. */ ! if (next_state != NULL && next_state->has_backref) ! { ! *err = transit_state_bkref (preg, next_state, mctx); ! if (BE (*err != REG_NOERROR, 0)) ! return NULL; ! next_state = mctx->state_log[cur_idx]; } return next_state; } /* Helper functions for transit_state. */ + static reg_errcode_t + check_subexp_matching_top (dfa, mctx, cur_nodes, str_idx) + re_dfa_t *dfa; + re_match_context_t *mctx; + re_node_set *cur_nodes; + int str_idx; + { + int node_idx; + reg_errcode_t err; + + for (node_idx = 0; node_idx < cur_nodes->nelem; ++node_idx) + { + int node = cur_nodes->elems[node_idx]; + if (dfa->nodes[node].type == OP_OPEN_SUBEXP) + { + err = match_ctx_add_subtop (mctx, node, str_idx); + if (BE (err != REG_NOERROR, 0)) + return err; + } + } + return REG_NOERROR; + } + /* Return the next state to which the current state STATE will transit by accepting the current input byte. */ *************** *** 2250,2284 **** { int cur_node = state->nodes.elems[node_cnt]; if (check_node_accept (preg, dfa->nodes + cur_node, mctx, cur_str_idx)) ! { ! *err = re_node_set_merge (&next_nodes, ! dfa->eclosures + dfa->nexts[cur_node]); ! if (BE (*err != REG_NOERROR, 0)) ! return NULL; ! } } if (fl_search) { #ifdef RE_ENABLE_I18N int not_initial = 0; if (MB_CUR_MAX > 1) ! for (node_cnt = 0; node_cnt < next_nodes.nelem; ++node_cnt) ! if (dfa->nodes[next_nodes.elems[node_cnt]].type == CHARACTER) ! { ! not_initial = dfa->nodes[next_nodes.elems[node_cnt]].mb_partial; ! break; ! } if (!not_initial) #endif ! { ! *err = re_node_set_merge (&next_nodes, ! dfa->init_state->entrance_nodes); ! if (BE (*err != REG_NOERROR, 0)) ! return NULL; ! } } context = re_string_context_at (mctx->input, cur_str_idx, mctx->eflags, ! 
preg->newline_anchor); next_state = re_acquire_state_context (err, dfa, &next_nodes, context); /* We don't need to check errors here, since the return value of this function is next_state and ERR is already set. */ --- 2234,2274 ---- { int cur_node = state->nodes.elems[node_cnt]; if (check_node_accept (preg, dfa->nodes + cur_node, mctx, cur_str_idx)) ! { ! *err = re_node_set_merge (&next_nodes, ! dfa->eclosures + dfa->nexts[cur_node]); ! if (BE (*err != REG_NOERROR, 0)) ! { ! re_node_set_free (&next_nodes); ! return NULL; ! } ! } } if (fl_search) { #ifdef RE_ENABLE_I18N int not_initial = 0; if (MB_CUR_MAX > 1) ! for (node_cnt = 0; node_cnt < next_nodes.nelem; ++node_cnt) ! if (dfa->nodes[next_nodes.elems[node_cnt]].type == CHARACTER) ! { ! not_initial = dfa->nodes[next_nodes.elems[node_cnt]].mb_partial; ! break; ! } if (!not_initial) #endif ! { ! *err = re_node_set_merge (&next_nodes, ! dfa->init_state->entrance_nodes); ! if (BE (*err != REG_NOERROR, 0)) ! { ! re_node_set_free (&next_nodes); ! return NULL; ! } ! } } context = re_string_context_at (mctx->input, cur_str_idx, mctx->eflags, ! preg->newline_anchor); next_state = re_acquire_state_context (err, dfa, &next_nodes, context); /* We don't need to check errors here, since the return value of this function is next_state and ERR is already set. */ *************** *** 2308,2361 **** re_dfastate_t *dest_state; if (dfa->nodes[cur_node_idx].constraint) ! { ! context = re_string_context_at (mctx->input, ! re_string_cur_idx (mctx->input), ! mctx->eflags, preg->newline_anchor); ! if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint, ! context)) ! continue; ! } /* How many bytes the node can accepts? */ if (ACCEPT_MB_NODE (dfa->nodes[cur_node_idx].type)) ! naccepted = check_node_accept_bytes (preg, cur_node_idx, mctx->input, ! re_string_cur_idx (mctx->input)); if (naccepted == 0) ! continue; /* The node can accepts `naccepted' bytes. 
*/ dest_idx = re_string_cur_idx (mctx->input) + naccepted; mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted ! : mctx->max_mb_elem_len); err = clean_state_log_if_need (mctx, dest_idx); if (BE (err != REG_NOERROR, 0)) ! return err; #ifdef DEBUG assert (dfa->nexts[cur_node_idx] != -1); #endif /* `cur_node_idx' may point the entity of the OP_CONTEXT_NODE, ! then we use pstate->nodes.elems[i] instead. */ new_nodes = dfa->eclosures + dfa->nexts[pstate->nodes.elems[i]]; dest_state = mctx->state_log[dest_idx]; if (dest_state == NULL) ! dest_nodes = *new_nodes; else ! { ! err = re_node_set_init_union (&dest_nodes, ! dest_state->entrance_nodes, new_nodes); ! if (BE (err != REG_NOERROR, 0)) ! return err; ! } context = re_string_context_at (mctx->input, dest_idx - 1, mctx->eflags, ! preg->newline_anchor); mctx->state_log[dest_idx] ! = re_acquire_state_context (&err, dfa, &dest_nodes, context); ! if (BE (mctx->state_log[dest_idx] == NULL && err != REG_NOERROR, 0)) ! return err; if (dest_state != NULL) ! re_node_set_free (&dest_nodes); } return REG_NOERROR; } --- 2298,2351 ---- re_dfastate_t *dest_state; if (dfa->nodes[cur_node_idx].constraint) ! { ! context = re_string_context_at (mctx->input, ! re_string_cur_idx (mctx->input), ! mctx->eflags, preg->newline_anchor); ! if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint, ! context)) ! continue; ! } /* How many bytes the node can accepts? */ if (ACCEPT_MB_NODE (dfa->nodes[cur_node_idx].type)) ! naccepted = check_node_accept_bytes (preg, cur_node_idx, mctx->input, ! re_string_cur_idx (mctx->input)); if (naccepted == 0) ! continue; /* The node can accepts `naccepted' bytes. */ dest_idx = re_string_cur_idx (mctx->input) + naccepted; mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted ! : mctx->max_mb_elem_len); err = clean_state_log_if_need (mctx, dest_idx); if (BE (err != REG_NOERROR, 0)) ! 
return err; #ifdef DEBUG assert (dfa->nexts[cur_node_idx] != -1); #endif /* `cur_node_idx' may point the entity of the OP_CONTEXT_NODE, ! then we use pstate->nodes.elems[i] instead. */ new_nodes = dfa->eclosures + dfa->nexts[pstate->nodes.elems[i]]; dest_state = mctx->state_log[dest_idx]; if (dest_state == NULL) ! dest_nodes = *new_nodes; else ! { ! err = re_node_set_init_union (&dest_nodes, ! dest_state->entrance_nodes, new_nodes); ! if (BE (err != REG_NOERROR, 0)) ! return err; ! } context = re_string_context_at (mctx->input, dest_idx - 1, mctx->eflags, ! preg->newline_anchor); mctx->state_log[dest_idx] ! = re_acquire_state_context (&err, dfa, &dest_nodes, context); if (dest_state != NULL) ! re_node_set_free (&dest_nodes); ! if (BE (mctx->state_log[dest_idx] == NULL && err != REG_NOERROR, 0)) ! return err; } return REG_NOERROR; } *************** *** 2368,2392 **** re_match_context_t *mctx; { reg_errcode_t err; ! re_dfastate_t **work_state_log; ! ! work_state_log = re_malloc (re_dfastate_t *, ! re_string_cur_idx (mctx->input) + 1); ! if (BE (work_state_log == NULL, 0)) ! return REG_ESPACE; ! ! err = transit_state_bkref_loop (preg, &pstate->nodes, work_state_log, mctx); ! re_free (work_state_log); return err; } - /* Caller must allocate `work_state_log'. */ - static reg_errcode_t ! transit_state_bkref_loop (preg, nodes, work_state_log, mctx) const regex_t *preg; re_node_set *nodes; - re_dfastate_t **work_state_log; re_match_context_t *mctx; { reg_errcode_t err; --- 2358,2371 ---- re_match_context_t *mctx; { reg_errcode_t err; ! err = transit_state_bkref_loop (preg, &pstate->nodes, mctx); return err; } static reg_errcode_t ! transit_state_bkref_loop (preg, nodes, mctx) const regex_t *preg; re_node_set *nodes; re_match_context_t *mctx; { reg_errcode_t err; *************** *** 2404,2498 **** unsigned int context; re_token_t *node = dfa->nodes + node_idx; re_node_set *new_dest_nodes; - re_sift_context_t sctx; /* Check whether `node' is a backreference or not. 
*/ if (node->type == OP_BACK_REF) ! subexp_idx = node->opr.idx; else ! continue; if (node->constraint) ! { ! context = re_string_context_at (mctx->input, cur_str_idx, ! mctx->eflags, preg->newline_anchor); ! if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context)) ! continue; ! } /* `node' is a backreference. ! Check the substring which the substring matched. */ ! sift_ctx_init (&sctx, work_state_log, NULL, node_idx, cur_str_idx, ! subexp_idx); ! sctx.cur_bkref = node_idx; ! match_ctx_clear_flag (mctx); ! err = sift_states_backward (preg, mctx, &sctx); if (BE (err != REG_NOERROR, 0)) ! return err; /* And add the epsilon closures (which is `new_dest_nodes') of ! the backreference to appropriate state_log. */ #ifdef DEBUG assert (dfa->nexts[node_idx] != -1); #endif ! for (bkc_idx = 0; bkc_idx < mctx->nbkref_ents; ++bkc_idx) ! { ! int subexp_len; ! re_dfastate_t *dest_state; ! struct re_backref_cache_entry *bkref_ent; ! bkref_ent = mctx->bkref_ents + bkc_idx; ! if (bkref_ent->node != node_idx || bkref_ent->str_idx != cur_str_idx) ! continue; ! subexp_len = bkref_ent->subexp_len; ! new_dest_nodes = (subexp_len == 0 ! ? dfa->eclosures + dfa->edests[node_idx].elems[0] ! : dfa->eclosures + dfa->nexts[node_idx]); ! dest_str_idx = (cur_str_idx + bkref_ent->subexp_to ! - bkref_ent->subexp_from); ! context = (IS_WORD_CHAR (re_string_byte_at (mctx->input, ! dest_str_idx - 1)) ! ? CONTEXT_WORD : 0); ! dest_state = mctx->state_log[dest_str_idx]; ! prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0 ! : mctx->state_log[cur_str_idx]->nodes.nelem); ! /* Add `new_dest_node' to state_log. */ ! if (dest_state == NULL) ! { ! mctx->state_log[dest_str_idx] ! = re_acquire_state_context (&err, dfa, new_dest_nodes, ! context); ! if (BE (mctx->state_log[dest_str_idx] == NULL ! && err != REG_NOERROR, 0)) ! return err; ! } ! else ! { ! re_node_set dest_nodes; ! err = re_node_set_init_union (&dest_nodes, ! dest_state->entrance_nodes, ! new_dest_nodes); ! 
if (BE (err != REG_NOERROR, 0)) ! return err; ! mctx->state_log[dest_str_idx] ! = re_acquire_state_context (&err, dfa, &dest_nodes, context); ! if (BE (mctx->state_log[dest_str_idx] == NULL ! && err != REG_NOERROR, 0)) ! return err; ! re_node_set_free (&dest_nodes); ! } ! /* We need to check recursively if the backreference can epsilon ! transit. */ ! if (subexp_len == 0 ! && mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem) ! { ! err = transit_state_bkref_loop (preg, new_dest_nodes, ! work_state_log, mctx); ! if (BE (err != REG_NOERROR, 0)) ! return err; ! } ! } } re_free (cur_regs); return REG_NOERROR; } --- 2383,3027 ---- unsigned int context; re_token_t *node = dfa->nodes + node_idx; re_node_set *new_dest_nodes; /* Check whether `node' is a backreference or not. */ if (node->type == OP_BACK_REF) ! subexp_idx = node->opr.idx; else ! continue; if (node->constraint) ! { ! context = re_string_context_at (mctx->input, cur_str_idx, ! mctx->eflags, preg->newline_anchor); ! if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context)) ! continue; ! } /* `node' is a backreference. ! Check the substring which the substring matched. */ ! err = get_subexp (preg, mctx, node_idx, cur_str_idx, subexp_idx - 1); if (BE (err != REG_NOERROR, 0)) ! goto free_return; /* And add the epsilon closures (which is `new_dest_nodes') of ! the backreference to appropriate state_log. */ #ifdef DEBUG assert (dfa->nexts[node_idx] != -1); #endif ! bkc_idx = search_cur_bkref_entry (mctx, cur_str_idx); ! for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx) ! { ! int subexp_len; ! re_dfastate_t *dest_state; ! struct re_backref_cache_entry *bkref_ent; ! bkref_ent = mctx->bkref_ents + bkc_idx; ! if (bkref_ent->node != node_idx || bkref_ent->str_idx != cur_str_idx) ! continue; ! subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from; ! new_dest_nodes = (subexp_len == 0 ! ? dfa->eclosures + dfa->edests[node_idx].elems[0] ! : dfa->eclosures + dfa->nexts[node_idx]); ! 
dest_str_idx = (cur_str_idx + bkref_ent->subexp_to ! - bkref_ent->subexp_from); ! context = re_string_context_at (mctx->input, dest_str_idx - 1, ! mctx->eflags, preg->newline_anchor); ! dest_state = mctx->state_log[dest_str_idx]; ! prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0 ! : mctx->state_log[cur_str_idx]->nodes.nelem); ! /* Add `new_dest_node' to state_log. */ ! if (dest_state == NULL) ! { ! mctx->state_log[dest_str_idx] ! = re_acquire_state_context (&err, dfa, new_dest_nodes, ! context); ! if (BE (mctx->state_log[dest_str_idx] == NULL ! && err != REG_NOERROR, 0)) ! goto free_return; ! } ! else ! { ! re_node_set dest_nodes; ! err = re_node_set_init_union (&dest_nodes, ! dest_state->entrance_nodes, ! new_dest_nodes); ! if (BE (err != REG_NOERROR, 0)) ! { ! re_node_set_free (&dest_nodes); ! goto free_return; ! } ! mctx->state_log[dest_str_idx] ! = re_acquire_state_context (&err, dfa, &dest_nodes, context); ! re_node_set_free (&dest_nodes); ! if (BE (mctx->state_log[dest_str_idx] == NULL ! && err != REG_NOERROR, 0)) ! goto free_return; ! } ! /* We need to check recursively if the backreference can epsilon ! transit. */ ! if (subexp_len == 0 ! && mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem) ! { ! err = check_subexp_matching_top (dfa, mctx, new_dest_nodes, ! cur_str_idx); ! if (BE (err != REG_NOERROR, 0)) ! goto free_return; ! err = transit_state_bkref_loop (preg, new_dest_nodes, mctx); ! if (BE (err != REG_NOERROR, 0)) ! goto free_return; ! } ! } } + err = REG_NOERROR; + free_return: re_free (cur_regs); + return err; + } + + static reg_errcode_t + get_subexp (preg, mctx, bkref_node, bkref_str_idx, subexp_idx) + const regex_t *preg; + re_match_context_t *mctx; + int bkref_node, bkref_str_idx, subexp_idx; + { + int sub_top_idx; + re_dfa_t *dfa = (re_dfa_t *) preg->buffer; + char *buf = re_string_get_buffer (mctx->input); + /* For each sub expression... 
*/ + for (sub_top_idx = 0; sub_top_idx < mctx->nsub_tops; ++sub_top_idx) + { + reg_errcode_t err; + re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx]; + re_sub_match_last_t *sub_last; + int sub_last_idx, sl_str; + char *bkref_str; + + if (dfa->nodes[sub_top->node].opr.idx != subexp_idx) + continue; /* It isn't related. */ + + sl_str = sub_top->str_idx; + bkref_str = buf + bkref_str_idx; + /* At first, check the last node of sub expressions we already + evaluated. */ + for (sub_last_idx = 0; sub_last_idx < sub_top->nlasts; ++sub_last_idx) + { + int sl_str_diff; + sub_last = sub_top->lasts[sub_last_idx]; + sl_str_diff = sub_last->str_idx - sl_str; + /* The matched string by the sub expression match with the substring + at the back reference? */ + if (sl_str_diff > 0 + && my_memcmp (bkref_str, buf + sl_str, sl_str_diff) != 0) + break; /* We don't need to search this sub expression any more. */ + bkref_str += sl_str_diff; + sl_str += sl_str_diff; + err = get_subexp_sub (preg, mctx, sub_top, sub_last, bkref_node, + bkref_str_idx, subexp_idx); + if (err == REG_NOMATCH) + continue; + if (BE (err != REG_NOERROR, 0)) + return err; + } + if (sub_last_idx < sub_top->nlasts) + continue; + if (sub_last_idx > 0) + ++sl_str; + /* Then, search for the other last nodes of the sub expression. */ + for (; sl_str <= bkref_str_idx; ++sl_str) + { + int cls_node, sl_str_off; + re_sub_match_last_t cur_last; + re_node_set *nodes; + sl_str_off = sl_str - sub_top->str_idx; + /* The matched string by the sub expression match with the substring + at the back reference? */ + if (sl_str_off > 0 + && my_memcmp (bkref_str++, buf + sl_str - 1, 1) != 0) + break; /* We don't need to search this sub expression any more. */ + if (mctx->state_log[sl_str] == NULL) + continue; + /* Does this state have a ')' of the sub expression? */ + nodes = &mctx->state_log[sl_str]->nodes; + cls_node = find_subexp_node (dfa, nodes, subexp_idx, 0); + if (cls_node == -1) + continue; /* No. 
*/ + if (sub_top->path == NULL) + { + sub_top->path = calloc (sizeof (state_array_t), 1); + if (sub_top->path == NULL) + return REG_ESPACE; + } + /* Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node + in the current context? */ + memset (&cur_last, '\0', sizeof (re_sub_match_last_t)); + cur_last.node = cls_node; + cur_last.str_idx = sl_str; + err = check_arrival (preg, mctx, sub_top, &cur_last, -1, -1); + if (err == REG_NOMATCH) + continue; + if (BE (err != REG_NOERROR, 0)) + return err; + sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str); + if (BE (sub_last == NULL, 0)) + return REG_ESPACE; + err = get_subexp_sub (preg, mctx, sub_top, sub_last, bkref_node, + bkref_str_idx, subexp_idx); + if (err == REG_NOMATCH) + continue; + } + } + return REG_NOERROR; + } + + static reg_errcode_t + get_subexp_sub (preg, mctx, sub_top, sub_last, bkref_node, bkref_str, + subexp_idx) + const regex_t *preg; + re_match_context_t *mctx; + re_sub_match_top_t *sub_top; + re_sub_match_last_t *sub_last; + int bkref_node, bkref_str, subexp_idx; + { + reg_errcode_t err; + int to_idx; + /* Can the subexpression arrive the back reference? */ + err = check_arrival (preg, mctx, sub_top, sub_last, bkref_node, bkref_str); + if (err != REG_NOERROR) + return err; + err = match_ctx_add_entry (mctx, bkref_node, bkref_str, sub_top->str_idx, + sub_last->str_idx, sub_top, sub_last); + if (BE (err != REG_NOERROR, 0)) + return err; + to_idx = bkref_str + sub_last->str_idx - sub_top->str_idx; + clean_state_log_if_need (mctx, to_idx); + return REG_NOERROR; + } + + /* Find the first node which is '(' or ')', and whose index is SUBEXP_IDX. + Search '(' if FL_OPEN, or search ')' otherwise. 
*/ + + static int + find_subexp_node (dfa, nodes, subexp_idx, fl_open) + re_dfa_t *dfa; + re_node_set *nodes; + int subexp_idx, fl_open; + { + int cls_idx; + for (cls_idx = 0; cls_idx < nodes->nelem; ++cls_idx) + { + int cls_node = nodes->elems[cls_idx]; + re_token_t *node = dfa->nodes + cls_node; + if (((fl_open && node->type == OP_OPEN_SUBEXP) + || (!fl_open && node->type == OP_CLOSE_SUBEXP)) + && node->opr.idx == subexp_idx) + return cls_node; + } + return -1; + } + + static reg_errcode_t + check_arrival (preg, mctx, sub_top, sub_last, bkref_node, bkref_str) + const regex_t *preg; + re_match_context_t *mctx; + re_sub_match_top_t *sub_top; + re_sub_match_last_t *sub_last; + int bkref_node, bkref_str; + { + re_dfa_t *dfa = (re_dfa_t *) preg->buffer; + reg_errcode_t err; + int backup_cur_idx, str_idx, null_cnt; + re_dfastate_t *cur_state = NULL; + re_node_set *cur_nodes; + re_dfastate_t **backup_state_log; + + state_array_t *path; + int top_node, top_str, ex_subexp, last_node, last_str, fl_open; + + path = bkref_node < 0 ? sub_top->path : &sub_last->path; + top_node = bkref_node < 0 ? sub_top->node : sub_last->node; + top_str = bkref_node < 0 ? sub_top->str_idx : sub_last->str_idx; + last_node = bkref_node < 0 ? sub_last->node : bkref_node; + last_str = bkref_node < 0 ? sub_last->str_idx : bkref_str; + + ex_subexp = dfa->nodes[sub_top->node].opr.idx; + fl_open = (bkref_node >= 0); + + /* Extend the buffer if we need. */ + if (path->alloc < last_str + mctx->max_mb_elem_len + 1) + { + int old_alloc = path->alloc; + path->alloc = last_str + mctx->max_mb_elem_len + 1; + path->array = re_realloc (path->array, re_dfastate_t *, path->alloc); + if (path->array == NULL) + return REG_ESPACE; + memset (path->array + old_alloc, '\0', + sizeof (re_dfastate_t *) * (path->alloc - old_alloc)); + } + + str_idx = path->next_idx == 0 ? top_str : path->next_idx; + + /* Temporary modify MCTX. 
*/ + backup_state_log = mctx->state_log; + backup_cur_idx = mctx->input->cur_idx; + mctx->state_log = path->array; + mctx->input->cur_idx = str_idx; + + /* Setup initial node set. */ + if (str_idx == top_str) + { + unsigned int context; + re_node_set init_nodes; + context = re_string_context_at (mctx->input, str_idx - 1, mctx->eflags, + preg->newline_anchor); + err = re_node_set_init_1 (&init_nodes, top_node); + if (BE (err != REG_NOERROR, 0)) + return err; + err = expand_eclosures (dfa, &init_nodes, ex_subexp, fl_open); + if (BE (err != REG_NOERROR, 0)) + return err; + + if (init_nodes.nelem) + { + err = expand_bkref_cache (preg, mctx, sub_top, sub_last, &init_nodes, + str_idx, last_str, ex_subexp, fl_open); + if (BE ( err != REG_NOERROR, 0)) + return err; + } + cur_state = re_acquire_state_context (&err, dfa, &init_nodes, context); + if (BE (cur_state == NULL && err != REG_NOERROR, 0)) + return err; + mctx->state_log[str_idx] = cur_state; + re_node_set_free (&init_nodes); + } + else + { + cur_state = mctx->state_log[str_idx]; + if (cur_state && cur_state->has_backref) + { + unsigned int context; + re_node_set init_nodes; + context = re_string_context_at (mctx->input, str_idx - 1, + mctx->eflags, preg->newline_anchor); + err = re_node_set_init_copy (&init_nodes, &cur_state->nodes); + if (BE ( err != REG_NOERROR, 0)) + return err; + err = expand_bkref_cache (preg, mctx, sub_top, sub_last, &init_nodes, + str_idx, last_str, ex_subexp, fl_open); + if (BE ( err != REG_NOERROR, 0)) + return err; + cur_state = re_acquire_state_context (&err, dfa, &init_nodes, + context); + if (BE (cur_state == NULL && err != REG_NOERROR, 0)) + return err; + mctx->state_log[str_idx] = cur_state; + re_node_set_free (&init_nodes); + } + } + + for (null_cnt = 0; str_idx < last_str && null_cnt <= mctx->max_mb_elem_len;) + { + int cur_idx; + unsigned int context; + re_node_set next_nodes; + if (mctx->state_log[str_idx + 1]) + { + err = re_node_set_init_copy (&next_nodes, + 
&mctx->state_log[str_idx + 1]->nodes); + if (BE (cur_state == NULL && err != REG_NOERROR, 0)) + return err; + } + else + re_node_set_init_empty (&next_nodes); + + cur_nodes = cur_state? &cur_state->nodes : &empty_set; + for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx) + { + int naccepted = 0; + int cur_node = cur_nodes->elems[cur_idx]; + re_token_type_t type = dfa->nodes[cur_node].type; + if (IS_EPSILON_NODE(type)) + continue; + #ifdef RE_ENABLE_I18N + /* If the node may accept `multi byte'. */ + if (ACCEPT_MB_NODE (type)) + { + naccepted = check_node_accept_bytes (preg, cur_node, mctx->input, + str_idx); + if (naccepted > 1) + { + re_node_set union_set; + re_dfastate_t *dest_state; + int next_node = dfa->nexts[cur_node]; + int next_idx = str_idx + naccepted; + dest_state = mctx->state_log[next_idx]; + if (dest_state) + { + err = re_node_set_init_copy (&union_set, + &dest_state->nodes); + if (BE (err != REG_NOERROR, 0)) + return err; + err = re_node_set_insert (&union_set, next_node); + if (BE (err < 0, 0)) + return REG_ESPACE; + } + else + { + err = re_node_set_init_1 (&union_set, next_node); + if (BE (err != REG_NOERROR, 0)) + return err; + } + mctx->state_log[next_idx] = re_acquire_state (&err, dfa, + &union_set); + re_node_set_free (&union_set); + if (BE (mctx->state_log[next_idx] == NULL + && err != REG_NOERROR, 0)) + return err; + } + } + #endif /* RE_ENABLE_I18N */ + if (naccepted + || check_node_accept (preg, dfa->nodes + cur_node, mctx, + str_idx)) + { + err = re_node_set_insert (&next_nodes, dfa->nexts[cur_node]); + if (BE (err < 0, 0)) + return REG_ESPACE; + } + } + + ++str_idx; + if (next_nodes.nelem) + { + err = expand_eclosures (dfa, &next_nodes, ex_subexp, fl_open); + if (BE (err != REG_NOERROR, 0)) + return err; + } + if (next_nodes.nelem) + { + err = expand_bkref_cache (preg, mctx, sub_top, sub_last, &next_nodes, + str_idx, last_str, ex_subexp, fl_open); + if (BE ( err != REG_NOERROR, 0)) + return err; + } + context = re_string_context_at 
(mctx->input, str_idx - 1, mctx->eflags, + preg->newline_anchor); + cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context); + if (BE (cur_state == NULL && err != REG_NOERROR, 0)) + return err; + mctx->state_log[str_idx] = cur_state; + re_node_set_free (&next_nodes); + null_cnt = cur_state == NULL ? null_cnt + 1 : 0; + } + cur_nodes = (mctx->state_log[last_str] == NULL ? NULL + : &mctx->state_log[last_str]->nodes); + path->next_idx = str_idx; + + /* Fix MCTX. */ + mctx->state_log = backup_state_log; + mctx->input->cur_idx = backup_cur_idx; + + if (cur_nodes == NULL) + return REG_NOMATCH; + /* Then check the current node set has the node LAST_NODE. */ + return (re_node_set_contains (cur_nodes, last_node) + || re_node_set_contains (cur_nodes, last_node) ? REG_NOERROR + : REG_NOMATCH); + } + + /* Helper functions for check_arrival. */ + + static reg_errcode_t + expand_eclosures (dfa, cur_nodes, ex_subexp, fl_open) + re_dfa_t *dfa; + re_node_set *cur_nodes; + int ex_subexp, fl_open; + { + reg_errcode_t err; + int idx, outside_node; + re_node_set new_nodes; + #ifdef DEBUG + assert (cur_nodes->nelem); + #endif + re_node_set_alloc (&new_nodes, cur_nodes->nelem); + /* Create a new node set with the nodes which are epsilon closures of + a node in cur_nodes. */ + for (idx = 0; idx < cur_nodes->nelem; ++idx) + { + int cur_node = cur_nodes->elems[idx]; + err = re_node_set_merge (&new_nodes, dfa->eclosures + cur_node); + if (BE (err != REG_NOERROR, 0)) + return err; + } + outside_node = find_subexp_node (dfa, &new_nodes, ex_subexp, fl_open); + if (outside_node == -1) + { + re_node_set_free (cur_nodes); + *cur_nodes = new_nodes; + return REG_NOERROR; + } + /* In this case, we have some nodes which are outside, remove them. 
*/ + re_node_set_empty (&new_nodes); + for (idx = 0; idx < cur_nodes->nelem; ++idx) + { + int cur_node = cur_nodes->elems[idx]; + re_node_set *eclosure = dfa->eclosures + cur_node; + outside_node = find_subexp_node (dfa, eclosure, ex_subexp, fl_open); + if (outside_node == -1) + { + err = re_node_set_merge (&new_nodes, eclosure); + if (BE (err != REG_NOERROR, 0)) + return err; + } + else + { + err = expand_eclosures_sub (dfa, &new_nodes, cur_node, ex_subexp, + fl_open); + if (BE (err != REG_NOERROR, 0)) + return err; + } + } + re_node_set_free (cur_nodes); + *cur_nodes = new_nodes; + return REG_NOERROR; + } + + static reg_errcode_t + expand_eclosures_sub (dfa, dst_nodes, target, ex_subexp, fl_open) + re_dfa_t *dfa; + int target, ex_subexp, fl_open; + re_node_set *dst_nodes; + { + int cur_node, type; + for (cur_node = target; !re_node_set_contains (dst_nodes, cur_node);) + { + int err; + type = dfa->nodes[cur_node].type; + + if (((type == OP_OPEN_SUBEXP && fl_open) + || (type == OP_CLOSE_SUBEXP && !fl_open)) + && dfa->nodes[cur_node].opr.idx == ex_subexp) + { + if (!fl_open) + { + err = re_node_set_insert (dst_nodes, cur_node); + if (BE (err == -1, 0)) + return REG_ESPACE; + } + break; + } + err = re_node_set_insert (dst_nodes, cur_node); + if (BE (err == -1, 0)) + return REG_ESPACE; + if (dfa->edests[cur_node].nelem == 0) + break; + if (dfa->edests[cur_node].nelem == 2) + { + err = expand_eclosures_sub (dfa, dst_nodes, + dfa->edests[cur_node].elems[1], + ex_subexp, fl_open); + if (BE (err != REG_NOERROR, 0)) + return err; + } + cur_node = dfa->edests[cur_node].elems[0]; + } + return REG_NOERROR; + } + + + /* For all the back references in the current state, calculate the + destination of the back references by the appropriate entry + in MCTX->BKREF_ENTS. 
*/ + + static reg_errcode_t + expand_bkref_cache (preg, mctx, sub_top, sub_last, cur_nodes, cur_str, + last_str, ex_subexp, fl_open) + const regex_t *preg; + re_match_context_t *mctx; + re_sub_match_top_t *sub_top; + re_sub_match_last_t *sub_last; + int cur_str, last_str, ex_subexp, fl_open; + re_node_set *cur_nodes; + { + reg_errcode_t err; + re_dfa_t *dfa = (re_dfa_t *) preg->buffer; + re_node_set processed_nodes; + int node_idx, cache_idx, cur_subexp_idx; + /* The current state. */ + cur_subexp_idx = dfa->nodes[sub_top->node].opr.idx; + re_node_set_init_empty (&processed_nodes); + for (node_idx = 0; node_idx < cur_nodes->nelem; ++node_idx) + { + int bkref_node = cur_nodes->elems[node_idx]; + if (dfa->nodes[bkref_node].type != OP_BACK_REF + || re_node_set_contains (&processed_nodes, bkref_node)) + continue; + + for (cache_idx = 0; cache_idx < mctx->nbkref_ents; ++cache_idx) + { + int to_idx, next_node; + struct re_backref_cache_entry *ent = mctx->bkref_ents + cache_idx; + /* Is this entry ENT is appropriate? */ + if (ent->str_idx != cur_str + || !re_node_set_contains (cur_nodes, ent->node)) + continue; /* No. */ + + to_idx = cur_str + ent->subexp_to - ent->subexp_from; + /* Calculate the destination of the back reference, and append it + to MCTX->STATE_LOG. */ + if (to_idx == cur_str) + { + re_node_set new_dests; + next_node = dfa->edests[ent->node].elems[0]; + if (re_node_set_contains (cur_nodes, next_node)) + continue; + err = re_node_set_init_1 (&new_dests, next_node); + if (BE (err != REG_NOERROR, 0)) + return err; + err = expand_eclosures (dfa, &new_dests, ex_subexp, fl_open); + if (BE (err != REG_NOERROR, 0)) + return err; + err = re_node_set_merge (cur_nodes, &new_dests); + if (BE (err != REG_NOERROR, 0)) + return err; + re_node_set_free (&new_dests); + /* TODO: It is still inefficient... 
*/ + node_idx = -1; + break; + } + else + { + re_node_set union_set; + next_node = dfa->nexts[ent->node]; + if (mctx->state_log[to_idx]) + { + int ret; + if (re_node_set_contains (&mctx->state_log[to_idx]->nodes, + next_node)) + continue; + err = re_node_set_init_copy (&union_set, + &mctx->state_log[to_idx]->nodes); + if (BE (err != REG_NOERROR, 0)) + return err; + ret = re_node_set_insert (&union_set, next_node); + if (BE (err < 0, 0)) + return REG_ESPACE; + } + else + { + err = re_node_set_init_1 (&union_set, next_node); + if (BE (err != REG_NOERROR, 0)) + return err; + } + mctx->state_log[to_idx] = re_acquire_state (&err, dfa, + &union_set); + re_node_set_free (&union_set); + if (BE (mctx->state_log[to_idx] == NULL + && err != REG_NOERROR, 0)) + return err; + } + } + } + re_node_set_free (&processed_nodes); return REG_NOERROR; } *************** *** 2508,2515 **** reg_errcode_t err; re_dfa_t *dfa = (re_dfa_t *) preg->buffer; int i, j, k, ch; int ndests; /* Number of the destination states from `state'. */ ! re_dfastate_t **trtable, **dest_states, **dest_states_word, **dest_states_nl; re_node_set follows, *dests_node; bitset *dests_ch; bitset acceptable; --- 3037,3046 ---- reg_errcode_t err; re_dfa_t *dfa = (re_dfa_t *) preg->buffer; int i, j, k, ch; + int dests_node_malloced = 0, dest_states_malloced = 0; int ndests; /* Number of the destination states from `state'. */ ! re_dfastate_t **trtable; ! re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl; re_node_set follows, *dests_node; bitset *dests_ch; bitset acceptable; *************** *** 2518,2551 **** from `state'. `dests_node[i]' represents the nodes which i-th destination state contains, and `dests_ch[i]' represents the characters which i-th destination state accepts. */ ! dests_node = re_malloc (re_node_set, SBC_MAX); ! dests_ch = re_malloc (bitset, SBC_MAX); /* Initialize transiton table. */ trtable = (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX); ! 
if (BE (dests_node == NULL || dests_ch == NULL || trtable == NULL, 0)) ! return NULL; /* At first, group all nodes belonging to `state' into several destinations. */ ndests = group_nodes_into_DFAstates (preg, state, dests_node, dests_ch); if (BE (ndests <= 0, 0)) { ! re_free (dests_node); ! re_free (dests_ch); /* Return NULL in case of an error, trtable otherwise. */ ! return (ndests < 0) ? NULL : trtable; } - dest_states = re_malloc (re_dfastate_t *, ndests); - dest_states_word = re_malloc (re_dfastate_t *, ndests); - dest_states_nl = re_malloc (re_dfastate_t *, ndests); - bitset_empty (acceptable); - err = re_node_set_alloc (&follows, ndests + 1); ! if (BE (dest_states == NULL || dest_states_word == NULL ! || dest_states_nl == NULL || err != REG_NOERROR, 0)) ! return NULL; /* Then build the states for all destinations. */ for (i = 0; i < ndests; ++i) --- 3049,3124 ---- from `state'. `dests_node[i]' represents the nodes which i-th destination state contains, and `dests_ch[i]' represents the characters which i-th destination state accepts. */ ! #ifdef _LIBC ! if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX)) ! dests_node = (re_node_set *) ! alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX); ! else ! #endif ! { ! dests_node = (re_node_set *) ! malloc ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX); ! if (BE (dests_node == NULL, 0)) ! return NULL; ! dests_node_malloced = 1; ! } ! dests_ch = (bitset *) (dests_node + SBC_MAX); /* Initialize transiton table. */ trtable = (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX); ! if (BE (trtable == NULL, 0)) ! { ! if (dests_node_malloced) ! free (dests_node); ! return NULL; ! } /* At first, group all nodes belonging to `state' into several destinations. */ ndests = group_nodes_into_DFAstates (preg, state, dests_node, dests_ch); if (BE (ndests <= 0, 0)) { ! if (dests_node_malloced) ! free (dests_node); /* Return NULL in case of an error, trtable otherwise. */ ! 
if (ndests == 0) ! return trtable; ! free (trtable); ! return NULL; } err = re_node_set_alloc (&follows, ndests + 1); ! if (BE (err != REG_NOERROR, 0)) ! goto out_free; ! ! #ifdef _LIBC ! if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX ! + ndests * 3 * sizeof (re_dfastate_t *))) ! dest_states = (re_dfastate_t **) ! alloca (ndests * 3 * sizeof (re_dfastate_t *)); ! else ! #endif ! { ! dest_states = (re_dfastate_t **) ! malloc (ndests * 3 * sizeof (re_dfastate_t *)); ! if (BE (dest_states == NULL, 0)) ! { ! out_free: ! if (dest_states_malloced) ! free (dest_states); ! re_node_set_free (&follows); ! for (i = 0; i < ndests; ++i) ! re_node_set_free (dests_node + i); ! free (trtable); ! if (dests_node_malloced) ! free (dests_node); ! return NULL; ! } ! dest_states_malloced = 1; ! } ! dest_states_word = dest_states + ndests; ! dest_states_nl = dest_states_word + ndests; ! bitset_empty (acceptable); /* Then build the states for all destinations. */ for (i = 0; i < ndests; ++i) *************** *** 2554,2609 **** re_node_set_empty (&follows); /* Merge the follows of this destination states. */ for (j = 0; j < dests_node[i].nelem; ++j) ! { ! next_node = dfa->nexts[dests_node[i].elems[j]]; ! if (next_node != -1) ! { ! err = re_node_set_merge (&follows, dfa->eclosures + next_node); ! if (BE (err != REG_NOERROR, 0)) ! return NULL; ! } ! } /* If search flag is set, merge the initial state. */ if (fl_search) ! { #ifdef RE_ENABLE_I18N ! int not_initial = 0; ! for (j = 0; j < follows.nelem; ++j) ! if (dfa->nodes[follows.elems[j]].type == CHARACTER) ! { ! not_initial = dfa->nodes[follows.elems[j]].mb_partial; ! break; ! } ! if (!not_initial) ! #endif ! { ! err = re_node_set_merge (&follows, ! dfa->init_state->entrance_nodes); ! if (BE (err != REG_NOERROR, 0)) ! return NULL; ! } ! } dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0); if (BE (dest_states[i] == NULL && err != REG_NOERROR, 0)) ! 
return NULL; /* If the new state has context constraint, ! build appropriate states for these contexts. */ if (dest_states[i]->has_constraint) ! { ! dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows, ! CONTEXT_WORD); ! if (BE (dest_states_word[i] == NULL && err != REG_NOERROR, 0)) ! return NULL; ! dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows, ! CONTEXT_NEWLINE); ! if (BE (dest_states_nl[i] == NULL && err != REG_NOERROR, 0)) ! return NULL; ! } else ! { ! dest_states_word[i] = dest_states[i]; ! dest_states_nl[i] = dest_states[i]; ! } bitset_merge (acceptable, dests_ch[i]); } --- 3127,3182 ---- re_node_set_empty (&follows); /* Merge the follows of this destination states. */ for (j = 0; j < dests_node[i].nelem; ++j) ! { ! next_node = dfa->nexts[dests_node[i].elems[j]]; ! if (next_node != -1) ! { ! err = re_node_set_merge (&follows, dfa->eclosures + next_node); ! if (BE (err != REG_NOERROR, 0)) ! goto out_free; ! } ! } /* If search flag is set, merge the initial state. */ if (fl_search) ! { #ifdef RE_ENABLE_I18N ! int not_initial = 0; ! for (j = 0; j < follows.nelem; ++j) ! if (dfa->nodes[follows.elems[j]].type == CHARACTER) ! { ! not_initial = dfa->nodes[follows.elems[j]].mb_partial; ! break; ! } ! if (!not_initial) ! #endif ! { ! err = re_node_set_merge (&follows, ! dfa->init_state->entrance_nodes); ! if (BE (err != REG_NOERROR, 0)) ! goto out_free; ! } ! } dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0); if (BE (dest_states[i] == NULL && err != REG_NOERROR, 0)) ! goto out_free; /* If the new state has context constraint, ! build appropriate states for these contexts. */ if (dest_states[i]->has_constraint) ! { ! dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows, ! CONTEXT_WORD); ! if (BE (dest_states_word[i] == NULL && err != REG_NOERROR, 0)) ! goto out_free; ! dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows, ! CONTEXT_NEWLINE); ! 
if (BE (dest_states_nl[i] == NULL && err != REG_NOERROR, 0)) ! goto out_free; ! } else ! { ! dest_states_word[i] = dest_states[i]; ! dest_states_nl[i] = dest_states[i]; ! } bitset_merge (acceptable, dests_ch[i]); } *************** *** 2612,2669 **** for (i = 0, ch = 0; i < BITSET_UINTS; ++i) for (j = 0; j < UINT_BITS; ++j, ++ch) if ((acceptable[i] >> j) & 1) ! { ! /* The current state accepts the character ch. */ ! if (IS_WORD_CHAR (ch)) ! { ! for (k = 0; k < ndests; ++k) ! if ((dests_ch[k][i] >> j) & 1) ! { ! /* k-th destination accepts the word character ch. */ ! trtable[ch] = dest_states_word[k]; ! /* There must be only one destination which accepts ! character ch. See group_nodes_into_DFAstates. */ ! break; ! } ! } ! else /* not WORD_CHAR */ ! { ! for (k = 0; k < ndests; ++k) ! if ((dests_ch[k][i] >> j) & 1) ! { ! /* k-th destination accepts the non-word character ch. */ ! trtable[ch] = dest_states[k]; ! /* There must be only one destination which accepts ! character ch. See group_nodes_into_DFAstates. */ ! break; ! } ! } ! } /* new line */ if (bitset_contain (acceptable, NEWLINE_CHAR)) { /* The current state accepts newline character. */ for (k = 0; k < ndests; ++k) ! if (bitset_contain (dests_ch[k], NEWLINE_CHAR)) ! { ! /* k-th destination accepts newline character. */ ! trtable[NEWLINE_CHAR] = dest_states_nl[k]; ! /* There must be only one destination which accepts ! newline. See group_nodes_into_DFAstates. */ ! break; ! } } ! re_free (dest_states_nl); ! re_free (dest_states_word); ! re_free (dest_states); re_node_set_free (&follows); for (i = 0; i < ndests; ++i) re_node_set_free (dests_node + i); ! re_free (dests_ch); ! re_free (dests_node); return trtable; } --- 3185,3241 ---- for (i = 0, ch = 0; i < BITSET_UINTS; ++i) for (j = 0; j < UINT_BITS; ++j, ++ch) if ((acceptable[i] >> j) & 1) ! { ! /* The current state accepts the character ch. */ ! if (IS_WORD_CHAR (ch)) ! { ! for (k = 0; k < ndests; ++k) ! if ((dests_ch[k][i] >> j) & 1) ! { ! 
/* k-th destination accepts the word character ch. */ ! trtable[ch] = dest_states_word[k]; ! /* There must be only one destination which accepts ! character ch. See group_nodes_into_DFAstates. */ ! break; ! } ! } ! else /* not WORD_CHAR */ ! { ! for (k = 0; k < ndests; ++k) ! if ((dests_ch[k][i] >> j) & 1) ! { ! /* k-th destination accepts the non-word character ch. */ ! trtable[ch] = dest_states[k]; ! /* There must be only one destination which accepts ! character ch. See group_nodes_into_DFAstates. */ ! break; ! } ! } ! } /* new line */ if (bitset_contain (acceptable, NEWLINE_CHAR)) { /* The current state accepts newline character. */ for (k = 0; k < ndests; ++k) ! if (bitset_contain (dests_ch[k], NEWLINE_CHAR)) ! { ! /* k-th destination accepts newline character. */ ! trtable[NEWLINE_CHAR] = dest_states_nl[k]; ! /* There must be only one destination which accepts ! newline. See group_nodes_into_DFAstates. */ ! break; ! } } ! if (dest_states_malloced) ! free (dest_states); re_node_set_free (&follows); for (i = 0; i < ndests; ++i) re_node_set_free (dests_node + i); ! if (dests_node_malloced) ! free (dests_node); return trtable; } *************** *** 2698,2802 **** /* Enumerate all single byte character this node can accept. */ if (type == CHARACTER) ! bitset_set (accepts, node->opr.c); else if (type == SIMPLE_BRACKET) ! { ! bitset_merge (accepts, node->opr.sbcset); ! } else if (type == OP_PERIOD) ! { ! bitset_set_all (accepts); ! if (!(preg->syntax & RE_DOT_NEWLINE)) ! bitset_clear (accepts, '\n'); ! if (preg->syntax & RE_DOT_NOT_NULL) ! bitset_clear (accepts, '\0'); ! } else ! continue; /* Check the `accepts' and sift the characters which are not ! match it the context. */ if (constraint) ! { ! if (constraint & NEXT_WORD_CONSTRAINT) ! for (j = 0; j < BITSET_UINTS; ++j) ! accepts[j] &= dfa->word_char[j]; ! if (constraint & NEXT_NOTWORD_CONSTRAINT) ! for (j = 0; j < BITSET_UINTS; ++j) ! accepts[j] &= ~dfa->word_char[j]; ! if (constraint & NEXT_NEWLINE_CONSTRAINT) ! 
{ ! int accepts_newline = bitset_contain (accepts, NEWLINE_CHAR); ! bitset_empty (accepts); ! if (accepts_newline) ! bitset_set (accepts, NEWLINE_CHAR); ! else ! continue; ! } ! } /* Then divide `accepts' into DFA states, or create a new ! state. */ for (j = 0; j < ndests; ++j) ! { ! bitset intersec; /* Intersection sets, see below. */ ! bitset remains; ! /* Flags, see below. */ ! int has_intersec, not_subset, not_consumed; ! ! /* Optimization, skip if this state doesn't accept the character. */ ! if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c)) ! continue; ! ! /* Enumerate the intersection set of this state and `accepts'. */ ! has_intersec = 0; ! for (k = 0; k < BITSET_UINTS; ++k) ! has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k]; ! /* And skip if the intersection set is empty. */ ! if (!has_intersec) ! continue; ! ! /* Then check if this state is a subset of `accepts'. */ ! not_subset = not_consumed = 0; ! for (k = 0; k < BITSET_UINTS; ++k) ! { ! not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k]; ! not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k]; ! } ! ! /* If this state isn't a subset of `accepts', create a ! new group state, which has the `remains'. */ ! if (not_subset) ! { ! bitset_copy (dests_ch[ndests], remains); ! bitset_copy (dests_ch[j], intersec); ! err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]); ! if (BE (err != REG_NOERROR, 0)) ! return -1; ! ++ndests; ! } ! ! /* Put the position in the current group. */ ! err = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]); ! if (BE (err < 0, 0)) ! return -1; ! ! /* If all characters are consumed, go to next node. */ ! if (!not_consumed) ! break; ! } /* Some characters remain, create a new group. */ if (j == ndests) ! { ! bitset_copy (dests_ch[ndests], accepts); ! err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]); ! if (BE (err != REG_NOERROR, 0)) ! return -1; ! ++ndests; ! bitset_empty (accepts); ! 
} } return ndests; } #ifdef RE_ENABLE_I18N --- 3270,3378 ---- /* Enumerate all single byte character this node can accept. */ if (type == CHARACTER) ! bitset_set (accepts, node->opr.c); else if (type == SIMPLE_BRACKET) ! { ! bitset_merge (accepts, node->opr.sbcset); ! } else if (type == OP_PERIOD) ! { ! bitset_set_all (accepts); ! if (!(preg->syntax & RE_DOT_NEWLINE)) ! bitset_clear (accepts, '\n'); ! if (preg->syntax & RE_DOT_NOT_NULL) ! bitset_clear (accepts, '\0'); ! } else ! continue; /* Check the `accepts' and sift the characters which are not ! match it the context. */ if (constraint) ! { ! if (constraint & NEXT_WORD_CONSTRAINT) ! for (j = 0; j < BITSET_UINTS; ++j) ! accepts[j] &= dfa->word_char[j]; ! if (constraint & NEXT_NOTWORD_CONSTRAINT) ! for (j = 0; j < BITSET_UINTS; ++j) ! accepts[j] &= ~dfa->word_char[j]; ! if (constraint & NEXT_NEWLINE_CONSTRAINT) ! { ! int accepts_newline = bitset_contain (accepts, NEWLINE_CHAR); ! bitset_empty (accepts); ! if (accepts_newline) ! bitset_set (accepts, NEWLINE_CHAR); ! else ! continue; ! } ! } /* Then divide `accepts' into DFA states, or create a new ! state. */ for (j = 0; j < ndests; ++j) ! { ! bitset intersec; /* Intersection sets, see below. */ ! bitset remains; ! /* Flags, see below. */ ! int has_intersec, not_subset, not_consumed; ! ! /* Optimization, skip if this state doesn't accept the character. */ ! if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c)) ! continue; ! ! /* Enumerate the intersection set of this state and `accepts'. */ ! has_intersec = 0; ! for (k = 0; k < BITSET_UINTS; ++k) ! has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k]; ! /* And skip if the intersection set is empty. */ ! if (!has_intersec) ! continue; ! ! /* Then check if this state is a subset of `accepts'. */ ! not_subset = not_consumed = 0; ! for (k = 0; k < BITSET_UINTS; ++k) ! { ! not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k]; ! not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k]; ! } ! ! 
/* If this state isn't a subset of `accepts', create a ! new group state, which has the `remains'. */ ! if (not_subset) ! { ! bitset_copy (dests_ch[ndests], remains); ! bitset_copy (dests_ch[j], intersec); ! err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]); ! if (BE (err != REG_NOERROR, 0)) ! goto error_return; ! ++ndests; ! } ! ! /* Put the position in the current group. */ ! err = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]); ! if (BE (err < 0, 0)) ! goto error_return; ! ! /* If all characters are consumed, go to next node. */ ! if (!not_consumed) ! break; ! } /* Some characters remain, create a new group. */ if (j == ndests) ! { ! bitset_copy (dests_ch[ndests], accepts); ! err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]); ! if (BE (err != REG_NOERROR, 0)) ! goto error_return; ! ++ndests; ! bitset_empty (accepts); ! } } return ndests; + error_return: + for (j = 0; j < ndests; ++j) + re_node_set_free (dests_node + j); + return -1; } #ifdef RE_ENABLE_I18N *************** *** 2829,2838 **** { /* '.' accepts any one character except the following two cases. */ if ((!(preg->syntax & RE_DOT_NEWLINE) && ! re_string_byte_at (input, str_idx) == '\n') || ! ((preg->syntax & RE_DOT_NOT_NULL) && ! re_string_byte_at (input, str_idx) == '\0')) ! return 0; return char_len; } else if (node->type == COMPLEX_BRACKET) --- 3405,3414 ---- { /* '.' accepts any one character except the following two cases. */ if ((!(preg->syntax & RE_DOT_NEWLINE) && ! re_string_byte_at (input, str_idx) == '\n') || ! ((preg->syntax & RE_DOT_NOT_NULL) && ! re_string_byte_at (input, str_idx) == '\0')) ! return 0; return char_len; } else if (node->type == COMPLEX_BRACKET) *************** *** 2843,2987 **** # endif /* _LIBC */ int match_len = 0; wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars) ! ? re_string_wchar_at (input, str_idx) : 0); /* match with multibyte character? */ for (i = 0; i < cset->nmbchars; ++i) ! 
if (wc == cset->mbchars[i]) ! { ! match_len = char_len; ! goto check_node_accept_bytes_match; ! } /* match with character_class? */ for (i = 0; i < cset->nchar_classes; ++i) ! { ! wctype_t wt = cset->char_classes[i]; ! if (__iswctype (wc, wt)) ! { ! match_len = char_len; ! goto check_node_accept_bytes_match; ! } ! } # ifdef _LIBC if (nrules != 0) ! { ! unsigned int in_collseq = 0; ! const int32_t *table, *indirect; ! const unsigned char *weights, *extra; ! const char *collseqwc; ! int32_t idx; ! /* This #include defines a local function! */ # include ! /* match with collating_symbol? */ ! if (cset->ncoll_syms) ! extra = (const unsigned char *) ! _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB); ! for (i = 0; i < cset->ncoll_syms; ++i) ! { ! const unsigned char *coll_sym = extra + cset->coll_syms[i]; ! /* Compare the length of input collating element and ! the length of current collating element. */ ! if (*coll_sym != elem_len) ! continue; ! /* Compare each bytes. */ ! for (j = 0; j < *coll_sym; j++) ! if (pin[j] != coll_sym[1 + j]) ! break; ! if (j == *coll_sym) ! { ! /* Match if every bytes is equal. */ ! match_len = j; ! goto check_node_accept_bytes_match; ! } ! } ! ! if (cset->nranges) ! { ! if (elem_len <= char_len) ! { ! collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC); ! in_collseq = collseq_table_lookup (collseqwc, wc); ! } ! else ! in_collseq = find_collation_sequence_value (pin, elem_len); ! } ! /* match with range expression? */ ! for (i = 0; i < cset->nranges; ++i) ! if (cset->range_starts[i] <= in_collseq ! && in_collseq <= cset->range_ends[i]) ! { ! match_len = elem_len; ! goto check_node_accept_bytes_match; ! } ! ! /* match with equivalence_class? */ ! if (cset->nequiv_classes) ! { ! const unsigned char *cp = pin; ! table = (const int32_t *) ! _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB); ! weights = (const unsigned char *) ! _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB); ! extra = (const unsigned char *) ! 
_NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB); ! indirect = (const int32_t *) ! _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB); ! idx = findidx (&cp); ! if (idx > 0) ! for (i = 0; i < cset->nequiv_classes; ++i) ! { ! int32_t equiv_class_idx = cset->equiv_classes[i]; ! size_t weight_len = weights[idx]; ! if (weight_len == weights[equiv_class_idx]) ! { ! int cnt = 0; ! while (cnt <= weight_len ! && (weights[equiv_class_idx + 1 + cnt] ! == weights[idx + 1 + cnt])) ! ++cnt; ! if (cnt > weight_len) ! { ! match_len = elem_len; ! goto check_node_accept_bytes_match; ! } ! } ! } ! } ! } else # endif /* _LIBC */ ! { ! /* match with range expression? */ #if __GNUC__ >= 2 ! wchar_t cmp_buf[] = {L'\0', L'\0', wc, L'\0', L'\0', L'\0'}; #else ! wchar_t cmp_buf[] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'}; ! cmp_buf[2] = wc; #endif ! for (i = 0; i < cset->nranges; ++i) ! { ! cmp_buf[0] = cset->range_starts[i]; ! cmp_buf[4] = cset->range_ends[i]; ! if (wcscoll (cmp_buf, cmp_buf + 2) <= 0 ! && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0) ! { ! match_len = char_len; ! goto check_node_accept_bytes_match; ! } ! } ! } check_node_accept_bytes_match: if (!cset->non_match) ! return match_len; else ! { ! if (match_len > 0) ! return 0; ! else ! return (elem_len > char_len) ? elem_len : char_len; ! } } return 0; } --- 3419,3563 ---- # endif /* _LIBC */ int match_len = 0; wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars) ! ? re_string_wchar_at (input, str_idx) : 0); /* match with multibyte character? */ for (i = 0; i < cset->nmbchars; ++i) ! if (wc == cset->mbchars[i]) ! { ! match_len = char_len; ! goto check_node_accept_bytes_match; ! } /* match with character_class? */ for (i = 0; i < cset->nchar_classes; ++i) ! { ! wctype_t wt = cset->char_classes[i]; ! if (__iswctype (wc, wt)) ! { ! match_len = char_len; ! goto check_node_accept_bytes_match; ! } ! } # ifdef _LIBC if (nrules != 0) ! { ! unsigned int in_collseq = 0; ! const int32_t *table, *indirect; ! 
const unsigned char *weights, *extra; ! const char *collseqwc; ! int32_t idx; ! /* This #include defines a local function! */ # include ! /* match with collating_symbol? */ ! if (cset->ncoll_syms) ! extra = (const unsigned char *) ! _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB); ! for (i = 0; i < cset->ncoll_syms; ++i) ! { ! const unsigned char *coll_sym = extra + cset->coll_syms[i]; ! /* Compare the length of input collating element and ! the length of current collating element. */ ! if (*coll_sym != elem_len) ! continue; ! /* Compare each bytes. */ ! for (j = 0; j < *coll_sym; j++) ! if (pin[j] != coll_sym[1 + j]) ! break; ! if (j == *coll_sym) ! { ! /* Match if every bytes is equal. */ ! match_len = j; ! goto check_node_accept_bytes_match; ! } ! } ! ! if (cset->nranges) ! { ! if (elem_len <= char_len) ! { ! collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC); ! in_collseq = collseq_table_lookup (collseqwc, wc); ! } ! else ! in_collseq = find_collation_sequence_value (pin, elem_len); ! } ! /* match with range expression? */ ! for (i = 0; i < cset->nranges; ++i) ! if (cset->range_starts[i] <= in_collseq ! && in_collseq <= cset->range_ends[i]) ! { ! match_len = elem_len; ! goto check_node_accept_bytes_match; ! } ! ! /* match with equivalence_class? */ ! if (cset->nequiv_classes) ! { ! const unsigned char *cp = pin; ! table = (const int32_t *) ! _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB); ! weights = (const unsigned char *) ! _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB); ! extra = (const unsigned char *) ! _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB); ! indirect = (const int32_t *) ! _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB); ! idx = findidx (&cp); ! if (idx > 0) ! for (i = 0; i < cset->nequiv_classes; ++i) ! { ! int32_t equiv_class_idx = cset->equiv_classes[i]; ! size_t weight_len = weights[idx]; ! if (weight_len == weights[equiv_class_idx]) ! { ! int cnt = 0; ! while (cnt <= weight_len ! && (weights[equiv_class_idx + 1 + cnt] ! 
== weights[idx + 1 + cnt])) ! ++cnt; ! if (cnt > weight_len) ! { ! match_len = elem_len; ! goto check_node_accept_bytes_match; ! } ! } ! } ! } ! } else # endif /* _LIBC */ ! { ! /* match with range expression? */ #if __GNUC__ >= 2 ! wchar_t cmp_buf[] = {L'\0', L'\0', wc, L'\0', L'\0', L'\0'}; #else ! wchar_t cmp_buf[] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'}; ! cmp_buf[2] = wc; #endif ! for (i = 0; i < cset->nranges; ++i) ! { ! cmp_buf[0] = cset->range_starts[i]; ! cmp_buf[4] = cset->range_ends[i]; ! if (wcscoll (cmp_buf, cmp_buf + 2) <= 0 ! && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0) ! { ! match_len = char_len; ! goto check_node_accept_bytes_match; ! } ! } ! } check_node_accept_bytes_match: if (!cset->non_match) ! return match_len; else ! { ! if (match_len > 0) ! return 0; ! else ! return (elem_len > char_len) ? elem_len : char_len; ! } } return 0; } *************** *** 2996,3045 **** if (nrules == 0) { if (mbs_len == 1) ! { ! /* No valid character. Match it as a single byte character. */ ! const unsigned char *collseq = (const unsigned char *) ! _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB); ! return collseq[mbs[0]]; ! } return UINT_MAX; } else { int32_t idx; const unsigned char *extra = (const unsigned char *) ! _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB); for (idx = 0; ;) ! { ! int mbs_cnt, found = 0; ! int32_t elem_mbs_len; ! /* Skip the name of collating element name. */ ! idx = idx + extra[idx] + 1; ! elem_mbs_len = extra[idx++]; ! if (mbs_len == elem_mbs_len) ! { ! for (mbs_cnt = 0; mbs_cnt < elem_mbs_len; ++mbs_cnt) ! if (extra[idx + mbs_cnt] != mbs[mbs_cnt]) ! break; ! if (mbs_cnt == elem_mbs_len) ! /* Found the entry. */ ! found = 1; ! } ! /* Skip the byte sequence of the collating element. */ ! idx += elem_mbs_len; ! /* Adjust for the alignment. */ ! idx = (idx + 3) & ~3; ! /* Skip the collation sequence value. */ ! idx += sizeof (uint32_t); ! /* Skip the wide char sequence of the collating element. */ ! 
idx = idx + sizeof (uint32_t) * (extra[idx] + 1); ! /* If we found the entry, return the sequence value. */ ! if (found) ! return *(uint32_t *) (extra + idx); ! /* Skip the collation sequence value. */ ! idx += sizeof (uint32_t); ! } } } # endif /* _LIBC */ --- 3572,3621 ---- if (nrules == 0) { if (mbs_len == 1) ! { ! /* No valid character. Match it as a single byte character. */ ! const unsigned char *collseq = (const unsigned char *) ! _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB); ! return collseq[mbs[0]]; ! } return UINT_MAX; } else { int32_t idx; const unsigned char *extra = (const unsigned char *) ! _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB); for (idx = 0; ;) ! { ! int mbs_cnt, found = 0; ! int32_t elem_mbs_len; ! /* Skip the name of collating element name. */ ! idx = idx + extra[idx] + 1; ! elem_mbs_len = extra[idx++]; ! if (mbs_len == elem_mbs_len) ! { ! for (mbs_cnt = 0; mbs_cnt < elem_mbs_len; ++mbs_cnt) ! if (extra[idx + mbs_cnt] != mbs[mbs_cnt]) ! break; ! if (mbs_cnt == elem_mbs_len) ! /* Found the entry. */ ! found = 1; ! } ! /* Skip the byte sequence of the collating element. */ ! idx += elem_mbs_len; ! /* Adjust for the alignment. */ ! idx = (idx + 3) & ~3; ! /* Skip the collation sequence value. */ ! idx += sizeof (uint32_t); ! /* Skip the wide char sequence of the collating element. */ ! idx = idx + sizeof (uint32_t) * (extra[idx] + 1); ! /* If we found the entry, return the sequence value. */ ! if (found) ! return *(uint32_t *) (extra + idx); ! /* Skip the collation sequence value. */ ! idx += sizeof (uint32_t); ! } } } # endif /* _LIBC */ *************** *** 3059,3070 **** if (node->constraint) { /* The node has constraints. Check whether the current context ! satisfies the constraints. */ unsigned int context = re_string_context_at (mctx->input, idx, ! mctx->eflags, ! preg->newline_anchor); if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context)) ! 
return 0; } ch = re_string_byte_at (mctx->input, idx); if (node->type == CHARACTER) --- 3635,3646 ---- if (node->constraint) { /* The node has constraints. Check whether the current context ! satisfies the constraints. */ unsigned int context = re_string_context_at (mctx->input, idx, ! mctx->eflags, ! preg->newline_anchor); if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context)) ! return 0; } ch = re_string_byte_at (mctx->input, idx); if (node->type == CHARACTER) *************** *** 3073,3079 **** return bitset_contain (node->opr.sbcset, ch); else if (node->type == OP_PERIOD) return !((ch == '\n' && !(preg->syntax & RE_DOT_NEWLINE)) ! || (ch == '\0' && (preg->syntax & RE_DOT_NOT_NULL))); else return 0; } --- 3649,3655 ---- return bitset_contain (node->opr.sbcset, ch); else if (node->type == OP_PERIOD) return !((ch == '\n' && !(preg->syntax & RE_DOT_NEWLINE)) ! || (ch == '\0' && (preg->syntax & RE_DOT_NOT_NULL))); else return 0; } *************** *** 3095,3104 **** if (mctx->state_log != NULL) { /* And double the length of state_log. */ ! mctx->state_log = re_realloc (mctx->state_log, re_dfastate_t *, ! pstr->bufs_len * 2); ! if (BE (mctx->state_log == NULL, 0)) ! return REG_ESPACE; } /* Then reconstruct the buffers. */ --- 3671,3682 ---- if (mctx->state_log != NULL) { /* And double the length of state_log. */ ! re_dfastate_t **new_array; ! new_array = re_realloc (mctx->state_log, re_dfastate_t *, ! pstr->bufs_len * 2); ! if (BE (new_array == NULL, 0)) ! return REG_ESPACE; ! mctx->state_log = new_array; } /* Then reconstruct the buffers. */ *************** *** 3106,3129 **** { #ifdef RE_ENABLE_I18N if (MB_CUR_MAX > 1) ! build_wcs_upper_buffer (pstr); else #endif /* RE_ENABLE_I18N */ ! build_upper_buffer (pstr); } else { #ifdef RE_ENABLE_I18N if (MB_CUR_MAX > 1) ! build_wcs_buffer (pstr); else #endif /* RE_ENABLE_I18N */ ! { ! if (pstr->trans != NULL) ! re_string_translate_buffer (pstr); ! else ! pstr->valid_len = pstr->bufs_len; ! 
} } return REG_NOERROR; } --- 3684,3707 ---- { #ifdef RE_ENABLE_I18N if (MB_CUR_MAX > 1) ! build_wcs_upper_buffer (pstr); else #endif /* RE_ENABLE_I18N */ ! build_upper_buffer (pstr); } else { #ifdef RE_ENABLE_I18N if (MB_CUR_MAX > 1) ! build_wcs_buffer (pstr); else #endif /* RE_ENABLE_I18N */ ! { ! if (pstr->trans != NULL) ! re_string_translate_buffer (pstr); ! else ! pstr->valid_len = pstr->bufs_len; ! } } return REG_NOERROR; } *************** *** 3143,3161 **** if (n > 0) { mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n); ! if (BE (mctx->bkref_ents == NULL, 0)) ! return REG_ESPACE; } else mctx->bkref_ents = NULL; mctx->nbkref_ents = 0; mctx->abkref_ents = n; ! mctx->max_mb_elem_len = 0; ! mctx->cache_node = -1; ! mctx->cache_str_idx = -1; ! mctx->cache_first_idx = 0; ! mctx->cache_last_idx = 0; ! return REG_NOERROR; } --- 3721,3737 ---- if (n > 0) { mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n); ! mctx->sub_tops = re_malloc (re_sub_match_top_t *, n); ! if (BE (mctx->bkref_ents == NULL || mctx->sub_tops == NULL, 0)) ! return REG_ESPACE; } else mctx->bkref_ents = NULL; mctx->nbkref_ents = 0; mctx->abkref_ents = n; ! mctx->max_mb_elem_len = 1; ! mctx->nsub_tops = 0; ! mctx->asub_tops = n; return REG_NOERROR; } *************** *** 3163,3210 **** match_ctx_free (mctx) re_match_context_t *mctx; { re_free (mctx->bkref_ents); } /* Add a new backreference entry to the cache. */ static reg_errcode_t ! match_ctx_add_entry (mctx, dfa, node, str_idx, from, to) ! re_match_context_t *mctx; ! re_dfa_t *dfa; ! int node, str_idx, from, to; { if (mctx->nbkref_ents >= mctx->abkref_ents) { ! mctx->bkref_ents = re_realloc (mctx->bkref_ents, ! struct re_backref_cache_entry, ! mctx->abkref_ents * 2); ! if (BE (mctx->bkref_ents == NULL, 0)) ! return REG_ESPACE; memset (mctx->bkref_ents + mctx->nbkref_ents, '\0', ! 
sizeof (struct re_backref_cache_entry) * mctx->abkref_ents); mctx->abkref_ents *= 2; } - - if (mctx->cache_node == node && mctx->cache_str_idx == str_idx) - mctx->cache_last_idx = mctx->nbkref_ents; - mctx->bkref_ents[mctx->nbkref_ents].node = node; mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx; - mctx->bkref_ents[mctx->nbkref_ents].hash = str_idx + (node << 16); mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from; mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to; ! mctx->bkref_ents[mctx->nbkref_ents].subexp_len = to - from; ! mctx->bkref_ents[mctx->nbkref_ents].dst_node = to == from ! ? dfa->edests[node].elems[0] : dfa->nexts[node]; ! mctx->bkref_ents[mctx->nbkref_ents++].flag = 0; if (mctx->max_mb_elem_len < to - from) mctx->max_mb_elem_len = to - from; - return REG_NOERROR; } static void match_ctx_clear_flag (mctx) re_match_context_t *mctx; --- 3739,3835 ---- match_ctx_free (mctx) re_match_context_t *mctx; { + int st_idx; + for (st_idx = 0; st_idx < mctx->nsub_tops; ++st_idx) + { + int sl_idx; + re_sub_match_top_t *top = mctx->sub_tops[st_idx]; + for (sl_idx = 0; sl_idx < top->nlasts; ++sl_idx) + { + re_sub_match_last_t *last = top->lasts[sl_idx]; + re_free (last->path.array); + if (last->limits) + re_node_set_free (last->limits); + re_free (last); + } + re_free (top->lasts); + if (top->path) + { + re_free (top->path->array); + re_free (top->path); + } + if (top->limits) + re_node_set_free (top->limits); + free (top); + } + re_free (mctx->sub_tops); re_free (mctx->bkref_ents); } /* Add a new backreference entry to the cache. */ static reg_errcode_t ! match_ctx_add_entry (mctx, node, str_idx, from, to, top, last) ! re_match_context_t *mctx; ! int node, str_idx, from, to; ! re_sub_match_top_t *top; ! re_sub_match_last_t *last; { + /* TODO: It can be more efficient. 
*/ + int i; + for (i = 0; i < mctx->nbkref_ents; ++i) + { + struct re_backref_cache_entry *entry = mctx->bkref_ents + i; + if (entry->node == node && entry->str_idx == str_idx + && entry->subexp_from == from && entry->subexp_to == to) + return REG_NOERROR; + } + if (mctx->nbkref_ents >= mctx->abkref_ents) { ! struct re_backref_cache_entry* new_entry; ! new_entry = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry, ! mctx->abkref_ents * 2); ! if (BE (new_entry == NULL, 0)) ! { ! re_free (mctx->bkref_ents); ! return REG_ESPACE; ! } ! mctx->bkref_ents = new_entry; memset (mctx->bkref_ents + mctx->nbkref_ents, '\0', ! sizeof (struct re_backref_cache_entry) * mctx->abkref_ents); mctx->abkref_ents *= 2; } mctx->bkref_ents[mctx->nbkref_ents].node = node; mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx; mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from; mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to; ! mctx->bkref_ents[mctx->nbkref_ents].top = top; ! mctx->bkref_ents[mctx->nbkref_ents].last = last; mctx->bkref_ents[mctx->nbkref_ents++].flag = 0; if (mctx->max_mb_elem_len < to - from) mctx->max_mb_elem_len = to - from; return REG_NOERROR; } + static int + search_cur_bkref_entry (mctx, str_idx) + re_match_context_t *mctx; + int str_idx; + { + int left, right, mid; + right = mctx->nbkref_ents; + for (left = 0; left < right;) + { + mid = (left + right) / 2; + if (mctx->bkref_ents[mid].str_idx < str_idx) + left = mid + 1; + else + right = mid; + } + return left; + } + static void match_ctx_clear_flag (mctx) re_match_context_t *mctx; *************** *** 3216,3224 **** } } static void sift_ctx_init (sctx, sifted_sts, limited_sts, last_node, last_str_idx, ! 
check_subexp) re_sift_context_t *sctx; re_dfastate_t **sifted_sts, **limited_sts; int last_node, last_str_idx, check_subexp; --- 3841,3897 ---- } } + static reg_errcode_t + match_ctx_add_subtop (mctx, node, str_idx) + re_match_context_t *mctx; + int node, str_idx; + { + #ifdef DEBUG + assert (mctx->sub_tops != NULL); + assert (mctx->asub_tops > 0); + #endif + if (mctx->nsub_tops == mctx->asub_tops) + { + mctx->asub_tops *= 2; + mctx->sub_tops = re_realloc (mctx->sub_tops, re_sub_match_top_t *, + mctx->asub_tops); + if (BE (mctx->sub_tops == NULL, 0)) + return REG_ESPACE; + } + mctx->sub_tops[mctx->nsub_tops] = calloc (1, sizeof (re_sub_match_top_t)); + if (mctx->sub_tops[mctx->nsub_tops] == NULL) + return REG_ESPACE; + mctx->sub_tops[mctx->nsub_tops]->node = node; + mctx->sub_tops[mctx->nsub_tops++]->str_idx = str_idx; + return REG_NOERROR; + } + + static re_sub_match_last_t * + match_ctx_add_sublast (subtop, node, str_idx) + re_sub_match_top_t *subtop; + int node, str_idx; + { + re_sub_match_last_t *new_entry; + if (subtop->nlasts == subtop->alasts) + { + subtop->alasts = 2 * subtop->alasts + 1; + subtop->lasts = re_realloc (subtop->lasts, re_sub_match_last_t *, + subtop->alasts); + if (BE (subtop->lasts == NULL, 0)) + return NULL; + } + new_entry = calloc (1, sizeof (re_sub_match_last_t)); + subtop->lasts[subtop->nlasts] = new_entry; + new_entry->node = node; + new_entry->str_idx = str_idx; + new_entry->limits = NULL; + ++subtop->nlasts; + return new_entry; + } + static void sift_ctx_init (sctx, sifted_sts, limited_sts, last_node, last_str_idx, ! check_subexp) re_sift_context_t *sctx; re_dfastate_t **sifted_sts, **limited_sts; int last_node, last_str_idx, check_subexp; *************** *** 3237,3243 **** It also tries to limit the startup time as much as possible. */ static int my_memcmp (char *s1, char *s2, unsigned int l) { ! 
if (BE (l, 1) != 0) while (BE(*s1 == *s2, 1) && BE(--l, 0) != 0) s1++, s2++; --- 3910,3916 ---- It also tries to limit the startup time as much as possible. */ static int my_memcmp (char *s1, char *s2, unsigned int l) { ! if (BE(l, 1) != 0) while (BE(*s1 == *s2, 1) && BE(--l, 0) != 0) s1++, s2++; diff -rNC3 sed-4.0.3/lib/snprintf.c sed-4.0.4/lib/snprintf.c *** sed-4.0.3/lib/snprintf.c Wed Jan 23 17:03:14 2002 --- sed-4.0.4/lib/snprintf.c Wed Nov 27 19:21:03 2002 *************** *** 319,325 **** { va_list args; int ret; ! #ifndef HAVE_STDARG_H char *str; size_t sz; char *format; --- 319,325 ---- { va_list args; int ret; ! #ifdef HAVE_STDARG_H char *str; size_t sz; char *format; Binary files sed-4.0.3/po/ca.gmo and sed-4.0.4/po/ca.gmo differ diff -rNC3 sed-4.0.3/po/ca.po sed-4.0.4/po/ca.po *** sed-4.0.3/po/ca.po Mon Oct 28 08:07:40 2002 --- sed-4.0.4/po/ca.po Thu Dec 12 19:56:11 2002 *************** *** 5,11 **** msgid "" msgstr "" "Project-Id-Version: sed 3.02.80\n" ! "POT-Creation-Date: 2002-10-28 08:07+0100\n" "PO-Revision-Date: 2002-06-20 17:12+0200\n" "Last-Translator: Jordi Mallach \n" "Language-Team: Catalan \n" --- 5,11 ---- msgid "" msgstr "" "Project-Id-Version: sed 3.02.80\n" ! "POT-Creation-Date: 2002-12-12 19:56+0100\n" "PO-Revision-Date: 2002-06-20 17:12+0200\n" "Last-Translator: Jordi Mallach \n" "Language-Team: Catalan \n" *************** *** 157,171 **** msgstr "ERROR INTERN: Ordre errònia %c" #. XXX shouldn't this be (UCHAR_MAX+1)? ! #: lib/regcomp.c:658 sed/regex.c:47 msgid "No previous regular expression" msgstr "" ! #: sed/regex.c:48 msgid "Cannot specify modifiers on empty regexp" msgstr "" ! #: sed/regex.c:146 #, c-format msgid "Invalid reference \\%d on `s' command's RHS" msgstr "" --- 157,171 ---- msgstr "ERROR INTERN: Ordre errònia %c" #. XXX shouldn't this be (UCHAR_MAX+1)? ! #: lib/regcomp.c:661 sed/regex.c:46 msgid "No previous regular expression" msgstr "" ! #: sed/regex.c:47 msgid "Cannot specify modifiers on empty regexp" msgstr "" ! 
#: sed/regex.c:145 #, c-format msgid "Invalid reference \\%d on `s' command's RHS" msgstr "" Binary files sed-4.0.3/po/cs.gmo and sed-4.0.4/po/cs.gmo differ diff -rNC3 sed-4.0.3/po/cs.po sed-4.0.4/po/cs.po *** sed-4.0.3/po/cs.po Mon Oct 28 08:07:40 2002 --- sed-4.0.4/po/cs.po Thu Dec 12 19:56:11 2002 *************** *** 5,11 **** msgid "" msgstr "" "Project-Id-Version: sed 3.02.80\n" ! "POT-Creation-Date: 2002-10-28 08:07+0100\n" "PO-Revision-Date: 2001-08-05 19:52+02:00\n" "Last-Translator: Vladimir Michl \n" "Language-Team: Czech \n" --- 5,11 ---- msgid "" msgstr "" "Project-Id-Version: sed 3.02.80\n" ! "POT-Creation-Date: 2002-12-12 19:56+0100\n" "PO-Revision-Date: 2001-08-05 19:52+02:00\n" "Last-Translator: Vladimir Michl \n" "Language-Team: Czech \n" *************** *** 157,171 **** msgstr "INTERNÍ CHYBA: ¹patný pøíkaz %c" #. XXX shouldn't this be (UCHAR_MAX+1)? ! #: lib/regcomp.c:658 sed/regex.c:47 msgid "No previous regular expression" msgstr "" ! #: sed/regex.c:48 msgid "Cannot specify modifiers on empty regexp" msgstr "" ! #: sed/regex.c:146 #, c-format msgid "Invalid reference \\%d on `s' command's RHS" msgstr "" --- 157,171 ---- msgstr "INTERNÍ CHYBA: ¹patný pøíkaz %c" #. XXX shouldn't this be (UCHAR_MAX+1)? ! #: lib/regcomp.c:661 sed/regex.c:46 msgid "No previous regular expression" msgstr "" ! #: sed/regex.c:47 msgid "Cannot specify modifiers on empty regexp" msgstr "" ! #: sed/regex.c:145 #, c-format msgid "Invalid reference \\%d on `s' command's RHS" msgstr "" Binary files sed-4.0.3/po/da.gmo and sed-4.0.4/po/da.gmo differ diff -rNC3 sed-4.0.3/po/da.po sed-4.0.4/po/da.po *** sed-4.0.3/po/da.po Mon Oct 28 08:07:41 2002 --- sed-4.0.4/po/da.po Thu Dec 12 19:56:11 2002 *************** *** 5,11 **** msgid "" msgstr "" "Project-Id-Version: sed 4.0\n" ! 
"POT-Creation-Date: 2002-10-28 08:07+0100\n" "PO-Revision-Date: 2002-10-25 22:28+0200\n" "Last-Translator: Byrial Ole Jensen \n" "Language-Team: Danish \n" --- 5,11 ---- msgid "" msgstr "" "Project-Id-Version: sed 4.0\n" ! "POT-Creation-Date: 2002-12-12 19:56+0100\n" "PO-Revision-Date: 2002-10-25 22:28+0200\n" "Last-Translator: Byrial Ole Jensen \n" "Language-Team: Danish \n" *************** *** 158,172 **** msgstr "INTERN FEJL: Forkert værdi af cmd %c" #. XXX shouldn't this be (UCHAR_MAX+1)? ! #: lib/regcomp.c:658 sed/regex.c:47 msgid "No previous regular expression" msgstr "Intet forudgående regulært udtryk" ! #: sed/regex.c:48 msgid "Cannot specify modifiers on empty regexp" msgstr "Der kan ikke angives ændrere til tomt regulært udtryk" ! #: sed/regex.c:146 #, c-format msgid "Invalid reference \\%d on `s' command's RHS" msgstr "Ugyldig reference \\%d på 's'-kommandos højreside" --- 158,172 ---- msgstr "INTERN FEJL: Forkert værdi af cmd %c" #. XXX shouldn't this be (UCHAR_MAX+1)? ! #: lib/regcomp.c:661 sed/regex.c:46 msgid "No previous regular expression" msgstr "Intet forudgående regulært udtryk" ! #: sed/regex.c:47 msgid "Cannot specify modifiers on empty regexp" msgstr "Der kan ikke angives ændrere til tomt regulært udtryk" ! #: sed/regex.c:145 #, c-format msgid "Invalid reference \\%d on `s' command's RHS" msgstr "Ugyldig reference \\%d på 's'-kommandos højreside" Binary files sed-4.0.3/po/de.gmo and sed-4.0.4/po/de.gmo differ diff -rNC3 sed-4.0.3/po/de.po sed-4.0.4/po/de.po *** sed-4.0.3/po/de.po Fri Nov 8 20:11:21 2002 --- sed-4.0.4/po/de.po Thu Dec 12 19:56:11 2002 *************** *** 5,11 **** msgid "" msgstr "" "Project-Id-Version: sed 4.0.1\n" ! "POT-Creation-Date: 2002-10-28 08:07+0100\n" "PO-Revision-Date: 2002-11-07 16:03:41+0100\n" "Last-Translator: Walter Koch \n" "Language-Team: German \n" --- 5,11 ---- msgid "" msgstr "" "Project-Id-Version: sed 4.0.1\n" ! 
"POT-Creation-Date: 2002-12-12 19:56+0100\n" "PO-Revision-Date: 2002-11-07 16:03:41+0100\n" "Last-Translator: Walter Koch \n" "Language-Team: German \n" *************** *** 158,172 **** msgstr "INTERNER FEHLER: Falscher Befehl %c" #. XXX shouldn't this be (UCHAR_MAX+1)? ! #: lib/regcomp.c:658 sed/regex.c:47 msgid "No previous regular expression" msgstr "Kein vorheriger regulärer Ausdruck" ! #: sed/regex.c:48 msgid "Cannot specify modifiers on empty regexp" msgstr "Auf leere reguläre Ausdrücke können keine `modifier' angewandt werden" ! #: sed/regex.c:146 #, c-format msgid "Invalid reference \\%d on `s' command's RHS" msgstr "Ungültiger Verweis \\%d in den Haltepuffer des `s'-Befehls" --- 158,172 ---- msgstr "INTERNER FEHLER: Falscher Befehl %c" #. XXX shouldn't this be (UCHAR_MAX+1)? ! #: lib/regcomp.c:661 sed/regex.c:46 msgid "No previous regular expression" msgstr "Kein vorheriger regulärer Ausdruck" ! #: sed/regex.c:47 msgid "Cannot specify modifiers on empty regexp" msgstr "Auf leere reguläre Ausdrücke können keine `modifier' angewandt werden" ! #: sed/regex.c:145 #, c-format msgid "Invalid reference \\%d on `s' command's RHS" msgstr "Ungültiger Verweis \\%d in den Haltepuffer des `s'-Befehls" Binary files sed-4.0.3/po/el.gmo and sed-4.0.4/po/el.gmo differ diff -rNC3 sed-4.0.3/po/el.po sed-4.0.4/po/el.po *** sed-4.0.3/po/el.po Mon Oct 28 08:07:41 2002 --- sed-4.0.4/po/el.po Thu Dec 12 19:56:12 2002 *************** *** 5,11 **** msgid "" msgstr "" "Project-Id-Version: GNU sed 3.02.80\n" ! "POT-Creation-Date: 2002-10-28 08:07+0100\n" "PO-Revision-Date: 2002-03-08 12:57+0000\n" "Last-Translator: Simos Xenitellis \n" "Language-Team: Greek \n" --- 5,11 ---- msgid "" msgstr "" "Project-Id-Version: GNU sed 3.02.80\n" ! "POT-Creation-Date: 2002-12-12 19:56+0100\n" "PO-Revision-Date: 2002-03-08 12:57+0000\n" "Last-Translator: Simos Xenitellis \n" "Language-Team: Greek \n" *************** *** 157,171 **** msgstr "ÅÓÙÔÅÑÉÊÏ ÓÖÁËÌÁ: ÊáêÞ åíôïëÞ %c" #. 
XXX shouldn't this be (UCHAR_MAX+1)? ! #: lib/regcomp.c:658 sed/regex.c:47 msgid "No previous regular expression" msgstr "Äåí âñÝèçêå ðñïçãïýìåíç êáíïíéêÞ Ýêöñáóç" ! #: sed/regex.c:48 msgid "Cannot specify modifiers on empty regexp" msgstr "äåí åðéôñÝðåôáé ï ïñéóìüò äéáìïñöùôþí óå êåíÞ êáíïíéêÞ Ýêöñáóç" ! #: sed/regex.c:146 #, c-format msgid "Invalid reference \\%d on `s' command's RHS" msgstr "Ìç Ýãêõñç áíáöïñÜ \\%d óôï äåîß ôìÞìá ôçò åíôïëÞò `s'" --- 157,171 ---- msgstr "ÅÓÙÔÅÑÉÊÏ ÓÖÁËÌÁ: ÊáêÞ åíôïëÞ %c" #. XXX shouldn't this be (UCHAR_MAX+1)? ! #: lib/regcomp.c:661 sed/regex.c:46 msgid "No previous regular expression" msgstr "Äåí âñÝèçêå ðñïçãïýìåíç êáíïíéêÞ Ýêöñáóç" ! #: sed/regex.c:47 msgid "Cannot specify modifiers on empty regexp" msgstr "äåí åðéôñÝðåôáé ï ïñéóìüò äéáìïñöùôþí óå êåíÞ êáíïíéêÞ Ýêöñáóç" ! #: sed/regex.c:145 #, c-format msgid "Invalid reference \\%d on `s' command's RHS" msgstr "Ìç Ýãêõñç áíáöïñÜ \\%d óôï äåîß ôìÞìá ôçò åíôïëÞò `s'" Binary files sed-4.0.3/po/eo.gmo and sed-4.0.4/po/eo.gmo differ diff -rNC3 sed-4.0.3/po/eo.po sed-4.0.4/po/eo.po *** sed-4.0.3/po/eo.po Mon Oct 28 08:39:38 2002 --- sed-4.0.4/po/eo.po Thu Dec 12 19:56:12 2002 *************** *** 5,11 **** msgid "" msgstr "" "Project-Id-Version: GNU sed 4.0\n" ! "POT-Creation-Date: 2002-10-28 08:07+0100\n" "PO-Revision-Date: 2002-10-27 18:00+0000\n" "Last-Translator: Edmund GRIMLEY EVANS \n" "Language-Team: Esperanto \n" --- 5,11 ---- msgid "" msgstr "" "Project-Id-Version: GNU sed 4.0\n" ! "POT-Creation-Date: 2002-12-12 19:56+0100\n" "PO-Revision-Date: 2002-10-27 18:00+0000\n" "Last-Translator: Edmund GRIMLEY EVANS \n" "Language-Team: Esperanto \n" *************** *** 158,172 **** msgstr "INTERNA ERARO: Malbona komando %c" #. XXX shouldn't this be (UCHAR_MAX+1)? ! #: lib/regcomp.c:658 sed/regex.c:47 msgid "No previous regular expression" msgstr "Mankas antaÅ­a regula esprimo" ! 
#: sed/regex.c:48 msgid "Cannot specify modifiers on empty regexp" msgstr "Ne eblas specifi modifilojn ĉe malplena regula esprimo" ! #: sed/regex.c:146 #, c-format msgid "Invalid reference \\%d on `s' command's RHS" msgstr "Nevalida referenco \\%d ĉe dekstra flanko de komando 's'" --- 158,172 ---- msgstr "INTERNA ERARO: Malbona komando %c" #. XXX shouldn't this be (UCHAR_MAX+1)? ! #: lib/regcomp.c:661 sed/regex.c:46 msgid "No previous regular expression" msgstr "Mankas antaÅ­a regula esprimo" ! #: sed/regex.c:47 msgid "Cannot specify modifiers on empty regexp" msgstr "Ne eblas specifi modifilojn ĉe malplena regula esprimo" ! #: sed/regex.c:145 #, c-format msgid "Invalid reference \\%d on `s' command's RHS" msgstr "Nevalida referenco \\%d ĉe dekstra flanko de komando 's'" Binary files sed-4.0.3/po/es.gmo and sed-4.0.4/po/es.gmo differ diff -rNC3 sed-4.0.3/po/es.po sed-4.0.4/po/es.po *** sed-4.0.3/po/es.po Tue Oct 29 20:25:34 2002 --- sed-4.0.4/po/es.po Thu Dec 12 19:56:12 2002 *************** *** 4,12 **** # msgid "" msgstr "" ! "Project-Id-Version: sed 4.0\n" ! "POT-Creation-Date: 2002-10-28 08:07+0100\n" ! "PO-Revision-Date: 2002-10-28 16:20-0600\n" "Last-Translator: Cristian Othón Martínez Vera \n" "Language-Team: Spanish \n" "MIME-Version: 1.0\n" --- 4,12 ---- # msgid "" msgstr "" ! "Project-Id-Version: sed 4.0.2\n" ! "POT-Creation-Date: 2002-12-12 19:56+0100\n" ! "PO-Revision-Date: 2002-11-22 17:13-0600\n" "Last-Translator: Cristian Othón Martínez Vera \n" "Language-Team: Spanish \n" "MIME-Version: 1.0\n" *************** *** 159,173 **** msgstr "ERROR INTERNO: cmd %c erróneo" #. XXX shouldn't this be (UCHAR_MAX+1)? ! #: lib/regcomp.c:658 sed/regex.c:47 msgid "No previous regular expression" msgstr "No hay una expresión regular previa" ! #: sed/regex.c:48 msgid "Cannot specify modifiers on empty regexp" msgstr "No se pueden especificar modificadores en expresiones regulares vacías" ! 
#: sed/regex.c:146 #, c-format msgid "Invalid reference \\%d on `s' command's RHS" msgstr "Referencia \\%d inválida en el lado derecho del comando `s'" --- 159,173 ---- msgstr "ERROR INTERNO: cmd %c erróneo" #. XXX shouldn't this be (UCHAR_MAX+1)? ! #: lib/regcomp.c:661 sed/regex.c:46 msgid "No previous regular expression" msgstr "No hay una expresión regular previa" ! #: sed/regex.c:47 msgid "Cannot specify modifiers on empty regexp" msgstr "No se pueden especificar modificadores en expresiones regulares vacías" ! #: sed/regex.c:145 #, c-format msgid "Invalid reference \\%d on `s' command's RHS" msgstr "Referencia \\%d inválida en el lado derecho del comando `s'" Binary files sed-4.0.3/po/et.gmo and sed-4.0.4/po/et.gmo differ diff -rNC3 sed-4.0.3/po/et.po sed-4.0.4/po/et.po *** sed-4.0.3/po/et.po Mon Oct 28 08:07:42 2002 --- sed-4.0.4/po/et.po Thu Dec 12 19:56:12 2002 *************** *** 5,11 **** msgid "" msgstr "" "Project-Id-Version: sed 4.0\n" ! "POT-Creation-Date: 2002-10-28 08:07+0100\n" "PO-Revision-Date: 2002-10-25 11:00+03:00\n" "Last-Translator: Toomas Soome \n" "Language-Team: Estonian \n" --- 5,11 ---- msgid "" msgstr "" "Project-Id-Version: sed 4.0\n" ! "POT-Creation-Date: 2002-12-12 19:56+0100\n" "PO-Revision-Date: 2002-10-25 11:00+03:00\n" "Last-Translator: Toomas Soome \n" "Language-Team: Estonian \n" *************** *** 158,172 **** msgstr "SISEMINE VIGA: Halb käsk %c" #. XXX shouldn't this be (UCHAR_MAX+1)? ! #: lib/regcomp.c:658 sed/regex.c:47 msgid "No previous regular expression" msgstr "Eelmist regulaaravaldist pole" ! #: sed/regex.c:48 msgid "Cannot specify modifiers on empty regexp" msgstr "Muudatusi tühjale regulaaravaldisele ei saa määrata" ! #: sed/regex.c:146 #, c-format msgid "Invalid reference \\%d on `s' command's RHS" msgstr "Vigane viide \\%d käsu `s' paremas pooles" --- 158,172 ---- msgstr "SISEMINE VIGA: Halb käsk %c" #. XXX shouldn't this be (UCHAR_MAX+1)? ! 
#: lib/regcomp.c:661 sed/regex.c:46 msgid "No previous regular expression" msgstr "Eelmist regulaaravaldist pole" ! #: sed/regex.c:47 msgid "Cannot specify modifiers on empty regexp" msgstr "Muudatusi tühjale regulaaravaldisele ei saa määrata" ! #: sed/regex.c:145 #, c-format msgid "Invalid reference \\%d on `s' command's RHS" msgstr "Vigane viide \\%d käsu `s' paremas pooles" Binary files sed-4.0.3/po/fi.gmo and sed-4.0.4/po/fi.gmo differ diff -rNC3 sed-4.0.3/po/fi.po sed-4.0.4/po/fi.po *** sed-4.0.3/po/fi.po Mon Oct 28 08:07:42 2002 --- sed-4.0.4/po/fi.po Thu Dec 12 19:56:12 2002 *************** *** 5,11 **** msgid "" msgstr "" "Project-Id-Version: sed-3.02.80\n" ! "POT-Creation-Date: 2002-10-28 08:07+0100\n" "PO-Revision-Date: 2002-07-09 13:02+0200\n" "Last-Translator: Sami J. Laine \n" "Language-Team: Finnish \n" --- 5,11 ---- msgid "" msgstr "" "Project-Id-Version: sed-3.02.80\n" ! "POT-Creation-Date: 2002-12-12 19:56+0100\n" "PO-Revision-Date: 2002-07-09 13:02+0200\n" "Last-Translator: Sami J. Laine \n" "Language-Team: Finnish \n" *************** *** 157,171 **** msgstr "SISÄINEN VIRHE: Virheellinen komento %c" #. XXX shouldn't this be (UCHAR_MAX+1)? ! #: lib/regcomp.c:658 sed/regex.c:47 msgid "No previous regular expression" msgstr "" ! #: sed/regex.c:48 msgid "Cannot specify modifiers on empty regexp" msgstr "" ! #: sed/regex.c:146 #, c-format msgid "Invalid reference \\%d on `s' command's RHS" msgstr "" --- 157,171 ---- msgstr "SISÄINEN VIRHE: Virheellinen komento %c" #. XXX shouldn't this be (UCHAR_MAX+1)? ! #: lib/regcomp.c:661 sed/regex.c:46 msgid "No previous regular expression" msgstr "" ! #: sed/regex.c:47 msgid "Cannot specify modifiers on empty regexp" msgstr "" ! 
#: sed/regex.c:145 #, c-format msgid "Invalid reference \\%d on `s' command's RHS" msgstr "" Binary files sed-4.0.3/po/fr.gmo and sed-4.0.4/po/fr.gmo differ diff -rNC3 sed-4.0.3/po/fr.po sed-4.0.4/po/fr.po *** sed-4.0.3/po/fr.po Tue Nov 19 20:40:42 2002 --- sed-4.0.4/po/fr.po Thu Dec 12 19:56:12 2002 *************** *** 9,15 **** msgid "" msgstr "" "Project-Id-Version: sed 4.0.1\n" ! "POT-Creation-Date: 2002-10-28 08:07+0100\n" "PO-Revision-Date: 2002-11-12 11:28+0100\n" "Last-Translator: Gaël Quéri \n" "Language-Team: French \n" --- 9,15 ---- msgid "" msgstr "" "Project-Id-Version: sed 4.0.1\n" ! "POT-Creation-Date: 2002-12-12 19:56+0100\n" "PO-Revision-Date: 2002-11-12 11:28+0100\n" "Last-Translator: Gaël Quéri \n" "Language-Team: French \n" *************** *** 164,180 **** msgstr "ERREUR INTERNE: mauvaise commande %c" #. XXX shouldn't this be (UCHAR_MAX+1)? ! #: lib/regcomp.c:658 sed/regex.c:47 msgid "No previous regular expression" msgstr "Pas d'expression régulière précédente" ! #: sed/regex.c:48 msgid "Cannot specify modifiers on empty regexp" msgstr "" "Impossible de spécifier des modifieurs sur une expression\n" "rationnelle vide" ! #: sed/regex.c:146 #, c-format msgid "Invalid reference \\%d on `s' command's RHS" msgstr "Référence \\%d invalide dans le côté droit de la commande `s'" --- 164,180 ---- msgstr "ERREUR INTERNE: mauvaise commande %c" #. XXX shouldn't this be (UCHAR_MAX+1)? ! #: lib/regcomp.c:661 sed/regex.c:46 msgid "No previous regular expression" msgstr "Pas d'expression régulière précédente" ! #: sed/regex.c:47 msgid "Cannot specify modifiers on empty regexp" msgstr "" "Impossible de spécifier des modifieurs sur une expression\n" "rationnelle vide" ! 
#: sed/regex.c:145 #, c-format msgid "Invalid reference \\%d on `s' command's RHS" msgstr "Référence \\%d invalide dans le côté droit de la commande `s'" Binary files sed-4.0.3/po/gl.gmo and sed-4.0.4/po/gl.gmo differ diff -rNC3 sed-4.0.3/po/gl.po sed-4.0.4/po/gl.po *** sed-4.0.3/po/gl.po Mon Oct 28 08:07:42 2002 --- sed-4.0.4/po/gl.po Thu Dec 12 19:56:12 2002 *************** *** 5,11 **** msgid "" msgstr "" "Project-Id-Version: sed 4.0\n" ! "POT-Creation-Date: 2002-10-28 08:07+0100\n" "PO-Revision-Date: 2002-10-25 15:57+0200\n" "Last-Translator: Jacobo Tarrío Barreiro \n" "Language-Team: Galician \n" --- 5,11 ---- msgid "" msgstr "" "Project-Id-Version: sed 4.0\n" ! "POT-Creation-Date: 2002-12-12 19:56+0100\n" "PO-Revision-Date: 2002-10-25 15:57+0200\n" "Last-Translator: Jacobo Tarrío Barreiro \n" "Language-Team: Galician \n" *************** *** 158,172 **** msgstr "ERRO INTERNO: instrucción %c incorrecta" #. XXX shouldn't this be (UCHAR_MAX+1)? ! #: lib/regcomp.c:658 sed/regex.c:47 msgid "No previous regular expression" msgstr "Non hai unha expresión regular anterior" ! #: sed/regex.c:48 msgid "Cannot specify modifiers on empty regexp" msgstr "Non se poden especificar modificadores nunha expresión regular baleira" ! #: sed/regex.c:146 #, c-format msgid "Invalid reference \\%d on `s' command's RHS" msgstr "Referencia \\%d non válida no lado dereito do comando `s'" --- 158,172 ---- msgstr "ERRO INTERNO: instrucción %c incorrecta" #. XXX shouldn't this be (UCHAR_MAX+1)? ! #: lib/regcomp.c:661 sed/regex.c:46 msgid "No previous regular expression" msgstr "Non hai unha expresión regular anterior" ! #: sed/regex.c:47 msgid "Cannot specify modifiers on empty regexp" msgstr "Non se poden especificar modificadores nunha expresión regular baleira" ! 
#: sed/regex.c:145 #, c-format msgid "Invalid reference \\%d on `s' command's RHS" msgstr "Referencia \\%d non válida no lado dereito do comando `s'" Binary files sed-4.0.3/po/he.gmo and sed-4.0.4/po/he.gmo differ diff -rNC3 sed-4.0.3/po/he.po sed-4.0.4/po/he.po *** sed-4.0.3/po/he.po Mon Oct 28 08:07:42 2002 --- sed-4.0.4/po/he.po Thu Dec 12 19:56:13 2002 *************** *** 5,11 **** msgid "" msgstr "" "Project-Id-Version: sed 3.02.80\n" ! "POT-Creation-Date: 2002-10-28 08:07+0100\n" "PO-Revision-Date: 2001-08-04 20:37+0300\n" "Last-Translator: Eli Zaretskii \n" "Language-Team: Hebrew \n" --- 5,11 ---- msgid "" msgstr "" "Project-Id-Version: sed 3.02.80\n" ! "POT-Creation-Date: 2002-12-12 19:56+0100\n" "PO-Revision-Date: 2001-08-04 20:37+0300\n" "Last-Translator: Eli Zaretskii \n" "Language-Team: Hebrew \n" *************** *** 157,171 **** msgstr "%c úùáåùî äãå÷ô :äøåîç äðëú úì÷ú" #. XXX shouldn't this be (UCHAR_MAX+1)? ! #: lib/regcomp.c:658 sed/regex.c:47 msgid "No previous regular expression" msgstr "" ! #: sed/regex.c:48 msgid "Cannot specify modifiers on empty regexp" msgstr "" ! #: sed/regex.c:146 #, c-format msgid "Invalid reference \\%d on `s' command's RHS" msgstr "" --- 157,171 ---- msgstr "%c úùáåùî äãå÷ô :äøåîç äðëú úì÷ú" #. XXX shouldn't this be (UCHAR_MAX+1)? ! #: lib/regcomp.c:661 sed/regex.c:46 msgid "No previous regular expression" msgstr "" ! #: sed/regex.c:47 msgid "Cannot specify modifiers on empty regexp" msgstr "" ! #: sed/regex.c:145 #, c-format msgid "Invalid reference \\%d on `s' command's RHS" msgstr "" Binary files sed-4.0.3/po/hr.gmo and sed-4.0.4/po/hr.gmo differ diff -rNC3 sed-4.0.3/po/hr.po sed-4.0.4/po/hr.po *** sed-4.0.3/po/hr.po Mon Oct 28 08:07:42 2002 --- sed-4.0.4/po/hr.po Thu Dec 12 19:56:13 2002 *************** *** 5,11 **** msgid "" msgstr "" "Project-Id-Version: sed 3.02a\n" ! 
"POT-Creation-Date: 2002-10-28 08:07+0100\n" "PO-Revision-Date: 2002-06-14 15:17-01\n" "Last-Translator: Denis Lackovic \n" "Language-Team: Croatian \n" --- 5,11 ---- msgid "" msgstr "" "Project-Id-Version: sed 3.02a\n" ! "POT-Creation-Date: 2002-12-12 19:56+0100\n" "PO-Revision-Date: 2002-06-14 15:17-01\n" "Last-Translator: Denis Lackovic \n" "Language-Team: Croatian \n" *************** *** 159,173 **** msgstr "INTERNA GREÅ KA: Neispravna naredba %c" #. XXX shouldn't this be (UCHAR_MAX+1)? ! #: lib/regcomp.c:658 sed/regex.c:47 msgid "No previous regular expression" msgstr "Nedostaje prethodni regularni izraz" ! #: sed/regex.c:48 msgid "Cannot specify modifiers on empty regexp" msgstr "" ! #: sed/regex.c:146 #, c-format msgid "Invalid reference \\%d on `s' command's RHS" msgstr "" --- 159,173 ---- msgstr "INTERNA GREÅ KA: Neispravna naredba %c" #. XXX shouldn't this be (UCHAR_MAX+1)? ! #: lib/regcomp.c:661 sed/regex.c:46 msgid "No previous regular expression" msgstr "Nedostaje prethodni regularni izraz" ! #: sed/regex.c:47 msgid "Cannot specify modifiers on empty regexp" msgstr "" ! #: sed/regex.c:145 #, c-format msgid "Invalid reference \\%d on `s' command's RHS" msgstr "" Binary files sed-4.0.3/po/hu.gmo and sed-4.0.4/po/hu.gmo differ diff -rNC3 sed-4.0.3/po/hu.po sed-4.0.4/po/hu.po *** sed-4.0.3/po/hu.po Mon Oct 28 08:07:43 2002 --- sed-4.0.4/po/hu.po Thu Dec 12 19:56:13 2002 *************** *** 5,11 **** msgid "" msgstr "" "Project-Id-Version: sed-3.02.80\n" ! "POT-Creation-Date: 2002-10-28 08:07+0100\n" "PO-Revision-Date: 2002-09-18 14:43GMT\n" "Last-Translator: Gábor István \n" "Language-Team: Hungarian \n" --- 5,11 ---- msgid "" msgstr "" "Project-Id-Version: sed-3.02.80\n" ! "POT-Creation-Date: 2002-12-12 19:56+0100\n" "PO-Revision-Date: 2002-09-18 14:43GMT\n" "Last-Translator: Gábor István \n" "Language-Team: Hungarian \n" *************** *** 158,172 **** msgstr "BELSÕ HIBA: Rossz parancs %c" #. XXX shouldn't this be (UCHAR_MAX+1)? ! 
#: lib/regcomp.c:658 sed/regex.c:47 msgid "No previous regular expression" msgstr "" ! #: sed/regex.c:48 msgid "Cannot specify modifiers on empty regexp" msgstr "" ! #: sed/regex.c:146 #, c-format msgid "Invalid reference \\%d on `s' command's RHS" msgstr "" --- 158,172 ---- msgstr "BELSÕ HIBA: Rossz parancs %c" #. XXX shouldn't this be (UCHAR_MAX+1)? ! #: lib/regcomp.c:661 sed/regex.c:46 msgid "No previous regular expression" msgstr "" ! #: sed/regex.c:47 msgid "Cannot specify modifiers on empty regexp" msgstr "" ! #: sed/regex.c:145 #, c-format msgid "Invalid reference \\%d on `s' command's RHS" msgstr "" Binary files sed-4.0.3/po/id.gmo and sed-4.0.4/po/id.gmo differ diff -rNC3 sed-4.0.3/po/id.po sed-4.0.4/po/id.po *** sed-4.0.3/po/id.po Tue Oct 29 20:25:36 2002 --- sed-4.0.4/po/id.po Thu Dec 12 19:56:13 2002 *************** *** 1,12 **** ! # sed 4.0 (Indonesian) # Copyright (C) 1999, 2000, 2001 Free Software Foundation, Inc. # Tedi Heriyanto , 2002. # msgid "" msgstr "" ! "Project-Id-Version: sed 4.0\n" ! "POT-Creation-Date: 2002-10-28 08:07+0100\n" ! "PO-Revision-Date: 2002-10-28 22:52+0700\n" "Last-Translator: Tedi Heriyanto \n" "Language-Team: Indonesian \n" "MIME-Version: 1.0\n" --- 1,12 ---- ! # sed 4.0.2 (Indonesian) # Copyright (C) 1999, 2000, 2001 Free Software Foundation, Inc. # Tedi Heriyanto , 2002. # msgid "" msgstr "" ! "Project-Id-Version: sed 4.0.2\n" ! "POT-Creation-Date: 2002-12-12 19:56+0100\n" ! "PO-Revision-Date: 2002-11-22 12:24GMT+0700\n" "Last-Translator: Tedi Heriyanto \n" "Language-Team: Indonesian \n" "MIME-Version: 1.0\n" *************** *** 159,173 **** msgstr "KESALAHAN INTERNAL: Perintah %c buruk" #. XXX shouldn't this be (UCHAR_MAX+1)? ! #: lib/regcomp.c:658 sed/regex.c:47 msgid "No previous regular expression" msgstr "Tidak ada reguler ekspresi sebelumnya" ! #: sed/regex.c:48 msgid "Cannot specify modifiers on empty regexp" msgstr "Tidak dapat menspesifikasikan modified pada regexp kosong" ! 
#: sed/regex.c:146 #, c-format msgid "Invalid reference \\%d on `s' command's RHS" msgstr "Referensi tidak valid \\%d pada perintah `s' RHS" --- 159,173 ---- msgstr "KESALAHAN INTERNAL: Perintah %c buruk" #. XXX shouldn't this be (UCHAR_MAX+1)? ! #: lib/regcomp.c:661 sed/regex.c:46 msgid "No previous regular expression" msgstr "Tidak ada reguler ekspresi sebelumnya" ! #: sed/regex.c:47 msgid "Cannot specify modifiers on empty regexp" msgstr "Tidak dapat menspesifikasikan modified pada regexp kosong" ! #: sed/regex.c:145 #, c-format msgid "Invalid reference \\%d on `s' command's RHS" msgstr "Referensi tidak valid \\%d pada perintah `s' RHS" Binary files sed-4.0.3/po/it.gmo and sed-4.0.4/po/it.gmo differ diff -rNC3 sed-4.0.3/po/it.po sed-4.0.4/po/it.po *** sed-4.0.3/po/it.po Mon Oct 28 08:07:43 2002 --- sed-4.0.4/po/it.po Thu Dec 12 19:56:13 2002 *************** *** 1,176 **** ! # traduzione di sed ! # Copyright (C) 1999 Free Software Foundation, Inc. ! # Paolo Bonzini , 2001 ! # msgid "" msgstr "" ! "Project-Id-Version: ssed 3.46\n" ! "POT-Creation-Date: 2002-10-28 08:07+0100\n" ! "PO-Revision-Date: 2000-12-21 12:44+0100\n" "Last-Translator: Paolo Bonzini \n" ! "Language-Team: Italian \n" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=ISO-8859-1\n" "Content-Transfer-Encoding: 8-bit\n" "Plural-Forms: nplurals=2; plural=n != 1;\n" #. Various error messages we may want to print #: sed/compile.c:165 msgid "Invalid use of address modifier" msgstr "Uso non valido del modificatore dell'indirizzo" #: sed/compile.c:166 msgid "Multiple `!'s" msgstr "`!' 
multipli" #: sed/compile.c:167 msgid "Unexpected `,'" msgstr "`,' inattesa" #: sed/compile.c:168 msgid "Unexpected End-of-file" msgstr "fine del file inattesa" #: sed/compile.c:169 msgid "Cannot use +N or ~N as first address" msgstr "Impossibile usare +N o ~N come primo indirizzo" #: sed/compile.c:170 msgid "Unmatched `{'" msgstr "`{' non bilanciata" #: sed/compile.c:171 msgid "Unexpected `}'" msgstr "`}' inattesa" #: sed/compile.c:172 msgid "Extra characters after command" msgstr "Ci sono altri caratteri dopo il comando" #: sed/compile.c:173 msgid "Expected \\ after `a', `c' or `i'" msgstr "Atteso \\ dopo `a', `c' o `i'" #: sed/compile.c:174 msgid "`}' doesn't want any addresses" msgstr "`}' non accetta indirizzi" #: sed/compile.c:175 msgid ": doesn't want any addresses" msgstr ": non accetta indirizzi" #: sed/compile.c:176 msgid "Comments don't accept any addresses" msgstr "I commenti non accettano indirizzi" #: sed/compile.c:177 msgid "Missing command" msgstr "Manca il comando" #: sed/compile.c:178 msgid "Command only uses one address" msgstr "Il comando usa solo un indirizzo" #: sed/compile.c:179 msgid "Unterminated address regex" msgstr "Indirizzo della regex non terminato" #: sed/compile.c:180 msgid "Unterminated `s' command" msgstr "Comando `s' non terminato" #: sed/compile.c:181 msgid "Unterminated `y' command" msgstr "Comando `y' non terminato" #: sed/compile.c:182 msgid "Unknown option to `s'" msgstr "Opzione di `s' sconosciuta" #: sed/compile.c:183 msgid "multiple `p' options to `s' command" msgstr "opzioni `p' multiple al comando `s'" #: sed/compile.c:184 msgid "multiple `g' options to `s' command" msgstr "opzioni `g' multiple al comando `s'" #: sed/compile.c:186 msgid "multiple number options to `s' command" msgstr "opzioni numeriche multiple al comando `s'" #: sed/compile.c:188 msgid "number option to `s' command may not be zero" msgstr "l'opzione numerica del comando `s' non può essere zero" #: sed/compile.c:190 msgid "strings for y command are 
different lengths" msgstr "le stringhe per i comandi y hanno lunghezze diverse" #: sed/compile.c:233 #, c-format msgid "Called savchar() with unexpected pushback (%x)" msgstr "Chiamata savchar() con un pushback inatteso (%x)" #: sed/compile.c:1319 msgid "Unknown command:" msgstr "Comando sconosciuto:" #: sed/compile.c:1340 #, c-format msgid "%s: file %s line %lu: %s\n" msgstr "%s: file %s riga %lu: %s\n" #: sed/compile.c:1343 #, c-format msgid "%s: -e expression #%lu, char %lu: %s\n" msgstr "%s: espressione -e #%lu, carattere %lu: %s\n" #: sed/compile.c:1543 #, c-format msgid "Can't find label for jump to `%s'" msgstr "Impossibile trovare un'etichetta per il salto a `%s'" #: sed/execute.c:516 #, c-format msgid "%s: can't read %s: %s\n" msgstr "%s: impossibile leggere %s: %s\n" #: sed/execute.c:722 msgid "INTERNAL ERROR: bad address type" msgstr "INTERNAL ERROR: bad address type" #: sed/execute.c:1003 sed/execute.c:1183 msgid "error in subprocess" msgstr "errore in un sottoprocesso" #: sed/execute.c:1005 msgid "option `e' not supported" msgstr "opzione `e' non supportata" #: sed/execute.c:1185 msgid "`e' command not supported" msgstr "comando `e' non supportato" #: sed/execute.c:1415 #, c-format msgid "INTERNAL ERROR: Bad cmd %c" msgstr "INTERNAL ERROR: Bad cmd %c" #. XXX shouldn't this be (UCHAR_MAX+1)? ! #: lib/regcomp.c:658 sed/regex.c:47 msgid "No previous regular expression" msgstr "Occorre un'espressione regolare precedente" ! #: sed/regex.c:48 msgid "Cannot specify modifiers on empty regexp" msgstr "Non e' possibile specificare dei modificatori per l'espressione vuota" ! #: sed/regex.c:146 #, c-format msgid "Invalid reference \\%d on `s' command's RHS" msgstr "Riferimento non valido \\%d nel secondo membro del comando `s'" #: sed/sed.c:98 msgid "" " -R, --regexp-perl\n" --- 1,214 ---- ! # traduzione di sed ! # Copyright (C) 1999 Free Software Foundation, Inc. ! # Paolo Bonzini , 2001 ! # msgid "" msgstr "" ! "Project-Id-Version: sed 4.0.3\n" ! 
"POT-Creation-Date: 2002-12-12 19:56+0100\n" ! "PO-Revision-Date: 2002-11-26 12:44+0100\n" "Last-Translator: Paolo Bonzini \n" ! "Language-Team: Italian \n" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=ISO-8859-1\n" "Content-Transfer-Encoding: 8-bit\n" "Plural-Forms: nplurals=2; plural=n != 1;\n" + # sed/compile.c:165 #. Various error messages we may want to print #: sed/compile.c:165 msgid "Invalid use of address modifier" msgstr "Uso non valido del modificatore dell'indirizzo" + # sed/compile.c:166 #: sed/compile.c:166 msgid "Multiple `!'s" msgstr "`!' multipli" + # sed/compile.c:167 #: sed/compile.c:167 msgid "Unexpected `,'" msgstr "`,' inattesa" + # sed/compile.c:168 #: sed/compile.c:168 msgid "Unexpected End-of-file" msgstr "fine del file inattesa" + # sed/compile.c:169 #: sed/compile.c:169 msgid "Cannot use +N or ~N as first address" msgstr "Impossibile usare +N o ~N come primo indirizzo" + # sed/compile.c:170 #: sed/compile.c:170 msgid "Unmatched `{'" msgstr "`{' non bilanciata" + # sed/compile.c:171 #: sed/compile.c:171 msgid "Unexpected `}'" msgstr "`}' inattesa" + # sed/compile.c:172 #: sed/compile.c:172 msgid "Extra characters after command" msgstr "Ci sono altri caratteri dopo il comando" + # sed/compile.c:173 #: sed/compile.c:173 msgid "Expected \\ after `a', `c' or `i'" msgstr "Atteso \\ dopo `a', `c' o `i'" + # sed/compile.c:174 #: sed/compile.c:174 msgid "`}' doesn't want any addresses" msgstr "`}' non accetta indirizzi" + # sed/compile.c:175 #: sed/compile.c:175 msgid ": doesn't want any addresses" msgstr ": non accetta indirizzi" + # sed/compile.c:176 #: sed/compile.c:176 msgid "Comments don't accept any addresses" msgstr "I commenti non accettano indirizzi" + # sed/compile.c:177 #: sed/compile.c:177 msgid "Missing command" msgstr "Manca il comando" + # sed/compile.c:178 #: sed/compile.c:178 msgid "Command only uses one address" msgstr "Il comando usa solo un indirizzo" + # sed/compile.c:179 #: sed/compile.c:179 msgid "Unterminated 
address regex" msgstr "Indirizzo della regex non terminato" + # sed/compile.c:180 #: sed/compile.c:180 msgid "Unterminated `s' command" msgstr "Comando `s' non terminato" + # sed/compile.c:181 #: sed/compile.c:181 msgid "Unterminated `y' command" msgstr "Comando `y' non terminato" + # sed/compile.c:182 #: sed/compile.c:182 msgid "Unknown option to `s'" msgstr "Opzione di `s' sconosciuta" + # sed/compile.c:183 #: sed/compile.c:183 msgid "multiple `p' options to `s' command" msgstr "opzioni `p' multiple al comando `s'" + # sed/compile.c:184 #: sed/compile.c:184 msgid "multiple `g' options to `s' command" msgstr "opzioni `g' multiple al comando `s'" + # sed/compile.c:186 #: sed/compile.c:186 msgid "multiple number options to `s' command" msgstr "opzioni numeriche multiple al comando `s'" + # sed/compile.c:188 #: sed/compile.c:188 msgid "number option to `s' command may not be zero" msgstr "l'opzione numerica del comando `s' non può essere zero" + # sed/compile.c:190 #: sed/compile.c:190 msgid "strings for y command are different lengths" msgstr "le stringhe per i comandi y hanno lunghezze diverse" + # sed/compile.c:233 #: sed/compile.c:233 #, c-format msgid "Called savchar() with unexpected pushback (%x)" msgstr "Chiamata savchar() con un pushback inatteso (%x)" + # sed/compile.c:1319 #: sed/compile.c:1319 msgid "Unknown command:" msgstr "Comando sconosciuto:" + # sed/compile.c:1340 #: sed/compile.c:1340 #, c-format msgid "%s: file %s line %lu: %s\n" msgstr "%s: file %s riga %lu: %s\n" + # sed/compile.c:1343 #: sed/compile.c:1343 #, c-format msgid "%s: -e expression #%lu, char %lu: %s\n" msgstr "%s: espressione -e #%lu, carattere %lu: %s\n" + # sed/compile.c:1543 #: sed/compile.c:1543 #, c-format msgid "Can't find label for jump to `%s'" msgstr "Impossibile trovare un'etichetta per il salto a `%s'" + # sed/execute.c:516 #: sed/execute.c:516 #, c-format msgid "%s: can't read %s: %s\n" msgstr "%s: impossibile leggere %s: %s\n" + # sed/execute.c:722 #: sed/execute.c:722 
msgid "INTERNAL ERROR: bad address type" msgstr "INTERNAL ERROR: bad address type" + # sed/execute.c:1003 sed/execute.c:1183 #: sed/execute.c:1003 sed/execute.c:1183 msgid "error in subprocess" msgstr "errore in un sottoprocesso" + # sed/execute.c:1005 #: sed/execute.c:1005 msgid "option `e' not supported" msgstr "opzione `e' non supportata" + # sed/execute.c:1185 #: sed/execute.c:1185 msgid "`e' command not supported" msgstr "comando `e' non supportato" + # sed/execute.c:1415 #: sed/execute.c:1415 #, c-format msgid "INTERNAL ERROR: Bad cmd %c" msgstr "INTERNAL ERROR: Bad cmd %c" + # lib/regcomp.c:658 sed/regex.c:47 #. XXX shouldn't this be (UCHAR_MAX+1)? ! #: lib/regcomp.c:661 sed/regex.c:46 msgid "No previous regular expression" msgstr "Occorre un'espressione regolare precedente" ! # sed/regex.c:48 ! #: sed/regex.c:47 msgid "Cannot specify modifiers on empty regexp" msgstr "Non e' possibile specificare dei modificatori per l'espressione vuota" ! # sed/regex.c:146 ! #: sed/regex.c:145 #, c-format msgid "Invalid reference \\%d on `s' command's RHS" msgstr "Riferimento non valido \\%d nel secondo membro del comando `s'" + # sed/sed.c:98 #: sed/sed.c:98 msgid "" " -R, --regexp-perl\n" *************** *** 179,184 **** --- 217,223 ---- " -R, --regexp-perl\n" " usa la sintassi Perl 5 per le espressioni regolari\n" + # sed/sed.c:103 #: sed/sed.c:103 #, c-format msgid "" *************** *** 247,252 **** --- 286,292 ---- "sono specificati file di input sarà letto lo standard input.\n" "\n" + # sed/sed.c:132 #: sed/sed.c:132 #, c-format msgid "" *************** *** 256,266 **** --- 296,308 ---- "Segnalare eventuali bug a: %s .\n" "Assicurarsi di includere la parola ``%s'' nell'oggetto del messaggio.\n" + # sed/sed.c:255 #: sed/sed.c:255 #, c-format msgid "super-sed version %s\n" msgstr "super-sed versione %s\n" + # sed/sed.c:256 #: sed/sed.c:256 msgid "" "based on GNU sed version 3.02.80\n" *************** *** 269,279 **** --- 311,323 ---- "basato su GNU sed 3.02.80\n" "\n" 
+ # sed/sed.c:258 #: sed/sed.c:258 #, c-format msgid "GNU sed version %s\n" msgstr "GNU sed versione %s\n" + # sed/sed.c:260 #: sed/sed.c:260 #, c-format msgid "" *************** *** 288,298 **** --- 332,344 ---- "NON c'è garanzia; neppure di COMMERCIABILITA' o IDONEITA' AD UN PARTICOLARE\n" "SCOPO, nei limiti permessi dalla legge.\n" + # lib/utils.c:131 #: lib/utils.c:131 #, c-format msgid "Couldn't open file %s: %s" msgstr "Impossibile aprire il file %s: %s" + # lib/utils.c:161 #: lib/utils.c:161 #, c-format msgid "couldn't write %d item to %s: %s" *************** *** 300,374 **** --- 346,438 ---- msgstr[0] "Impossibile scrivere %d elemento su %s: %s" msgstr[1] "Impossibile scrivere %d elementi su %s: %s" + # lib/utils.c:176 #: lib/utils.c:176 #, c-format msgid "read error on %s: %s" msgstr "errore di lettura su %s: %s" + # lib/regcomp.c:179 #: lib/regcomp.c:179 msgid "Success" msgstr "Successo" + # lib/regcomp.c:182 #: lib/regcomp.c:182 msgid "No match" msgstr "Nessuna corrispondenza trovata" + # lib/regcomp.c:185 #: lib/regcomp.c:185 msgid "Invalid regular expression" msgstr "Espressione regolare non valida" + # lib/regcomp.c:188 #: lib/regcomp.c:188 msgid "Invalid collation character" msgstr "Carattere di ordinamento non valido" + # lib/regcomp.c:191 #: lib/regcomp.c:191 msgid "Invalid character class name" msgstr "Nome non valido per una classe di caratteri" + # lib/regcomp.c:194 #: lib/regcomp.c:194 msgid "Trailing backslash" msgstr "Barra rovesciata alla fine dell'espressione regolare" + # lib/regcomp.c:197 #: lib/regcomp.c:197 msgid "Invalid back reference" msgstr "Riferimento non valido" + # lib/regcomp.c:200 #: lib/regcomp.c:200 msgid "Unmatched [ or [^" msgstr "`[' non bilanciata" + # lib/regcomp.c:203 #: lib/regcomp.c:203 msgid "Unmatched ( or \\(" msgstr "`(' o `\\(' non bilanciata" + # lib/regcomp.c:206 #: lib/regcomp.c:206 msgid "Unmatched \\{" msgstr "`\\{' non bilanciata" + # lib/regcomp.c:209 #: lib/regcomp.c:209 msgid "Invalid content of \\{\\}" 
msgstr "numero di ripetizioni specificato tra graffe non valido" + # lib/regcomp.c:212 #: lib/regcomp.c:212 msgid "Invalid range end" msgstr "Fine dell'intervallo non valida" + # lib/regcomp.c:215 #: lib/regcomp.c:215 msgid "Memory exhausted" msgstr "Memoria esaurita" + # lib/regcomp.c:218 #: lib/regcomp.c:218 msgid "Invalid preceding regular expression" msgstr "Espressione regolare precedente non valida" + # lib/regcomp.c:221 #: lib/regcomp.c:221 msgid "Premature end of regular expression" msgstr "Fine prematura dell'espressione regolare" + # lib/regcomp.c:224 #: lib/regcomp.c:224 msgid "Regular expression too big" msgstr "Espressione regolare troppo grande" + # lib/regcomp.c:227 #: lib/regcomp.c:227 msgid "Unmatched ) or \\)" msgstr "`)' o `\\)' non bilanciata" Binary files sed-4.0.3/po/ja.gmo and sed-4.0.4/po/ja.gmo differ diff -rNC3 sed-4.0.3/po/ja.po sed-4.0.4/po/ja.po *** sed-4.0.3/po/ja.po Mon Oct 28 08:07:43 2002 --- sed-4.0.4/po/ja.po Thu Dec 12 19:56:13 2002 *************** *** 9,15 **** msgid "" msgstr "" "Project-Id-Version: GNU sed 4.0\n" ! "POT-Creation-Date: 2002-10-28 08:07+0100\n" "PO-Revision-Date: 2002-10-24 23:18+0900\n" "Last-Translator: IIDA Yosiaki \n" "Language-Team: Japanese \n" --- 9,15 ---- msgid "" msgstr "" "Project-Id-Version: GNU sed 4.0\n" ! "POT-Creation-Date: 2002-12-12 19:56+0100\n" "PO-Revision-Date: 2002-10-24 23:18+0900\n" "Last-Translator: IIDA Yosiaki \n" "Language-Team: Japanese \n" *************** *** 162,176 **** msgstr "ÆâÉô¥¨¥é¡¼: ÉÔÀµ¥³¥Þ¥ó¥É %c" #. XXX shouldn't this be (UCHAR_MAX+1)? ! #: lib/regcomp.c:658 sed/regex.c:47 msgid "No previous regular expression" msgstr "ľÁ°¤ÎÀµµ¬É½¸½¤¬¡¢¤¢¤ê¤Þ¤»¤ó" ! #: sed/regex.c:48 msgid "Cannot specify modifiers on empty regexp" msgstr "½¤¾þ»Ò¤Ï¡¢¶õ¤ÎÀµµ¬É½¸½¤Ë»ØÄê¤Ç¤­¤Þ¤»¤ó" ! #: sed/regex.c:146 #, c-format msgid "Invalid reference \\%d on `s' command's RHS" msgstr "¡Ös¡×¥³¥Þ¥ó¥É¤Î±¦Â¦¤Ë̵¸ú¤Ê\\%d¤Î»²¾È" --- 162,176 ---- msgstr "ÆâÉô¥¨¥é¡¼: ÉÔÀµ¥³¥Þ¥ó¥É %c" #. 
XXX shouldn't this be (UCHAR_MAX+1)? ! #: lib/regcomp.c:661 sed/regex.c:46 msgid "No previous regular expression" msgstr "ľÁ°¤ÎÀµµ¬É½¸½¤¬¡¢¤¢¤ê¤Þ¤»¤ó" ! #: sed/regex.c:47 msgid "Cannot specify modifiers on empty regexp" msgstr "½¤¾þ»Ò¤Ï¡¢¶õ¤ÎÀµµ¬É½¸½¤Ë»ØÄê¤Ç¤­¤Þ¤»¤ó" ! #: sed/regex.c:145 #, c-format msgid "Invalid reference \\%d on `s' command's RHS" msgstr "¡Ös¡×¥³¥Þ¥ó¥É¤Î±¦Â¦¤Ë̵¸ú¤Ê\\%d¤Î»²¾È" Binary files sed-4.0.3/po/ko.gmo and sed-4.0.4/po/ko.gmo differ diff -rNC3 sed-4.0.3/po/ko.po sed-4.0.4/po/ko.po *** sed-4.0.3/po/ko.po Mon Oct 28 08:07:43 2002 --- sed-4.0.4/po/ko.po Thu Dec 12 19:56:14 2002 *************** *** 5,11 **** msgid "" msgstr "" "Project-Id-Version: GNU sed 3.02.80\n" ! "POT-Creation-Date: 2002-10-28 08:07+0100\n" "PO-Revision-Date: 2001-10-12 17:26+0900\n" "Last-Translator: Jong-Hoon Ryu \n" "Language-Team: Korean \n" --- 5,11 ---- msgid "" msgstr "" "Project-Id-Version: GNU sed 3.02.80\n" ! "POT-Creation-Date: 2002-12-12 19:56+0100\n" "PO-Revision-Date: 2001-10-12 17:26+0900\n" "Last-Translator: Jong-Hoon Ryu \n" "Language-Team: Korean \n" *************** *** 157,171 **** msgstr "³»ºÎ ¿À·ù: %c (Àº)´Â À߸øµÈ ¸í·ÉÀÔ´Ï´Ù" #. XXX shouldn't this be (UCHAR_MAX+1)? ! #: lib/regcomp.c:658 sed/regex.c:47 msgid "No previous regular expression" msgstr "" ! #: sed/regex.c:48 msgid "Cannot specify modifiers on empty regexp" msgstr "" ! #: sed/regex.c:146 #, c-format msgid "Invalid reference \\%d on `s' command's RHS" msgstr "" --- 157,171 ---- msgstr "³»ºÎ ¿À·ù: %c (Àº)´Â À߸øµÈ ¸í·ÉÀÔ´Ï´Ù" #. XXX shouldn't this be (UCHAR_MAX+1)? ! #: lib/regcomp.c:661 sed/regex.c:46 msgid "No previous regular expression" msgstr "" ! #: sed/regex.c:47 msgid "Cannot specify modifiers on empty regexp" msgstr "" ! 
#: sed/regex.c:145 #, c-format msgid "Invalid reference \\%d on `s' command's RHS" msgstr "" Binary files sed-4.0.3/po/nl.gmo and sed-4.0.4/po/nl.gmo differ diff -rNC3 sed-4.0.3/po/nl.po sed-4.0.4/po/nl.po *** sed-4.0.3/po/nl.po Mon Oct 28 08:07:43 2002 --- sed-4.0.4/po/nl.po Thu Dec 12 19:56:14 2002 *************** *** 6,12 **** msgid "" msgstr "" "Project-Id-Version: sed 3.02a\n" ! "POT-Creation-Date: 2002-10-28 08:07+0100\n" "PO-Revision-Date: 2002-10-25 01:08-04:00\n" "Last-Translator: Ivo Timmermans \n" "Language-Team: Dutch \n" --- 6,12 ---- msgid "" msgstr "" "Project-Id-Version: sed 3.02a\n" ! "POT-Creation-Date: 2002-12-12 19:56+0100\n" "PO-Revision-Date: 2002-10-25 01:08-04:00\n" "Last-Translator: Ivo Timmermans \n" "Language-Team: Dutch \n" *************** *** 159,173 **** msgstr "INTERNE FOUT: Onjuist cmd %c" #. XXX shouldn't this be (UCHAR_MAX+1)? ! #: lib/regcomp.c:658 sed/regex.c:47 msgid "No previous regular expression" msgstr "Geen voorgaande reguliere expressie" ! #: sed/regex.c:48 msgid "Cannot specify modifiers on empty regexp" msgstr "" ! #: sed/regex.c:146 #, c-format msgid "Invalid reference \\%d on `s' command's RHS" msgstr "" --- 159,173 ---- msgstr "INTERNE FOUT: Onjuist cmd %c" #. XXX shouldn't this be (UCHAR_MAX+1)? ! #: lib/regcomp.c:661 sed/regex.c:46 msgid "No previous regular expression" msgstr "Geen voorgaande reguliere expressie" ! #: sed/regex.c:47 msgid "Cannot specify modifiers on empty regexp" msgstr "" ! #: sed/regex.c:145 #, c-format msgid "Invalid reference \\%d on `s' command's RHS" msgstr "" Binary files sed-4.0.3/po/pt_BR.gmo and sed-4.0.4/po/pt_BR.gmo differ diff -rNC3 sed-4.0.3/po/pt_BR.po sed-4.0.4/po/pt_BR.po *** sed-4.0.3/po/pt_BR.po Tue Nov 19 20:40:43 2002 --- sed-4.0.4/po/pt_BR.po Thu Dec 12 19:56:14 2002 *************** *** 8,14 **** msgid "" msgstr "" "Project-Id-Version: sed 4.0.1\n" ! 
"POT-Creation-Date: 2002-10-28 08:07+0100\n" "PO-Revision-Date: 2002-11-08 17:44-0300\n" "Last-Translator: Aurélio Marinho Jargas \n" "Language-Team: Brazilian Portuguese \n" --- 8,14 ---- msgid "" msgstr "" "Project-Id-Version: sed 4.0.1\n" ! "POT-Creation-Date: 2002-12-12 19:56+0100\n" "PO-Revision-Date: 2002-11-08 17:44-0300\n" "Last-Translator: Aurélio Marinho Jargas \n" "Language-Team: Brazilian Portuguese \n" *************** *** 161,175 **** msgstr "ERRO INTERNO: Comando inválido %c" #. XXX shouldn't this be (UCHAR_MAX+1)? ! #: lib/regcomp.c:658 sed/regex.c:47 msgid "No previous regular expression" msgstr "Não há expressão regular anterior" ! #: sed/regex.c:48 msgid "Cannot specify modifiers on empty regexp" msgstr "Não é permitido especificar modificadores numa expressão regular vazia" ! #: sed/regex.c:146 #, c-format msgid "Invalid reference \\%d on `s' command's RHS" msgstr "Referência inválida \\%d na segunda parte do comando `s'" --- 161,175 ---- msgstr "ERRO INTERNO: Comando inválido %c" #. XXX shouldn't this be (UCHAR_MAX+1)? ! #: lib/regcomp.c:661 sed/regex.c:46 msgid "No previous regular expression" msgstr "Não há expressão regular anterior" ! #: sed/regex.c:47 msgid "Cannot specify modifiers on empty regexp" msgstr "Não é permitido especificar modificadores numa expressão regular vazia" ! #: sed/regex.c:145 #, c-format msgid "Invalid reference \\%d on `s' command's RHS" msgstr "Referência inválida \\%d na segunda parte do comando `s'" Binary files sed-4.0.3/po/ru.gmo and sed-4.0.4/po/ru.gmo differ diff -rNC3 sed-4.0.3/po/ru.po sed-4.0.4/po/ru.po *** sed-4.0.3/po/ru.po Mon Oct 28 08:07:44 2002 --- sed-4.0.4/po/ru.po Thu Dec 12 19:56:14 2002 *************** *** 5,11 **** msgid "" msgstr "" "Project-Id-Version: sed 3.02a\n" ! "POT-Creation-Date: 2002-10-28 08:07+0100\n" "PO-Revision-Date: 1998-11-23 17:30+07:00\n" "Last-Translator: Const Kaplinsky \n" "Language-Team: Russian \n" --- 5,11 ---- msgid "" msgstr "" "Project-Id-Version: sed 3.02a\n" ! 
"POT-Creation-Date: 2002-12-12 19:56+0100\n" "PO-Revision-Date: 1998-11-23 17:30+07:00\n" "Last-Translator: Const Kaplinsky \n" "Language-Team: Russian \n" *************** *** 157,171 **** msgstr "÷îõôòåîîññ ïûéâëá: ÎÅÄÏÐÕÓÔÉÍÁÑ ËÏÍÁÎÄÁ `%c'" #. XXX shouldn't this be (UCHAR_MAX+1)? ! #: lib/regcomp.c:658 sed/regex.c:47 msgid "No previous regular expression" msgstr "îÅÔ ÐÒÅÄÙÄÕÝÅÇÏ ÒÅÇÕÌÑÒÎÏÇÏ ×ÙÒÁÖÅÎÉÑ" ! #: sed/regex.c:48 msgid "Cannot specify modifiers on empty regexp" msgstr "" ! #: sed/regex.c:146 #, c-format msgid "Invalid reference \\%d on `s' command's RHS" msgstr "" --- 157,171 ---- msgstr "÷îõôòåîîññ ïûéâëá: ÎÅÄÏÐÕÓÔÉÍÁÑ ËÏÍÁÎÄÁ `%c'" #. XXX shouldn't this be (UCHAR_MAX+1)? ! #: lib/regcomp.c:661 sed/regex.c:46 msgid "No previous regular expression" msgstr "îÅÔ ÐÒÅÄÙÄÕÝÅÇÏ ÒÅÇÕÌÑÒÎÏÇÏ ×ÙÒÁÖÅÎÉÑ" ! #: sed/regex.c:47 msgid "Cannot specify modifiers on empty regexp" msgstr "" ! #: sed/regex.c:145 #, c-format msgid "Invalid reference \\%d on `s' command's RHS" msgstr "" diff -rNC3 sed-4.0.3/po/sed.pot sed-4.0.4/po/sed.pot *** sed-4.0.3/po/sed.pot Mon Oct 28 08:07:39 2002 --- sed-4.0.4/po/sed.pot Thu Dec 12 19:56:11 2002 *************** *** 6,12 **** msgid "" msgstr "" "Project-Id-Version: PACKAGE VERSION\n" ! "POT-Creation-Date: 2002-10-28 08:07+0100\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME \n" "Language-Team: LANGUAGE \n" --- 6,12 ---- msgid "" msgstr "" "Project-Id-Version: PACKAGE VERSION\n" ! "POT-Creation-Date: 2002-12-12 19:56+0100\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME \n" "Language-Team: LANGUAGE \n" *************** *** 158,172 **** msgstr "" #. XXX shouldn't this be (UCHAR_MAX+1)? ! #: lib/regcomp.c:658 sed/regex.c:47 msgid "No previous regular expression" msgstr "" ! #: sed/regex.c:48 msgid "Cannot specify modifiers on empty regexp" msgstr "" ! #: sed/regex.c:146 #, c-format msgid "Invalid reference \\%d on `s' command's RHS" msgstr "" --- 158,172 ---- msgstr "" #. 
XXX shouldn't this be (UCHAR_MAX+1)? ! #: lib/regcomp.c:661 sed/regex.c:46 msgid "No previous regular expression" msgstr "" ! #: sed/regex.c:47 msgid "Cannot specify modifiers on empty regexp" msgstr "" ! #: sed/regex.c:145 #, c-format msgid "Invalid reference \\%d on `s' command's RHS" msgstr "" Binary files sed-4.0.3/po/sk.gmo and sed-4.0.4/po/sk.gmo differ diff -rNC3 sed-4.0.3/po/sk.po sed-4.0.4/po/sk.po *** sed-4.0.3/po/sk.po Mon Oct 28 08:07:44 2002 --- sed-4.0.4/po/sk.po Thu Dec 12 19:56:14 2002 *************** *** 6,12 **** msgid "" msgstr "" "Project-Id-Version: GNU sed 4.0\n" ! "POT-Creation-Date: 2002-10-28 08:07+0100\n" "PO-Revision-Date: 2002-10-25 08:37+0200\n" "Last-Translator: Marcel Telka \n" "Language-Team: Slovak \n" --- 6,12 ---- msgid "" msgstr "" "Project-Id-Version: GNU sed 4.0\n" ! "POT-Creation-Date: 2002-12-12 19:56+0100\n" "PO-Revision-Date: 2002-10-25 08:37+0200\n" "Last-Translator: Marcel Telka \n" "Language-Team: Slovak \n" *************** *** 159,174 **** msgstr "INTERN� CHYBA: zl� pr�kaz %c" #. XXX shouldn't this be (UCHAR_MAX+1)? ! #: lib/regcomp.c:658 sed/regex.c:47 msgid "No previous regular expression" msgstr "Bez predch�dzaj�ceho regul�rneho v�razu" ! #: sed/regex.c:48 msgid "Cannot specify modifiers on empty regexp" msgstr "" "Nie je mo�n� zada� modifik�tory pre pr�zdny regul�rny v�raz" ! #: sed/regex.c:146 #, c-format msgid "Invalid reference \\%d on `s' command's RHS" msgstr "Neplatn� odkaz \\%d na `s' pr�kazu RHS" --- 159,174 ---- msgstr "INTERN� CHYBA: zl� pr�kaz %c" #. XXX shouldn't this be (UCHAR_MAX+1)? ! #: lib/regcomp.c:661 sed/regex.c:46 msgid "No previous regular expression" msgstr "Bez predch�dzaj�ceho regul�rneho v�razu" ! #: sed/regex.c:47 msgid "Cannot specify modifiers on empty regexp" msgstr "" "Nie je mo�n� zada� modifik�tory pre pr�zdny regul�rny v�raz" ! 
#: sed/regex.c:145 #, c-format msgid "Invalid reference \\%d on `s' command's RHS" msgstr "Neplatn� odkaz \\%d na `s' pr�kazu RHS" Binary files sed-4.0.3/po/sl.gmo and sed-4.0.4/po/sl.gmo differ diff -rNC3 sed-4.0.3/po/sl.po sed-4.0.4/po/sl.po *** sed-4.0.3/po/sl.po Mon Oct 28 08:07:44 2002 --- sed-4.0.4/po/sl.po Thu Dec 12 19:56:14 2002 *************** *** 6,12 **** msgid "" msgstr "" "Project-Id-Version: sed 4.0\n" ! "POT-Creation-Date: 2002-10-28 08:07+0100\n" "PO-Revision-Date: 2002-10-25 15:17+0200\n" "Last-Translator: Primo¾ Peterlin \n" "Language-Team: Slovenian \n" --- 6,12 ---- msgid "" msgstr "" "Project-Id-Version: sed 4.0\n" ! "POT-Creation-Date: 2002-12-12 19:56+0100\n" "PO-Revision-Date: 2002-10-25 15:17+0200\n" "Last-Translator: Primo¾ Peterlin \n" "Language-Team: Slovenian \n" *************** *** 160,174 **** msgstr "INTERNA NAPAKA: Pokvarjen ukaz %c" #. XXX shouldn't this be (UCHAR_MAX+1)? ! #: lib/regcomp.c:658 sed/regex.c:47 msgid "No previous regular expression" msgstr "Prej¹nji regularni izraz manjka" ! #: sed/regex.c:48 msgid "Cannot specify modifiers on empty regexp" msgstr "Doloèanje modifikatorjev pri praznem regularnem izrazu ni mogoèe" ! #: sed/regex.c:146 #, c-format msgid "Invalid reference \\%d on `s' command's RHS" msgstr "Neveljavni sklic \\%d na desni strani ukaza ,s`" --- 160,174 ---- msgstr "INTERNA NAPAKA: Pokvarjen ukaz %c" #. XXX shouldn't this be (UCHAR_MAX+1)? ! #: lib/regcomp.c:661 sed/regex.c:46 msgid "No previous regular expression" msgstr "Prej¹nji regularni izraz manjka" ! #: sed/regex.c:47 msgid "Cannot specify modifiers on empty regexp" msgstr "Doloèanje modifikatorjev pri praznem regularnem izrazu ni mogoèe" ! 
#: sed/regex.c:145 #, c-format msgid "Invalid reference \\%d on `s' command's RHS" msgstr "Neveljavni sklic \\%d na desni strani ukaza ,s`" Binary files sed-4.0.3/po/sv.gmo and sed-4.0.4/po/sv.gmo differ diff -rNC3 sed-4.0.3/po/sv.po sed-4.0.4/po/sv.po *** sed-4.0.3/po/sv.po Mon Oct 28 08:07:44 2002 --- sed-4.0.4/po/sv.po Thu Dec 12 19:56:15 2002 *************** *** 5,11 **** msgid "" msgstr "" "Project-Id-Version: sed 4.0\n" ! "POT-Creation-Date: 2002-10-28 08:07+0100\n" "PO-Revision-Date: 2002-10-25 19:18+0200\n" "Last-Translator: Christian Rose \n" "Language-Team: Swedish \n" --- 5,11 ---- msgid "" msgstr "" "Project-Id-Version: sed 4.0\n" ! "POT-Creation-Date: 2002-12-12 19:56+0100\n" "PO-Revision-Date: 2002-10-25 19:18+0200\n" "Last-Translator: Christian Rose \n" "Language-Team: Swedish \n" *************** *** 158,172 **** msgstr "INTERNT FEL: Felaktigt kommando %c" #. XXX shouldn't this be (UCHAR_MAX+1)? ! #: lib/regcomp.c:658 sed/regex.c:47 msgid "No previous regular expression" msgstr "Inget tidigare reguljärt uttryck" ! #: sed/regex.c:48 msgid "Cannot specify modifiers on empty regexp" msgstr "Kan inte ange modifierare på tomt reguljärt uttryck" ! #: sed/regex.c:146 #, c-format msgid "Invalid reference \\%d on `s' command's RHS" msgstr "Ogiltig referens \\%d på \"s\"-kommandots RHS" --- 158,172 ---- msgstr "INTERNT FEL: Felaktigt kommando %c" #. XXX shouldn't this be (UCHAR_MAX+1)? ! #: lib/regcomp.c:661 sed/regex.c:46 msgid "No previous regular expression" msgstr "Inget tidigare reguljärt uttryck" ! #: sed/regex.c:47 msgid "Cannot specify modifiers on empty regexp" msgstr "Kan inte ange modifierare på tomt reguljärt uttryck" ! 
#: sed/regex.c:145 #, c-format msgid "Invalid reference \\%d on `s' command's RHS" msgstr "Ogiltig referens \\%d på \"s\"-kommandots RHS" Binary files sed-4.0.3/po/tr.gmo and sed-4.0.4/po/tr.gmo differ diff -rNC3 sed-4.0.3/po/tr.po sed-4.0.4/po/tr.po *** sed-4.0.3/po/tr.po Mon Oct 28 08:07:44 2002 --- sed-4.0.4/po/tr.po Thu Dec 12 19:56:15 2002 *************** *** 5,11 **** msgid "" msgstr "" "Project-Id-Version: sed 3.02a\n" ! "POT-Creation-Date: 2002-10-28 08:07+0100\n" "PO-Revision-Date: 2001-07-05 22:41EEST\n" "Last-Translator: Deniz Akkus Kanca \n" "Language-Team: Turkish \n" --- 5,11 ---- msgid "" msgstr "" "Project-Id-Version: sed 3.02a\n" ! "POT-Creation-Date: 2002-12-12 19:56+0100\n" "PO-Revision-Date: 2001-07-05 22:41EEST\n" "Last-Translator: Deniz Akkus Kanca \n" "Language-Team: Turkish \n" *************** *** 158,172 **** msgstr "ÝÇ HATA: Hatalý komut %c" #. XXX shouldn't this be (UCHAR_MAX+1)? ! #: lib/regcomp.c:658 sed/regex.c:47 msgid "No previous regular expression" msgstr "Daha önce düzenli ifade yok" ! #: sed/regex.c:48 msgid "Cannot specify modifiers on empty regexp" msgstr "" ! #: sed/regex.c:146 #, c-format msgid "Invalid reference \\%d on `s' command's RHS" msgstr "" --- 158,172 ---- msgstr "ÝÇ HATA: Hatalý komut %c" #. XXX shouldn't this be (UCHAR_MAX+1)? ! #: lib/regcomp.c:661 sed/regex.c:46 msgid "No previous regular expression" msgstr "Daha önce düzenli ifade yok" ! #: sed/regex.c:47 msgid "Cannot specify modifiers on empty regexp" msgstr "" ! #: sed/regex.c:145 #, c-format msgid "Invalid reference \\%d on `s' command's RHS" msgstr "" Binary files sed-4.0.3/po/zh_CN.gmo and sed-4.0.4/po/zh_CN.gmo differ diff -rNC3 sed-4.0.3/po/zh_CN.po sed-4.0.4/po/zh_CN.po *** sed-4.0.3/po/zh_CN.po Mon Oct 28 08:07:45 2002 --- sed-4.0.4/po/zh_CN.po Thu Dec 12 19:56:15 2002 *************** *** 5,11 **** msgid "" msgstr "" "Project-Id-Version: sed 3.02.80\n" ! 
"POT-Creation-Date: 2002-10-28 08:07+0100\n" "PO-Revision-Date: 2002-08-18 11:11+0800\n" "Last-Translator: Wang Li \n" "Language-Team: Chinese (simplified) \n" --- 5,11 ---- msgid "" msgstr "" "Project-Id-Version: sed 3.02.80\n" ! "POT-Creation-Date: 2002-12-12 19:56+0100\n" "PO-Revision-Date: 2002-08-18 11:11+0800\n" "Last-Translator: Wang Li \n" "Language-Team: Chinese (simplified) \n" *************** *** 157,171 **** msgstr "ÄÚ²¿´íÎ󣺴íÎóµÄÃüÁî %c" #. XXX shouldn't this be (UCHAR_MAX+1)? ! #: lib/regcomp.c:658 sed/regex.c:47 msgid "No previous regular expression" msgstr "" ! #: sed/regex.c:48 msgid "Cannot specify modifiers on empty regexp" msgstr "" ! #: sed/regex.c:146 #, c-format msgid "Invalid reference \\%d on `s' command's RHS" msgstr "" --- 157,171 ---- msgstr "ÄÚ²¿´íÎ󣺴íÎóµÄÃüÁî %c" #. XXX shouldn't this be (UCHAR_MAX+1)? ! #: lib/regcomp.c:661 sed/regex.c:46 msgid "No previous regular expression" msgstr "" ! #: sed/regex.c:47 msgid "Cannot specify modifiers on empty regexp" msgstr "" ! #: sed/regex.c:145 #, c-format msgid "Invalid reference \\%d on `s' command's RHS" msgstr "" diff -rNC3 sed-4.0.3/sed/compile.c sed-4.0.4/sed/compile.c *** sed-4.0.3/sed/compile.c Fri Oct 18 17:57:45 2002 --- sed-4.0.4/sed/compile.c Fri Nov 22 11:50:37 2002 *************** *** 1,5 **** /* GNU SED, a batch stream editor. ! Copyright (C) 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1998, 1999 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify --- 1,5 ---- /* GNU SED, a batch stream editor. ! Copyright (C) 1989,90,91,92,93,94,95,98,99,2002 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify diff -rNC3 sed-4.0.3/sed/execute.c sed-4.0.4/sed/execute.c *** sed-4.0.3/sed/execute.c Fri Oct 18 17:59:34 2002 --- sed-4.0.4/sed/execute.c Fri Nov 22 11:50:44 2002 *************** *** 1,6 **** /* GNU SED, a batch stream editor. ! 
Copyright (C) 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1998, 1999 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify --- 1,6 ---- /* GNU SED, a batch stream editor. ! Copyright (C) 1989,90,91,92,93,94,95,98,99,2002 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify diff -rNC3 sed-4.0.3/sed/fmt.c sed-4.0.4/sed/fmt.c *** sed-4.0.3/sed/fmt.c Thu Feb 28 13:12:19 2002 --- sed-4.0.4/sed/fmt.c Fri Nov 22 11:50:51 2002 *************** *** 1,5 **** /* `L' command implementation for GNU sed, based on GNU fmt 1.22. ! Copyright (C) 1994, 1995, 1996 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by --- 1,5 ---- /* `L' command implementation for GNU sed, based on GNU fmt 1.22. ! Copyright (C) 1994, 1995, 1996, 2002 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff -rNC3 sed-4.0.3/sed/regex.c sed-4.0.4/sed/regex.c *** sed-4.0.3/sed/regex.c Wed Oct 23 12:48:22 2002 --- sed-4.0.4/sed/regex.c Fri Nov 22 11:51:04 2002 *************** *** 1,6 **** /* GNU SED, a batch stream editor. ! Copyright (C) 1999 ! Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by --- 1,5 ---- /* GNU SED, a batch stream editor. ! Copyright (C) 1999, 2002 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff -rNC3 sed-4.0.3/sed/sed.c sed-4.0.4/sed/sed.c *** sed-4.0.3/sed/sed.c Mon Oct 28 07:56:02 2002 --- sed-4.0.4/sed/sed.c Fri Nov 22 11:51:09 2002 *************** *** 1,9 **** ! 
#define COPYRIGHT_NOTICE "Copyright (C) 1999 Free Software Foundation, Inc." #define BUG_ADDRESS "bonzini@gnu.org" /* GNU SED, a batch stream editor. ! Copyright (C) 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1998, 1999 \ ! Free Software Foundation, Inc." This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by --- 1,9 ---- ! #define COPYRIGHT_NOTICE "Copyright (C) 2002 Free Software Foundation, Inc." #define BUG_ADDRESS "bonzini@gnu.org" /* GNU SED, a batch stream editor. ! Copyright (C) 1989,90,91,92,93,94,95,98,99,2002 ! Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff -rNC3 sed-4.0.3/sed/sed.h sed-4.0.4/sed/sed.h *** sed-4.0.3/sed/sed.h Fri Oct 18 17:58:53 2002 --- sed-4.0.4/sed/sed.h Thu Dec 12 19:48:04 2002 *************** *** 1,5 **** /* GNU SED, a batch stream editor. ! Copyright (C) 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1998, 1999 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify --- 1,5 ---- /* GNU SED, a batch stream editor. ! Copyright (C) 1989,90,91,92,93,94,95,98,99,2002 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify *************** *** 101,107 **** struct replacement { char *prefix; size_t prefix_length; ! char subst_id; enum replacement_types repl_type; struct replacement *next; }; --- 101,107 ---- struct replacement { char *prefix; size_t prefix_length; ! int subst_id; enum replacement_types repl_type; struct replacement *next; }; diff -rNC3 sed-4.0.3/testsuite/version.gin sed-4.0.4/testsuite/version.gin *** sed-4.0.3/testsuite/version.gin Tue Oct 22 21:55:48 2002 --- sed-4.0.4/testsuite/version.gin Fri Nov 22 12:06:35 2002 *************** *** 1,5 **** GNU sed version @VERSION@ ! Copyright (C) 1999 Free Software Foundation, Inc. 
This is free software; see the source for copying conditions. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, to the extent permitted by law. --- 1,5 ---- GNU sed version @VERSION@ ! Copyright (C) 2002 Free Software Foundation, Inc. This is free software; see the source for copying conditions. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, to the extent permitted by law.