class interface REGULAR_EXPRESSION
   -- ePCRE - Perl-Compatible Regular Expressions written in Eiffel
   --
   -- Heavily based on the sources of the PCRE library package, version 3.4,
   -- which is open source software, written by Philip Hazel, and copyright
   -- by the University of Cambridge, England.
   -- ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/
   -- PCRE is a library of functions to support regular expressions whose syntax
   -- and semantics are as close as possible to those of the Perl 5 language.
   --
   -- (PCRE can also be found on www.sourceforge.com)
   --
   -- Ported from the C source to Eiffel by Harald Erdbrügger he@he-me.de
   -- (hopefully no more gotos, continues, breaks and returns ;-) )
   --
   -- Version 1.01  2001-03-29  - option handling improved (most options can now
   --                             be changed while the matcher is active; see the
   --                             assertions of the option setters)
   --                           - error in next_match fixed
   --                           - some minor fixes / improvements
   --                           - improved localization (e.g. for German umlauts,
   --                             see example.e)
   --                           - fixed error in the replace routines
   --                           - changed the access to subexpressions in the
   --                             replacement text: the syntax is now \refno\
   --                             (e.g. \3\ accesses the matched part of the
   --                             third subexpression)
   -- Version 1.0   2001-03-26  - first public release
   --
   -- For documentation see also the file pcre.txt from the original source
   --

creation
   make
      -- Create a new REGULAR_EXPRESSION object: nothing is compiled, nothing
      -- has matched and the matcher is not active.
      ensure
         all_reset: not is_compiled and not has_matched and not is_matcher_active

feature(s) from RE_CHARACTER_TABLES
   -- Access
   -- Each compiler and matcher run uses these character sets to classify
   -- characters. Change these sets if you want to localize your compiler.
   -- Keep in mind that such changes are shared by all compilers, so they
   -- affect the behaviour of every following compiler run of every compiler!

   Default_char_casing: RE_CASING_MAPS
      -- Shared default casing maps (shared by all compilers, as noted in
      -- the comment of this feature clause).
      --!! TBD: relationship to `char_casing' is not documented in this view;
      --!! presumably its initial value - confirm against the implementation.

   Default_word_set: RE_CHARACTER_SET
      -- Shared default character set (shared by all compilers, as noted in
      -- the comment of this feature clause).
      --!! TBD: relationship to `word_set' is not documented in this view;
      --!! presumably its initial value - confirm against the implementation.

feature(s) from RE_COMPILER
   -- character casing

   char_casing: RE_CASING_MAPS
      -- Casing maps of this object; replaced by `set_char_casing'.

   word_set: RE_CHARACTER_SET
      -- Character set of this object; replaced by `set_word_set'.
      -- Presumably the set of "word" characters - confirm against the
      -- implementation.

   set_char_casing (a_casing_maps: RE_CASING_MAPS)
      -- Make `a_casing_maps' the new `char_casing'.
      -- Only allowed while no pattern is compiled.
      require
         not is_compiled;
         a_casing_maps /= Void
      ensure
         char_casing = a_casing_maps

   set_word_set (a_word_set: RE_CHARACTER_SET)
      -- Make `a_word_set' the new `word_set'.
      -- Only allowed while no pattern is compiled.
      require
         not is_compiled;
         a_word_set /= Void
      ensure
         word_set = a_word_set

feature(s) from RE_COMPILER
   -- query

   is_compiled: BOOLEAN
      -- Was the last compilation successful?

   compile_error: STRING
      -- Holds the state of the compile execution.

   error_offset: INTEGER
      -- Position in the pattern where the error was detected.
      -- Result <= 0 implies that no valid position was found.

feature(s) from RE_COMPILER
   -- public options not changeable after compilation

   is_caseless: BOOLEAN
      -- If this option is set, letters in the pattern match both upper
      -- and lower case letters. It is equivalent to Perl's /i option.

   is_extended: BOOLEAN
      -- If this option is set, whitespace data characters in the pattern
      -- are totally ignored except when escaped or inside a character
      -- class, and characters between an unescaped # outside a character
      -- class and the next newline character, inclusive, are also ignored.
      -- This is equivalent to Perl's /x option, and makes it possible to
      -- include comments inside complicated patterns. Note, however, that
      -- this applies only to data characters. Whitespace characters may
      -- never appear within special character sequences in a pattern, for
      -- example within the sequence (?( which introduces a conditional
      -- subpattern.

   is_greedy: BOOLEAN
      -- This option inverts the "greediness" of the quantifiers so
      -- that they are not greedy by default, but become greedy if
      -- followed by "?". It is not compatible with Perl. It can also
      -- be set by a (?U) option setting within the pattern.
      -- NOTE(review): the text above is the description of PCRE's
      -- "ungreedy" option; given the feature name it presumably describes
      -- the behaviour when `is_greedy' is cleared - confirm against the
      -- implementation.

feature(s) from RE_COMPILER
   -- public options

   is_multiline: BOOLEAN
      -- By default, PCRE treats the subject string as consisting of
      -- a single "line" of characters (even if it actually contains
      -- several newlines). The "start of line" metacharacter (^)
      -- matches only at the start of the string, while the "end of
      -- line" metacharacter ($) matches only at the end of the
      -- string, or before a terminating newline (unless
      -- is_dollar_endonly is set). This is the same as Perl.
      -- When is_multiline is set, the "start of line" and "end of
      -- line" constructs match immediately following or immediately
      -- before any newline in the subject string, respectively, as
      -- well as at the very start and end. This is equivalent to
      -- Perl's /m option. If there are no "\n" characters in a subject
      -- string, or no occurrences of ^ or $ in a pattern, setting
      -- is_multiline has no effect.

   is_dotall: BOOLEAN
      -- If this option is set, a dot metacharacter in the pattern
      -- matches all characters, including newlines. Without it,
      -- newlines are excluded. This option is equivalent to Perl's /s
      -- option. A negative class such as [^a] always matches a newline
      -- character, independent of the setting of this option.

   is_empty_allowed: BOOLEAN
      -- An empty string is not considered to be a valid match if
      -- this option is cleared. If there are alternatives in the
      -- pattern, they are tried. If all the alternatives match the
      -- empty string, the entire match fails. For example, if the
      -- pattern       a?b?
      -- is applied to a string not beginning with "a" or "b", it
      -- matches the empty string at the start of the subject. With
      -- not is_empty_allowed, this match is not valid, so PCRE
      -- searches further into the string for occurrences of "a" or
      -- "b". Perl has no direct equivalent of is_empty_allowed, but
      -- it does make a special case of a pattern match of the empty
      -- string within its split() function, and when using the /g
      -- modifier. It is possible to emulate Perl's behaviour after
      -- matching a null string by first trying the match again at
      -- the same offset with is_empty_allowed cleared, and then, if
      -- that fails, by advancing the starting offset (see below) and
      -- trying an ordinary match again.

   is_dollar_endonly: BOOLEAN
      -- If this option is set, a dollar metacharacter in the pattern
      -- matches only at the end of the subject string. Without this
      -- option, a dollar also matches immediately before the final
      -- character if it is a newline (but not before any other
      -- newlines). The is_dollar_endonly option is ignored if
      -- is_multiline is set. There is no equivalent to this option
      -- in Perl.

   is_bol: BOOLEAN
      -- If not set, the first character of the string is not the
      -- beginning of a line, so the circumflex metacharacter should
      -- not match before it. Clearing this without is_multiline (at
      -- compile time) causes circumflex never to match.

   is_eol: BOOLEAN
      -- If not set, the end of the string is not the end of a line,
      -- so the dollar metacharacter should not match it nor (except
      -- in multiline mode) a newline immediately before it. Clearing
      -- this without is_multiline (at compile time) causes dollar
      -- never to match.
      -- NOTE(review): this comment previously read "Setting this ...",
      -- wording that fits PCRE's inverse NOTEOL flag but contradicts the
      -- first sentence here and the parallel text of `is_bol' - confirm.

   is_anchored: BOOLEAN
      -- If this option is set, the pattern is forced to be "anchored",
      -- that is, it is constrained to match only at the start of the
      -- string which is being searched (the "subject string"). This
      -- effect can also be achieved by appropriate constructs in the
      -- pattern itself (^), which is the only way to do it in Perl.

feature(s) from RE_COMPILER
   -- public options setting

   set_default_options
      -- Presumably resets every option to its default value; the default
      -- values themselves are not visible in this interface - confirm
      -- against the implementation.
      -- Only allowed while no pattern is compiled.
      require
         not is_compiled

   set_caseless (a_state: BOOLEAN)
      -- Set `is_caseless' to `a_state'.
      -- Only allowed while no pattern is compiled.
      require
         not is_compiled
      ensure
         is_caseless = a_state

   set_extended (a_state: BOOLEAN)
      -- Set `is_extended' to `a_state'.
      -- Only allowed while no pattern is compiled.
      require
         not is_compiled
      ensure
         is_extended = a_state

   set_greedy (a_state: BOOLEAN)
      -- Set `is_greedy' to `a_state'.
      -- Only allowed while no pattern is compiled.
      require
         not is_compiled
      ensure
         is_greedy = a_state

   set_multiline (a_state: BOOLEAN)
      -- Set `is_multiline' to `a_state'.
      -- No precondition: may also be called after compilation.
      ensure
         is_multiline = a_state

   set_dotall (a_state: BOOLEAN)
      -- Set `is_dotall' to `a_state'.
      -- No precondition: may also be called after compilation.
      ensure
         is_dotall = a_state

   set_empty_allowed (a_state: BOOLEAN)
      -- Set `is_empty_allowed' to `a_state'.
      -- No precondition: may also be called after compilation.
      ensure
         is_empty_allowed = a_state

   set_dollar_endonly (a_state: BOOLEAN)
      -- Set `is_dollar_endonly' to `a_state'.
      -- No precondition: may also be called after compilation.
      ensure
         is_dollar_endonly = a_state

   set_bol (a_state: BOOLEAN)
      -- Set `is_bol' to `a_state'.
      -- No precondition: may also be called after compilation.
      ensure
         is_bol = a_state

   set_eol (a_state: BOOLEAN)
      -- Set `is_eol' to `a_state'.
      -- No precondition: may also be called after compilation.
      ensure
         is_eol = a_state

   set_anchored (a_state: BOOLEAN)
      -- Set `is_anchored' to `a_state'.
      -- No precondition: may also be called after compilation.
      ensure
         is_anchored = a_state

feature(s) from REGULAR_EXPRESSION
   -- query matcher infos

   subject: STRING
      -- Current subject to match; set by `match' and `match_substring'.

   subject_start: INTEGER
      -- First position of the portion of interest of `subject':
      -- 1 after `match', `a_from' after `match_substring'.

   subject_end: INTEGER
      -- Last position of the portion of interest of `subject':
      -- `a_subject.count' after `match', `a_to' after `match_substring'.

   is_matcher_active: BOOLEAN
      -- Is the matcher active? True after `match' and `match_substring',
      -- false after `make' and `reset' (see their postconditions).
      -- By the class invariant an active matcher implies `is_compiled'.

   has_matched: BOOLEAN
      -- Is there a current match, i.e. is `match_count' > 0?
      ensure
         Result = (match_count > 0) --!! matched: Result implies match_count > 0

   match_count: INTEGER
      -- Number of matched patterns.
      -- Result > 1 implies there are matched (sub-) portions.

   matched_portion (a_n: INTEGER): STRING
      -- The `a_n'-th matched portion of the last matched subject;
      -- 0 represents the whole matched portion.
      require
         active_matcher: is_matcher_active;
         valid_index: a_n >= 0 and then a_n < match_count

   matched_portion_in (a_result: STRING; a_n: INTEGER)
      -- Append the `a_n'-th matched portion of the last matched subject
      -- to `a_result'; 0 represents the whole matched portion.
      -- NOTE(review): no precondition requires `a_result' to be non-Void.
      require
         active_matcher: is_matcher_active;
         valid_index: a_n >= 0 and then a_n < match_count

   start_of_portion (a_n: INTEGER): INTEGER
      -- Start position of the `a_n'-th subexpression;
      -- `a_n' = 0 represents the whole matched portion of the subject.
      -- Result = 0 implies an undefined subexpression.
      require
         active_matcher: is_matcher_active;
         valid_index: a_n >= 0 and then a_n < match_count
      ensure
         Result /= 0 implies Result >= subject_start;
         Result <= subject_end

   end_of_portion (a_n: INTEGER): INTEGER
      -- End position of the `a_n'-th subexpression;
      -- `a_n' = 0 represents the whole matched expression.
      -- Result = -1 implies an undefined subexpression.
      require
         active_matcher: is_matcher_active;
         valid_index: a_n >= 0 and then a_n < match_count
      ensure
         Result >= - 1 and then Result <= subject_end

feature(s) from REGULAR_EXPRESSION
   -- Element Change

   reset
      -- Return to the state after creation: not compiled, no match, matcher
      -- inactive. A reset is needed only if options must be changed that
      -- require `not is_compiled'; such a change needs a recompilation.
      ensure
         all_reset: not is_compiled and not has_matched and not is_matcher_active

feature(s) from REGULAR_EXPRESSION
   -- compiler commands

   compile (a_pattern: STRING)
      -- Compile `a_pattern'; the outcome is reported by `is_compiled',
      -- `compile_error' and `error_offset'.
      -- NOTE(review): this interface shows no precondition; presumably
      -- `a_pattern' must be non-Void - confirm against the implementation.
      --
      -- There are two different sets of meta-characters: those that
      -- are recognized anywhere in the pattern except within square
      -- brackets, and those that are recognized in square brackets.
      -- Outside square brackets, the meta-characters are as follows:
      --   \      general escape character with several uses
      --   ^      assert start of subject (or line, in multiline mode)
      --   $      assert end of subject (or line, in multiline mode)
      --   .      match any character except newline (by default)
      --   [      start character class definition
      --   |      start of alternative branch
      --   (      start subpattern
      --   )      end subpattern
      --   ?      extends the meaning of (
      --          also 0 or 1 quantifier
      --          also quantifier minimizer
      --   *      0 or more quantifier
      --   +      1 or more quantifier
      --   {      start min/max quantifier
      --
      -- Part of a pattern that is in square brackets is called a
      -- "character class". In a character class the only
      -- meta-characters are:
      --   \      general escape character
      --   ^      negate the class, but only if the first character
      --   -      indicates character range
      --   ]      terminates the character class

feature(s) from REGULAR_EXPRESSION
   -- matcher commands

   match (a_subject: STRING)
      -- Match against the whole of `a_subject': sets the value of
      -- `match_count' and the infos of the first matched portion (if any),
      -- and activates the matcher.
      require
         compiled: is_compiled;
         valid_subject: a_subject /= Void
      ensure
         subject = a_subject;
         subject_start = 1;
         subject_end = a_subject.count;
         is_matcher_active

   match_substring (a_subject: STRING; a_from, a_to: INTEGER)
      -- Match against the portion `a_from' .. `a_to' of `a_subject': sets
      -- the value of `match_count' and the infos of the first matched
      -- portion (if any), and activates the matcher.
      -- NOTE(review): this interface shows no precondition constraining
      -- `a_from' / `a_to' to valid positions of `a_subject'.
      require
         compiled: is_compiled;
         valid_subject: a_subject /= Void
      ensure
         subject = a_subject;
         subject_start = a_from;
         subject_end = a_to;
         is_matcher_active

   first_match
      -- Rewind the matcher to the first match (if any).
      require
         active_matcher: is_matcher_active

   next_match
      -- Match the next portion (if any).
      require
         active_matcher: is_matcher_active

feature(s) from REGULAR_EXPRESSION
   -- replace commands

   replacement (a_replacement: STRING): STRING
      -- `a_replacement' with its references evaluated.
      -- Only useful if `a_replacement' uses references of the form \n\ to
      -- matched subject portions, where n is the number of a subexpression
      -- of the compiled pattern. If no references are used, the result is
      -- a copy of `a_replacement'.
      require
         active_matcher: is_matcher_active;
         valid_replacement: a_replacement /= Void

   replacement_in (a_result, a_replacement: STRING)
      -- Append `a_replacement', with its references evaluated, to `a_result'.
      -- Only useful if `a_replacement' uses references of the form \n\ to
      -- matched subject portions, where n is the number of a subexpression
      -- of the compiled pattern. If no references are used, a copy of
      -- `a_replacement' is put in `a_result'.
      require
         active_matcher: is_matcher_active;
         valid_result: a_result /= Void;
         valid_replacement: a_replacement /= Void

   replace (a_replacement: STRING): STRING
      -- A new string with the result of the replace action. Only the
      -- currently matched portion is replaced by `a_replacement'.
      require
         active_matcher: is_matcher_active;
         valid_replacement: a_replacement /= Void

   replace_in (a_result, a_replacement: STRING)
      -- Append the result of the replace action to `a_result'. Only the
      -- currently matched portion (if any) is replaced by `a_replacement';
      -- otherwise a copy of the subject is appended to `a_result'.
      require
         active_matcher: is_matcher_active;
         valid_result: a_result /= Void;
         valid_replacement: a_replacement /= Void

   replace_all (a_replacement: STRING): STRING
      -- A new string with the result of the replace action. Replacement
      -- takes place over the whole subject string.
      -- Afterwards there is no current match (see postcondition).
      require
         active_matcher: is_matcher_active;
         valid_replacement: a_replacement /= Void
      ensure
         not has_matched

   replace_all_in (a_result, a_replacement: STRING)
      -- Append the result of the replace action to `a_result'. Replacement
      -- takes place over the whole subject string.
      -- Afterwards there is no current match (see postcondition).
      require
         active_matcher: is_matcher_active;
         valid_result: a_result /= Void;
         valid_replacement: a_replacement /= Void
      ensure
         not has_matched

feature(s) from REGULAR_EXPRESSION
   -- split commands

   split: ARRAY[STRING]
      -- Split the subject string into the portions not matched by the
      -- compiled pattern and return a new array containing all pieces.
      -- Afterwards there is no current match (see postcondition).
      require
         active_matcher: is_matcher_active
      ensure
         not has_matched

   split_in (a_collection: COLLECTION[STRING])
      -- Split the subject string into the portions not matched by the
      -- compiled pattern and append all pieces to `a_collection'.
      -- Afterwards there is no current match (see postcondition).
      require
         active_matcher: is_matcher_active;
         valid_collection: a_collection /= Void
      ensure
         not has_matched;
         a_collection.count >= old a_collection.count

invariant

    -- NOTE(review): `first_character' and `required_character' are not
    -- listed in this interface view; presumably non-exported features.

    -- Outside 0..255 the only allowed value of `first_character' is -1.
    valid_first_character: not first_character.in_range(0,255) implies first_character = - 1;

    -- Outside 0..255, `required_character' is never below -2.
    valid_required_character: not required_character.in_range(0,255) implies required_character >= - 2;

    -- The matcher can only be active on a compiled pattern.
    is_matcher_active implies is_compiled;

end of REGULAR_EXPRESSION