blocxx

PosixRegEx.cpp

Go to the documentation of this file.
00001 /*******************************************************************************
00002 * Copyright (C) 2005 Novell, Inc. All rights reserved.
00003 *
00004 * Redistribution and use in source and binary forms, with or without
00005 * modification, are permitted provided that the following conditions are met:
00006 *
00007 *  - Redistributions of source code must retain the above copyright notice,
00008 *    this list of conditions and the following disclaimer.
00009 *
00010 *  - Redistributions in binary form must reproduce the above copyright notice,
00011 *    this list of conditions and the following disclaimer in the documentation
00012 *    and/or other materials provided with the distribution.
00013 *
00014 *  - Neither the name of Vintela, Inc., Novell, Inc., nor the names of its
00015 *    contributors may be used to endorse or promote products derived from this
00016 *    software without specific prior written permission.
00017 *
00018 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS''
00019 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00020 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
00021 * ARE DISCLAIMED. IN NO EVENT SHALL Vintela, Inc., Novell, Inc., OR THE 
00022 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 
00023 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
00024 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; 
00025 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
00026 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR 
00027 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 
00028 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00029 *******************************************************************************/
00034 #include "blocxx/PosixRegEx.hpp"
00035 #ifdef BLOCXX_HAVE_REGEX
00036 #ifdef BLOCXX_HAVE_REGEX_H
00037 
00038 #include "blocxx/ExceptionIds.hpp"
00039 #include "blocxx/Assertion.hpp"
00040 #include "blocxx/Format.hpp"
00041 
00042 
00043 namespace BLOCXX_NAMESPACE
00044 {
00045 
namespace
{
   // "No error" result code of the regcomp()/regexec() calls in this file.
   // Defined locally because the REG_NOERROR enumerator offered by the
   // Linux regex.h is non-standard and must not be relied upon.
   const int REG_NOERROR = 0;
}
00051 
00052 // -------------------------------------------------------------------
00053 static String
00054 substitute_caps(const PosixRegEx::MatchArray &sub,
00055                 const String &str, const String &rep)
00056 {
00057    static const char *cap_refs[] = {
00058       NULL,  "\\1", "\\2", "\\3", "\\4",
00059       "\\5", "\\6", "\\7", "\\8", "\\9", NULL
00060    };
00061 
00062    String res( rep);
00063    size_t pos;
00064 
00065    for(size_t i=1; cap_refs[i] != NULL; i++)
00066    {
00067       String cap;
00068 
00069       if( i < sub.size() && sub[i].rm_so >= 0 && sub[i].rm_eo >= 0)
00070       {
00071          cap = str.substring(sub[i].rm_so, sub[i].rm_eo
00072                                          - sub[i].rm_so);
00073       }
00074 
00075       pos = res.indexOf(cap_refs[i]);
00076       while( pos != String::npos)
00077       {
00078          size_t quotes = 0;
00079          size_t at = pos;
00080 
00081          while( at > 0 && res.charAt(--at) == '\\')
00082             quotes++;
00083 
00084          if( quotes % 2)
00085          {
00086             quotes = (quotes + 1) / 2;
00087 
00088             res = res.erase(pos - quotes, quotes);
00089 
00090             pos = res.indexOf(cap_refs[i],
00091                               pos + 2 - quotes);
00092          }
00093          else
00094          {
00095             quotes = quotes / 2;
00096 
00097             res = res.substring(0, pos - quotes) +
00098                   cap +
00099                   res.substring(pos + 2);
00100 
00101             pos = res.indexOf(cap_refs[i],
00102                               pos + cap.length() - quotes);
00103          }
00104       }
00105    }
00106    return res;
00107 }
00108 
00109 
00110 // -------------------------------------------------------------------
00111 static inline String
00112 getError(const regex_t *preg, const int code)
00113 {
00114    char err[256] = { '\0'};
00115    ::regerror(code, preg, err, sizeof(err));
00116    return String(err);
00117 }
00118 
00119 
00120 // -------------------------------------------------------------------
00121 PosixRegEx::PosixRegEx()
00122    : compiled(false)
00123    , m_flags(0)
00124    , m_ecode(REG_NOERROR)
00125 {
00126 }
00127 
00128 
00129 // -------------------------------------------------------------------
00130 PosixRegEx::PosixRegEx(const String &regex, int cflags)
00131    : compiled(false)
00132    , m_flags(0)
00133    , m_ecode(REG_NOERROR)
00134 {
00135    if( !compile(regex, cflags))
00136    {
00137       BLOCXX_THROW_ERR(RegExCompileException,
00138          errorString().c_str(), m_ecode);
00139    }
00140 }
00141 
00142 
00143 // -------------------------------------------------------------------
00144 PosixRegEx::PosixRegEx(const PosixRegEx &ref)
00145    : compiled(false)
00146    , m_flags(ref.m_flags)
00147    , m_ecode(REG_NOERROR)
00148    , m_rxstr(ref.m_rxstr)
00149 {
00150    if( ref.compiled && !compile(ref.m_rxstr, ref.m_flags))
00151    {
00152       BLOCXX_THROW_ERR(RegExCompileException,
00153          errorString().c_str(), m_ecode);
00154    }
00155 }
00156 
00157 
00158 // -------------------------------------------------------------------
00159 PosixRegEx::~PosixRegEx()
00160 {
00161    if( compiled)
00162    {
00163       regfree(&m_regex);
00164    }
00165 }
00166 
00167 
00168 // -------------------------------------------------------------------
00169 PosixRegEx &
00170 PosixRegEx::operator = (const PosixRegEx &ref)
00171 {
00172    if( !ref.compiled)
00173    {
00174       m_ecode = REG_NOERROR;
00175       m_error.erase();
00176       m_flags = ref.m_flags;
00177       m_rxstr = ref.m_rxstr;
00178       if( compiled)
00179       {
00180          regfree(&m_regex);
00181          compiled = false;
00182       }
00183    }
00184    else if( !compile(ref.m_rxstr, ref.m_flags))
00185    {
00186       BLOCXX_THROW_ERR(RegExCompileException,
00187          errorString().c_str(), m_ecode);
00188    }
00189    return *this;
00190 }
00191 
00192 
00193 // -------------------------------------------------------------------
00194 bool
00195 PosixRegEx::compile(const String &regex, int cflags)
00196 {
00197    if( compiled)
00198    {
00199       regfree(&m_regex);
00200       compiled = false;
00201    }
00202 
00203    m_rxstr = regex;
00204    m_flags = cflags;
00205    m_ecode = ::regcomp(&m_regex, regex.c_str(), cflags);
00206    if( m_ecode == REG_NOERROR)
00207    {
00208       compiled = true;
00209       m_error.erase();
00210       return true;
00211    }
00212    else
00213    {
00214       m_error = getError(&m_regex, m_ecode);
00215       return false;
00216    }
00217 }
00218 
00219 
00220 // -------------------------------------------------------------------
00221 int
00222 PosixRegEx::errorCode()
00223 {
00224    return m_ecode;
00225 }
00226 
00227 
00228 // -------------------------------------------------------------------
00229 String
00230 PosixRegEx::errorString() const
00231 {
00232    return m_error;
00233 }
00234 
00235 
00236 // -------------------------------------------------------------------
00237 String
00238 PosixRegEx::patternString() const
00239 {
00240    return m_rxstr;
00241 }
00242 
00243 
00244 // -------------------------------------------------------------------
00245 int
00246 PosixRegEx::compileFlags() const
00247 {
00248    return m_flags;
00249 }
00250 
00251 
00252 // -------------------------------------------------------------------
00253 bool
00254 PosixRegEx::isCompiled() const
00255 {
00256    return compiled;
00257 }
00258 
00259 
00260 // -------------------------------------------------------------------
00261 bool
00262 PosixRegEx::execute(MatchArray &sub, const String &str,
00263                size_t index, size_t count, int eflags)
00264 {
00265    if( !compiled)
00266    {
00267       BLOCXX_THROW(RegExCompileException,
00268          "Regular expression is not compiled");
00269    }
00270 
00271    if( index > str.length())
00272    {
00273       BLOCXX_THROW(OutOfBoundsException,
00274          Format("String index out of bounds ("
00275                 "length = %1, index = %2).",
00276                 str.length(), index
00277          ).c_str());
00278    }
00279 
00280    if( count == 0)
00281    {
00282       count = m_regex.re_nsub + 1;
00283    }
00284    AutoPtrVec<regmatch_t> rsub(new regmatch_t[count]);
00285    rsub[0].rm_so = -1;
00286    rsub[0].rm_eo = -1;
00287 
00288    sub.clear();
00289    m_ecode = ::regexec(&m_regex, str.c_str() + index,
00290                        count, rsub.get(), eflags);
00291    if( m_ecode == REG_NOERROR)
00292    {
00293       m_error.erase();
00294       if( m_flags & REG_NOSUB)
00295       {
00296          return true;
00297       }
00298 
00299       sub.resize(count);
00300       for(size_t n = 0; n < count; n++)
00301       {
00302          if( rsub[n].rm_so < 0 || rsub[n].rm_eo < 0)
00303          {
00304             sub[n] = rsub[n];
00305          }
00306          else
00307          {
00308             rsub[n].rm_so += index;
00309             rsub[n].rm_eo += index;
00310             sub[n] = rsub[n];
00311          }
00312       }
00313       return true;
00314    }
00315    else
00316    {
00317       m_error = getError(&m_regex, m_ecode);
00318       return false;
00319    }
00320 }
00321 
00322 
00323 // -------------------------------------------------------------------
00324 StringArray
00325 PosixRegEx::capture(const String &str, size_t index, size_t count, int eflags)
00326 {
00327    if( !compiled)
00328    {
00329       BLOCXX_THROW(RegExCompileException,
00330          "Regular expression is not compiled");
00331    }
00332 
00333    MatchArray  rsub;
00334    StringArray ssub;
00335 
00336    bool match = execute(rsub, str, index, count, eflags);
00337    if( match)
00338    {
00339       if( rsub.empty())
00340       {
00341          BLOCXX_THROW(RegExCompileException,
00342             "Non-capturing regular expression");
00343       }
00344 
00345       MatchArray::const_iterator i=rsub.begin();
00346       for( ; i != rsub.end(); ++i)
00347       {
00348          if( i->rm_so >= 0 && i->rm_eo >= 0)
00349          {
00350             ssub.push_back(str.substring(i->rm_so,
00351                                 i->rm_eo - i->rm_so));
00352          }
00353          else
00354          {
00355             ssub.push_back(String(""));
00356          }
00357       }
00358    }
00359    else if(m_ecode != REG_NOMATCH)
00360    {
00361       BLOCXX_THROW_ERR(RegExExecuteException,
00362          errorString().c_str(), m_ecode);
00363    }
00364    return ssub;
00365 }
00366 
00367 
00368 // -------------------------------------------------------------------
00369 blocxx::String
00370 PosixRegEx::replace(const String &str, const String &rep,
00371                     bool global, int eflags)
00372 {
00373    if( !compiled)
00374    {
00375       BLOCXX_THROW(RegExCompileException,
00376          "Regular expression is not compiled");
00377    }
00378 
00379    MatchArray  rsub;
00380    bool        match;
00381    size_t      off = 0;
00382    String      out = str;
00383 
00384    do
00385    {
00386       match = execute(rsub, out, off, 0, eflags);
00387       if( match)
00388       {
00389          if( rsub.empty()      ||
00390              rsub[0].rm_so < 0 ||
00391              rsub[0].rm_eo < 0)
00392          {
00393             // only if empty (missused as guard).
00394             BLOCXX_THROW(RegExCompileException,
00395                "Non-capturing regular expression");
00396          }
00397 
00398          String res = substitute_caps(rsub, out, rep);
00399 
00400          out = out.substring(0, rsub[0].rm_so) +
00401                res + out.substring(rsub[0].rm_eo);
00402 
00403          off = rsub[0].rm_so + res.length();
00404       }
00405       else if(m_ecode == REG_NOMATCH)
00406       {
00407          m_ecode = REG_NOERROR;
00408          m_error.erase();
00409       }
00410       else
00411       {
00412          BLOCXX_THROW_ERR(RegExExecuteException,
00413             errorString().c_str(), m_ecode);
00414       }
00415    } while(global && match && out.length() > off);
00416 
00417    return out;
00418 }
00419 
00420 // -------------------------------------------------------------------
00421 StringArray
00422 PosixRegEx::split(const String &str, bool empty, int eflags)
00423 {
00424    if( !compiled)
00425    {
00426       BLOCXX_THROW(RegExCompileException,
00427          "Regular expression is not compiled");
00428    }
00429 
00430    MatchArray  rsub;
00431    StringArray ssub;
00432    bool        match;
00433    size_t      off = 0;
00434    size_t      len = str.length();
00435 
00436    do
00437    {
00438       match = execute(rsub, str, off, 1, eflags);
00439       if( match)
00440       {
00441          if( rsub.empty()      ||
00442              rsub[0].rm_so < 0 ||
00443              rsub[0].rm_eo < 0)
00444          {
00445             BLOCXX_THROW(RegExCompileException,
00446                "Non-capturing regular expression");
00447          }
00448 
00449          if( empty || ((size_t)rsub[0].rm_so > off))
00450          {
00451             ssub.push_back(str.substring(off,
00452                                rsub[0].rm_so - off));
00453          }
00454          off = rsub[0].rm_eo;
00455       }
00456       else if(m_ecode == REG_NOMATCH)
00457       {
00458          String tmp = str.substring(off);
00459          if( empty || !tmp.empty())
00460          {
00461             ssub.push_back(tmp);
00462          }
00463          m_ecode = REG_NOERROR;
00464          m_error.erase();
00465       }
00466       else
00467       {
00468          BLOCXX_THROW_ERR(RegExExecuteException,
00469             errorString().c_str(), m_ecode);
00470       }
00471    } while(match && len > off);
00472 
00473    return ssub;
00474 }
00475 
00476 
00477 // -------------------------------------------------------------------
00478 StringArray
00479 PosixRegEx::grep(const StringArray &src, int eflags)
00480 {
00481    if( !compiled)
00482    {
00483       BLOCXX_THROW(RegExCompileException,
00484          "Regular expression is not compiled");
00485    }
00486 
00487    m_ecode = REG_NOERROR;
00488    m_error.erase();
00489 
00490    StringArray out;
00491    if( !src.empty())
00492    {
00493       StringArray::const_iterator i=src.begin();
00494       for( ; i != src.end(); ++i)
00495       {
00496          int ret = ::regexec(&m_regex, i->c_str(),
00497                              0, NULL, eflags);
00498          if( ret == REG_NOERROR)
00499          {
00500             out.push_back(*i);
00501          }
00502          else if(ret != REG_NOMATCH)
00503          {
00504             m_ecode = ret;
00505             m_error = getError(&m_regex, m_ecode);
00506             BLOCXX_THROW_ERR(RegExExecuteException,
00507                errorString().c_str(), m_ecode);
00508          }
00509       }
00510    }
00511 
00512    return out;
00513 }
00514 
00515 
00516 // -------------------------------------------------------------------
00517 bool
00518 PosixRegEx::match(const String &str, size_t index, int eflags) const
00519 {
00520    if( !compiled)
00521    {
00522       BLOCXX_THROW(RegExCompileException,
00523          "Regular expression is not compiled");
00524    }
00525 
00526    if( index > str.length())
00527    {
00528       BLOCXX_THROW(OutOfBoundsException,
00529          Format("String index out of bounds ("
00530                 "length = %1, index = %2).",
00531                 str.length(), index
00532          ).c_str());
00533    }
00534 
00535    m_ecode = ::regexec(&m_regex, str.c_str() + index,
00536                        0, NULL, eflags);
00537 
00538    if( m_ecode == REG_NOERROR)
00539    {
00540       m_error.erase();
00541       return true;
00542    }
00543    else if(m_ecode == REG_NOMATCH)
00544    {
00545       m_error = getError(&m_regex, m_ecode);
00546       return false;
00547    }
00548    else
00549    {
00550       m_error = getError(&m_regex, m_ecode);
00551       BLOCXX_THROW_ERR(RegExExecuteException,
00552          errorString().c_str(), m_ecode);
00553    }
00554 }
00555 
00556 
00557 // -------------------------------------------------------------------
00558 } // namespace BLOCXX_NAMESPACE
00559 
00560 #endif // BLOCXX_HAVE_REGEX_H
00561 #endif // BLOCXX_HAVE_REGEX
00562 
00563 /* vim: set ts=8 sts=8 sw=8 ai noet: */
00564