blocxx
|
00001 /******************************************************************************* 00002 * Copyright (C) 2005 Novell, Inc. All rights reserved. 00003 * 00004 * Redistribution and use in source and binary forms, with or without 00005 * modification, are permitted provided that the following conditions are met: 00006 * 00007 * - Redistributions of source code must retain the above copyright notice, 00008 * this list of conditions and the following disclaimer. 00009 * 00010 * - Redistributions in binary form must reproduce the above copyright notice, 00011 * this list of conditions and the following disclaimer in the documentation 00012 * and/or other materials provided with the distribution. 00013 * 00014 * - Neither the name of Vintela, Inc., Novell, Inc., nor the names of its 00015 * contributors may be used to endorse or promote products derived from this 00016 * software without specific prior written permission. 00017 * 00018 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' 00019 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 00020 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 00021 * ARE DISCLAIMED. IN NO EVENT SHALL Vintela, Inc., Novell, Inc., OR THE 00022 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 00023 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 00024 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; 00025 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 00026 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR 00027 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 00028 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00029 *******************************************************************************/ 00034 #include "blocxx/PosixRegEx.hpp" 00035 #ifdef BLOCXX_HAVE_REGEX 00036 #ifdef BLOCXX_HAVE_REGEX_H 00037 00038 #include "blocxx/ExceptionIds.hpp" 00039 #include "blocxx/Assertion.hpp" 00040 #include "blocxx/Format.hpp" 00041 00042 00043 namespace BLOCXX_NAMESPACE 00044 { 00045 00046 namespace 00047 { 00048 // the REG_NOERROR enum value from linux's regex.h is non-standard, so don't use it. 00049 const int REG_NOERROR = 0; 00050 } 00051 00052 // ------------------------------------------------------------------- 00053 static String 00054 substitute_caps(const PosixRegEx::MatchArray &sub, 00055 const String &str, const String &rep) 00056 { 00057 static const char *cap_refs[] = { 00058 NULL, "\\1", "\\2", "\\3", "\\4", 00059 "\\5", "\\6", "\\7", "\\8", "\\9", NULL 00060 }; 00061 00062 String res( rep); 00063 size_t pos; 00064 00065 for(size_t i=1; cap_refs[i] != NULL; i++) 00066 { 00067 String cap; 00068 00069 if( i < sub.size() && sub[i].rm_so >= 0 && sub[i].rm_eo >= 0) 00070 { 00071 cap = str.substring(sub[i].rm_so, sub[i].rm_eo 00072 - sub[i].rm_so); 00073 } 00074 00075 pos = res.indexOf(cap_refs[i]); 00076 while( pos != String::npos) 00077 { 00078 size_t quotes = 0; 00079 size_t at = pos; 00080 00081 while( at > 0 && res.charAt(--at) == '\\') 00082 quotes++; 00083 00084 if( quotes % 2) 00085 { 00086 quotes = (quotes + 1) / 2; 00087 00088 res = res.erase(pos - quotes, quotes); 00089 00090 pos = res.indexOf(cap_refs[i], 00091 pos + 2 - quotes); 00092 } 00093 else 00094 { 00095 quotes = quotes / 2; 00096 00097 res = res.substring(0, pos - quotes) + 00098 cap + 00099 res.substring(pos + 2); 00100 00101 pos = res.indexOf(cap_refs[i], 00102 pos + cap.length() - quotes); 00103 } 00104 } 00105 } 00106 return res; 00107 } 00108 00109 00110 // ------------------------------------------------------------------- 00111 static inline String 00112 getError(const regex_t *preg, const int code) 00113 { 00114 char err[256] = { '\0'}; 00115 ::regerror(code, preg, err, sizeof(err)); 00116 return String(err); 00117 } 00118 00119 00120 // ------------------------------------------------------------------- 00121 PosixRegEx::PosixRegEx() 00122 : compiled(false) 00123 , m_flags(0) 00124 , m_ecode(REG_NOERROR) 00125 { 00126 } 00127 00128 00129 // ------------------------------------------------------------------- 00130 PosixRegEx::PosixRegEx(const String ®ex, int cflags) 00131 : compiled(false) 00132 , m_flags(0) 00133 , m_ecode(REG_NOERROR) 00134 { 00135 if( !compile(regex, cflags)) 00136 { 00137 BLOCXX_THROW_ERR(RegExCompileException, 00138 errorString().c_str(), m_ecode); 00139 } 00140 } 00141 00142 00143 // ------------------------------------------------------------------- 00144 PosixRegEx::PosixRegEx(const PosixRegEx &ref) 00145 : compiled(false) 00146 , m_flags(ref.m_flags) 00147 , m_ecode(REG_NOERROR) 00148 , m_rxstr(ref.m_rxstr) 00149 { 00150 if( ref.compiled && !compile(ref.m_rxstr, ref.m_flags)) 00151 { 00152 BLOCXX_THROW_ERR(RegExCompileException, 00153 errorString().c_str(), m_ecode); 00154 } 00155 } 00156 00157 00158 // ------------------------------------------------------------------- 00159 PosixRegEx::~PosixRegEx() 00160 { 00161 if( compiled) 00162 { 00163 regfree(&m_regex); 00164 } 00165 } 00166 00167 00168 // ------------------------------------------------------------------- 00169 PosixRegEx & 00170 PosixRegEx::operator = (const PosixRegEx &ref) 00171 { 00172 if( !ref.compiled) 00173 { 00174 m_ecode = REG_NOERROR; 00175 m_error.erase(); 00176 m_flags = ref.m_flags; 00177 m_rxstr = ref.m_rxstr; 00178 if( compiled) 00179 { 00180 regfree(&m_regex); 00181 compiled = false; 00182 } 00183 } 00184 else if( !compile(ref.m_rxstr, ref.m_flags)) 00185 { 00186 BLOCXX_THROW_ERR(RegExCompileException, 00187 errorString().c_str(), m_ecode); 00188 } 00189 return *this; 00190 } 00191 00192 00193 // ------------------------------------------------------------------- 00194 bool 00195 PosixRegEx::compile(const String ®ex, int cflags) 00196 { 00197 if( compiled) 00198 { 00199 regfree(&m_regex); 00200 compiled = false; 00201 } 00202 00203 m_rxstr = regex; 00204 m_flags = cflags; 00205 m_ecode = ::regcomp(&m_regex, regex.c_str(), cflags); 00206 if( m_ecode == REG_NOERROR) 00207 { 00208 compiled = true; 00209 m_error.erase(); 00210 return true; 00211 } 00212 else 00213 { 00214 m_error = getError(&m_regex, m_ecode); 00215 return false; 00216 } 00217 } 00218 00219 00220 // ------------------------------------------------------------------- 00221 int 00222 PosixRegEx::errorCode() 00223 { 00224 return m_ecode; 00225 } 00226 00227 00228 // ------------------------------------------------------------------- 00229 String 00230 PosixRegEx::errorString() const 00231 { 00232 return m_error; 00233 } 00234 00235 00236 // ------------------------------------------------------------------- 00237 String 00238 PosixRegEx::patternString() const 00239 { 00240 return m_rxstr; 00241 } 00242 00243 00244 // ------------------------------------------------------------------- 00245 int 00246 PosixRegEx::compileFlags() const 00247 { 00248 return m_flags; 00249 } 00250 00251 00252 // ------------------------------------------------------------------- 00253 bool 00254 PosixRegEx::isCompiled() const 00255 { 00256 return compiled; 00257 } 00258 00259 00260 // ------------------------------------------------------------------- 00261 bool 00262 PosixRegEx::execute(MatchArray &sub, const String &str, 00263 size_t index, size_t count, int eflags) 00264 { 00265 if( !compiled) 00266 { 00267 BLOCXX_THROW(RegExCompileException, 00268 "Regular expression is not compiled"); 00269 } 00270 00271 if( index > str.length()) 00272 { 00273 BLOCXX_THROW(OutOfBoundsException, 00274 Format("String index out of bounds (" 00275 "length = %1, index = %2).", 00276 str.length(), index 00277 ).c_str()); 00278 } 00279 00280 if( count == 0) 00281 { 00282 count = m_regex.re_nsub + 1; 00283 } 00284 AutoPtrVec<regmatch_t> rsub(new regmatch_t[count]); 00285 rsub[0].rm_so = -1; 00286 rsub[0].rm_eo = -1; 00287 00288 sub.clear(); 00289 m_ecode = ::regexec(&m_regex, str.c_str() + index, 00290 count, rsub.get(), eflags); 00291 if( m_ecode == REG_NOERROR) 00292 { 00293 m_error.erase(); 00294 if( m_flags & REG_NOSUB) 00295 { 00296 return true; 00297 } 00298 00299 sub.resize(count); 00300 for(size_t n = 0; n < count; n++) 00301 { 00302 if( rsub[n].rm_so < 0 || rsub[n].rm_eo < 0) 00303 { 00304 sub[n] = rsub[n]; 00305 } 00306 else 00307 { 00308 rsub[n].rm_so += index; 00309 rsub[n].rm_eo += index; 00310 sub[n] = rsub[n]; 00311 } 00312 } 00313 return true; 00314 } 00315 else 00316 { 00317 m_error = getError(&m_regex, m_ecode); 00318 return false; 00319 } 00320 } 00321 00322 00323 // ------------------------------------------------------------------- 00324 StringArray 00325 PosixRegEx::capture(const String &str, size_t index, size_t count, int eflags) 00326 { 00327 if( !compiled) 00328 { 00329 BLOCXX_THROW(RegExCompileException, 00330 "Regular expression is not compiled"); 00331 } 00332 00333 MatchArray rsub; 00334 StringArray ssub; 00335 00336 bool match = execute(rsub, str, index, count, eflags); 00337 if( match) 00338 { 00339 if( rsub.empty()) 00340 { 00341 BLOCXX_THROW(RegExCompileException, 00342 "Non-capturing regular expression"); 00343 } 00344 00345 MatchArray::const_iterator i=rsub.begin(); 00346 for( ; i != rsub.end(); ++i) 00347 { 00348 if( i->rm_so >= 0 && i->rm_eo >= 0) 00349 { 00350 ssub.push_back(str.substring(i->rm_so, 00351 i->rm_eo - i->rm_so)); 00352 } 00353 else 00354 { 00355 ssub.push_back(String("")); 00356 } 00357 } 00358 } 00359 else if(m_ecode != REG_NOMATCH) 00360 { 00361 BLOCXX_THROW_ERR(RegExExecuteException, 00362 errorString().c_str(), m_ecode); 00363 } 00364 return ssub; 00365 } 00366 00367 00368 // ------------------------------------------------------------------- 00369 blocxx::String 00370 PosixRegEx::replace(const String &str, const String &rep, 00371 bool global, int eflags) 00372 { 00373 if( !compiled) 00374 { 00375 BLOCXX_THROW(RegExCompileException, 00376 "Regular expression is not compiled"); 00377 } 00378 00379 MatchArray rsub; 00380 bool match; 00381 size_t off = 0; 00382 String out = str; 00383 00384 do 00385 { 00386 match = execute(rsub, out, off, 0, eflags); 00387 if( match) 00388 { 00389 if( rsub.empty() || 00390 rsub[0].rm_so < 0 || 00391 rsub[0].rm_eo < 0) 00392 { 00393 // only if empty (missused as guard). 00394 BLOCXX_THROW(RegExCompileException, 00395 "Non-capturing regular expression"); 00396 } 00397 00398 String res = substitute_caps(rsub, out, rep); 00399 00400 out = out.substring(0, rsub[0].rm_so) + 00401 res + out.substring(rsub[0].rm_eo); 00402 00403 off = rsub[0].rm_so + res.length(); 00404 } 00405 else if(m_ecode == REG_NOMATCH) 00406 { 00407 m_ecode = REG_NOERROR; 00408 m_error.erase(); 00409 } 00410 else 00411 { 00412 BLOCXX_THROW_ERR(RegExExecuteException, 00413 errorString().c_str(), m_ecode); 00414 } 00415 } while(global && match && out.length() > off); 00416 00417 return out; 00418 } 00419 00420 // ------------------------------------------------------------------- 00421 StringArray 00422 PosixRegEx::split(const String &str, bool empty, int eflags) 00423 { 00424 if( !compiled) 00425 { 00426 BLOCXX_THROW(RegExCompileException, 00427 "Regular expression is not compiled"); 00428 } 00429 00430 MatchArray rsub; 00431 StringArray ssub; 00432 bool match; 00433 size_t off = 0; 00434 size_t len = str.length(); 00435 00436 do 00437 { 00438 match = execute(rsub, str, off, 1, eflags); 00439 if( match) 00440 { 00441 if( rsub.empty() || 00442 rsub[0].rm_so < 0 || 00443 rsub[0].rm_eo < 0) 00444 { 00445 BLOCXX_THROW(RegExCompileException, 00446 "Non-capturing regular expression"); 00447 } 00448 00449 if( empty || ((size_t)rsub[0].rm_so > off)) 00450 { 00451 ssub.push_back(str.substring(off, 00452 rsub[0].rm_so - off)); 00453 } 00454 off = rsub[0].rm_eo; 00455 } 00456 else if(m_ecode == REG_NOMATCH) 00457 { 00458 String tmp = str.substring(off); 00459 if( empty || !tmp.empty()) 00460 { 00461 ssub.push_back(tmp); 00462 } 00463 m_ecode = REG_NOERROR; 00464 m_error.erase(); 00465 } 00466 else 00467 { 00468 BLOCXX_THROW_ERR(RegExExecuteException, 00469 errorString().c_str(), m_ecode); 00470 } 00471 } while(match && len > off); 00472 00473 return ssub; 00474 } 00475 00476 00477 // ------------------------------------------------------------------- 00478 StringArray 00479 PosixRegEx::grep(const StringArray &src, int eflags) 00480 { 00481 if( !compiled) 00482 { 00483 BLOCXX_THROW(RegExCompileException, 00484 "Regular expression is not compiled"); 00485 } 00486 00487 m_ecode = REG_NOERROR; 00488 m_error.erase(); 00489 00490 StringArray out; 00491 if( !src.empty()) 00492 { 00493 StringArray::const_iterator i=src.begin(); 00494 for( ; i != src.end(); ++i) 00495 { 00496 int ret = ::regexec(&m_regex, i->c_str(), 00497 0, NULL, eflags); 00498 if( ret == REG_NOERROR) 00499 { 00500 out.push_back(*i); 00501 } 00502 else if(ret != REG_NOMATCH) 00503 { 00504 m_ecode = ret; 00505 m_error = getError(&m_regex, m_ecode); 00506 BLOCXX_THROW_ERR(RegExExecuteException, 00507 errorString().c_str(), m_ecode); 00508 } 00509 } 00510 } 00511 00512 return out; 00513 } 00514 00515 00516 // ------------------------------------------------------------------- 00517 bool 00518 PosixRegEx::match(const String &str, size_t index, int eflags) const 00519 { 00520 if( !compiled) 00521 { 00522 BLOCXX_THROW(RegExCompileException, 00523 "Regular expression is not compiled"); 00524 } 00525 00526 if( index > str.length()) 00527 { 00528 BLOCXX_THROW(OutOfBoundsException, 00529 Format("String index out of bounds (" 00530 "length = %1, index = %2).", 00531 str.length(), index 00532 ).c_str()); 00533 } 00534 00535 m_ecode = ::regexec(&m_regex, str.c_str() + index, 00536 0, NULL, eflags); 00537 00538 if( m_ecode == REG_NOERROR) 00539 { 00540 m_error.erase(); 00541 return true; 00542 } 00543 else if(m_ecode == REG_NOMATCH) 00544 { 00545 m_error = getError(&m_regex, m_ecode); 00546 return false; 00547 } 00548 else 00549 { 00550 m_error = getError(&m_regex, m_ecode); 00551 BLOCXX_THROW_ERR(RegExExecuteException, 00552 errorString().c_str(), m_ecode); 00553 } 00554 } 00555 00556 00557 // ------------------------------------------------------------------- 00558 } // namespace BLOCXX_NAMESPACE 00559 00560 #endif // BLOCXX_HAVE_REGEX_H 00561 #endif // BLOCXX_HAVE_REGEX 00562 00563 /* vim: set ts=8 sts=8 sw=8 ai noet: */ 00564