blocxx

String.cpp

Go to the documentation of this file.
00001 /*******************************************************************************
00002 * Copyright (C) 2005, Vintela, Inc. All rights reserved.
00003 * Copyright (C) 2006, Novell, Inc. All rights reserved.
00004 * 
00005 * Redistribution and use in source and binary forms, with or without
00006 * modification, are permitted provided that the following conditions are met:
00007 * 
00008 *     * Redistributions of source code must retain the above copyright notice,
00009 *       this list of conditions and the following disclaimer.
00010 *     * Redistributions in binary form must reproduce the above copyright
00011 *       notice, this list of conditions and the following disclaimer in the
00012 *       documentation and/or other materials provided with the distribution.
00013 *     * Neither the name of 
00014 *       Vintela, Inc., 
00015 *       nor Novell, Inc., 
00016 *       nor the names of its contributors or employees may be used to 
00017 *       endorse or promote products derived from this software without 
00018 *       specific prior written permission.
00019 * 
00020 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
00021 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00022 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
00023 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
00024 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
00025 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
00026 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
00027 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
00028 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
00029 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
00030 * POSSIBILITY OF SUCH DAMAGE.
00031 *******************************************************************************/
00032 
00033 
00039 #include "blocxx/BLOCXX_config.h"
00040 #include "blocxx/String.hpp"
00041 #include "blocxx/Char16.hpp"
00042 #include "blocxx/Array.hpp"
00043 #include "blocxx/StringStream.hpp"
00044 #include "blocxx/Format.hpp"
00045 #include "blocxx/BinarySerialization.hpp"
00046 #include "blocxx/Assertion.hpp"
00047 #include "blocxx/AutoPtr.hpp"
00048 #include "blocxx/Bool.hpp"
00049 #include "blocxx/UTF8Utils.hpp"
00050 #include "blocxx/ExceptionIds.hpp"
00051 #include "blocxx/COWIntrusiveCountableBase.hpp"
00052 
00053 #include <cstdio>
00054 #include <cstdlib>
00055 #include <cstring>
00056 #include <cctype>
00057 #include <cstdarg>
00058 #include <cerrno>
00059 #if defined(BLOCXX_HAVE_ISTREAM) && defined(BLOCXX_HAVE_OSTREAM)
00060 #include <istream>
00061 #include <ostream>
00062 #else
00063 #include <iostream>
00064 #endif
00065 #include <cmath> // for HUGE_VAL
00066 #include <cfloat> // for DBL_MANT_DIG
00067 
00068 #ifdef BLOCXX_WIN32
00069 #define SNPRINTF _snprintf
00070 #else
00071 #define SNPRINTF snprintf
00072 #endif
00073 
00074 namespace BLOCXX_NAMESPACE
00075 {
00076 
00077 using std::istream;
00078 using std::ostream;
00079 using std::streambuf;
00080 
00081 BLOCXX_DEFINE_EXCEPTION_WITH_ID(StringConversion);
00082 
00084 static inline int
00085 strncmpi(const char* s1, const char* s2, size_t n)
00086 {
00087    String ls1(s1, n);
00088    String ls2(s2, n);
00089    return ls1.compareToIgnoreCase(ls2);
00090 }
00091 
00092 // class invariant: m_buf points to a null-terminated sequence of characters. m_buf is m_len+1 bytes long.
00093 class String::ByteBuf : public COWIntrusiveCountableBase
00094 {
00095 public:
00096    ByteBuf(const char* s) :
00097       m_len(::strlen(s)), m_buf(new char[m_len+1])
00098    {
00099       strcpy(m_buf, s);
00100    }
00101 
00102    ByteBuf(const ByteBuf& arg)
00103       : COWIntrusiveCountableBase(arg)
00104       , m_len(arg.m_len)
00105       , m_buf(new char[m_len+1])
00106    {
00107       strcpy(m_buf, arg.m_buf);
00108    }
00109    
00110    ByteBuf(AutoPtrVec<char>& s, size_t len)
00111       : m_len(len), m_buf(s.release())
00112    {
00113    }
00114    
00115    ~ByteBuf() { delete [] m_buf; }
00116    
00117    ByteBuf& operator= (const ByteBuf& arg)
00118    {
00119       char* buf = new char[arg.m_len+1];
00120       strcpy(buf, arg.m_buf);
00121       delete [] m_buf;
00122       m_buf = buf;
00123       m_len = arg.m_len;
00124       return *this;
00125    }
00126    
00127    size_t length() const { return m_len; }
00128    char* data() const { return m_buf; }
00129    ByteBuf* clone() const { return new ByteBuf(*this); }
00130 private:
00131    size_t m_len;
00132    char* m_buf;
00133 };
00135 #if defined(BLOCXX_AIX)
00136 const size_t String::npos = ~0;
00137 #endif
00138 
00139 String::String() :
00140    m_buf(0)
00141 {
00142 }
00144 String::String(Bool parm) :
00145    m_buf(parm.toString().m_buf)
00146 {
00147 }
00149 String::String(const Char16& parm) :
00150    m_buf(parm.toString().m_buf)
00151 {
00152 }
00154 String::String(const std::string& str) :
00155    m_buf(str.empty() ? 0 : new ByteBuf(str.c_str()))
00156 {
00157 }
00158 #if defined(BLOCXX_WIN32)
00159 #define snprintf _snprintf // stupid windoze...
00160 #endif
00161 
00162 String::String(Int32 val) :
00163    m_buf(NULL)
00164 {
00165    char tmpbuf[32];
00166    int len = snprintf(tmpbuf, sizeof(tmpbuf), "%d", val);
00167    AutoPtrVec<char> bfr(new char[len+1]);
00168    ::snprintf(bfr.get(), len+1, "%d", val);
00169    m_buf = new ByteBuf(bfr, len);
00170 }
00172 String::String(UInt32 val) :
00173    m_buf(NULL)
00174 {
00175    char tmpbuf[32];
00176    int len = ::snprintf(tmpbuf, sizeof(tmpbuf), "%u", val);
00177    AutoPtrVec<char> bfr(new char[len+1]);
00178    ::snprintf(bfr.get(), len+1, "%u", val);
00179    m_buf = new ByteBuf(bfr, len);
00180 }
00181 #if defined(BLOCXX_INT32_IS_INT) && defined(BLOCXX_INT64_IS_LONG_LONG)
00182 
00183 String::String(long val) :
00184    m_buf(NULL)
00185 {
00186    char tmpbuf[32];
00187    int len = snprintf(tmpbuf, sizeof(tmpbuf), "%ld", val);
00188    AutoPtrVec<char> bfr(new char[len+1]);
00189    ::snprintf(bfr.get(), len+1, "%ld", val);
00190    m_buf = new ByteBuf(bfr, len);
00191 }
00193 String::String(unsigned long val) :
00194    m_buf(NULL)
00195 {
00196    char tmpbuf[32];
00197    int len = ::snprintf(tmpbuf, sizeof(tmpbuf), "%lu", val);
00198    AutoPtrVec<char> bfr(new char[len+1]);
00199    ::snprintf(bfr.get(), len+1, "%lu", val);
00200    m_buf = new ByteBuf(bfr, len);
00201 }
00202 #endif
00203 #if defined(BLOCXX_WIN32)
00204 #undef snprintf
00205 #endif
00206 
00207 String::String(Int64 val) :
00208    m_buf(NULL)
00209 {
00210    OStringStream ss(33);
00211    ss << val;
00212    m_buf = new ByteBuf(ss.c_str());
00213 }
00215 String::String(UInt64 val) :
00216    m_buf(NULL)
00217 {
00218 #if defined(BLOCXX_INT64_IS_LONG)
00219    char tmpbuf[32];
00220    ::snprintf(tmpbuf, sizeof(tmpbuf), "%lu", val);
00221    m_buf = new ByteBuf(tmpbuf);
00222 #elif defined(BLOCXX_INT64_IS_LONG_LONG)
00223    // unfortunately not all C libraries support long long with snprintf().
00224    // but the C++ iostream library handles it.
00225    OStringStream ss;
00226    ss << val;
00227    m_buf = new ByteBuf(ss.c_str());
00228 #endif
00229 }
00231 // decimal digits = ceiling((bits)*ln(2)/ln(10))
00232 String::String(Real32 val) :
00233    m_buf(NULL)
00234 {
00235    char tmpbuf[128];
00236 #if FLT_RADIX == 2
00237 #if defined(BLOCXX_REAL32_IS_FLOAT)
00238 	::SNPRINTF(tmpbuf, sizeof(tmpbuf), "%.*g", FLT_MANT_DIG * 3 / 10 + 1, static_cast<double>(val));
00239 #elif defined(BLOCXX_REAL32_IS_DOUBLE)
00240 	::SNPRINTF(tmpbuf, sizeof(tmpbuf), "%.*g", DBL_MANT_DIG * 3 / 10 + 1, val);
00241 #endif
00242 #else
00243 #error "The formula for computing the number of digits of precision for a floating point needs to be implmented. It's ceiling(bits * log(FLT_RADIX) / log(10))"
00244 #endif
00245    m_buf = new ByteBuf(tmpbuf);
00246 }
00248 String::String(Real64 val) :
00249    m_buf(NULL)
00250 {
00251    char tmpbuf[128];
00252 #if FLT_RADIX == 2
00253 #if defined(BLOCXX_REAL64_IS_DOUBLE)
00254 	::SNPRINTF(tmpbuf, sizeof(tmpbuf), "%.*g", DBL_MANT_DIG * 3 / 10 + 1, val);
00255 #elif defined(BLOCXX_REAL64_IS_LONG_DOUBLE)
00256 	::SNPRINTF(tmpbuf, sizeof(tmpbuf), "%.*Lg", LDBL_MANT_DIG * 3 / 10 + 1, val);
00257 #endif
00258 #else
00259 #error "The formula for computing the number of digits of precision for a floating point needs to be implmented. It's ceiling(bits * log(FLT_RADIX) / log(10))"
00260 #endif
00261    m_buf = new ByteBuf(tmpbuf);
00262 }
00264 String::String(const char* str) :
00265    m_buf(NULL)
00266 {
00267    m_buf = (NULL == str) ? 0 : new ByteBuf(str);
00268 }
00270 String::String(ETakeOwnershipFlag, char* allocatedMemory, size_t len) :
00271    m_buf(NULL)
00272 {
00273    BLOCXX_ASSERT(allocatedMemory != 0);
00274    AutoPtrVec<char> p(allocatedMemory);
00275    m_buf = new ByteBuf(p, len);
00276 }
00278 String::String(const char* str, size_t len) :
00279    m_buf(NULL)
00280 {
00281    if (NULL == str)
00282    {
00283       m_buf = 0;
00284    }
00285    else
00286    {
00287       AutoPtrVec<char> bfr(new char[len+1]);
00288       ::memcpy(bfr.get(), str, len);
00289       bfr[len] = '\0';
00290       m_buf = new ByteBuf(bfr, len);
00291    }
00292 }
00294 String::String(const String& arg) :
00295    m_buf(arg.m_buf)
00296 {
00297 }
00299 String::String(const Char16Array& ra) :
00300    m_buf(NULL)
00301 {
00302    size_t sz = ra.size();
00303    if (sz > 0)
00304    {
00305       StringBuffer buf(sz * 2);
00306       for (size_t i = 0; i < sz; i++)
00307       {
00308          buf += ra[i].toString();
00309       }
00310       m_buf = buf.releaseString().m_buf;
00311    }
00312    else
00313    {
00314       m_buf = 0;
00315    }
00316 }
00318 String::String(char c) :
00319    m_buf(NULL)
00320 {
00321    if (c != '\0')
00322    {
00323       char bfr[2];
00324       bfr[0] = c;
00325       bfr[1] = '\0';
00326       m_buf = new ByteBuf(bfr);
00327    }
00328    else
00329    {
00330       m_buf = 0;
00331    }
00332 }
00334 String::~String() 
00335 {
00336 }
00338 void
00339 String::swap(String& x)
00340 {
00341    m_buf.swap(x.m_buf);
00342 }
00344 char*
00345 String::allocateCString() const
00346 {
00347    size_t len = length() + 1;
00348    char* str = static_cast<char*>(malloc(len));
00349    ::strcpy(str, c_str());
00350    return str;
00351 }
00353 size_t
00354 String::length() const
00355 {
00356    return (m_buf) ? m_buf->length() : 0;
00357 }
00359 size_t
00360 String::UTF8Length() const
00361 {
00362    return UTF8Utils::charCount(c_str());
00363 }
00365 #ifdef BLOCXX_WIN32
00366 #define vsnprintf _vsnprintf // stupid windoze
00367 #endif
00368 int
00369 String::format(const char* fmt, ...)
00370 {
00371    int n, size = 64;
00372    AutoPtrVec<char> p(new char[size]);
00373    
00374    va_list ap;
00375    
00376    // Try to print in the allocated space
00377    while (true)
00378    {
00379       va_start(ap, fmt);
00380       n = vsnprintf(p.get(), size, fmt, ap);
00381       va_end(ap);                // If that worked, return the string.
00382       if (n > -1 && n < size)
00383       {
00384          m_buf = new ByteBuf(p, n);
00385          return static_cast<int>(length());
00386       }
00387       if (n > -1)    // glibc 2.1
00388          size = n+1; // precisely what is needed
00389       else           // glibc 2.0
00390          size *= 2;  // twice the old size
00391       p = new char[size];
00392    }
00393    // Not reachable.
00394    return 0;
00395 }
00396 #ifdef BLOCXX_WIN32
00397 #undef vsnprintf // stupid windoze
00398 #endif
00399 
00400 char
00401 String::charAt(size_t ndx) const
00402 {
00403    return (m_buf) ? m_buf->data()[ndx] : '\0';
00404 }
00406 int
00407 String::compareTo(const char* arg) const
00408 {
00409    const char* lhs = "";
00410    if (m_buf)
00411    {
00412       lhs = m_buf->data();
00413    }
00414    return ::strcmp(lhs, arg);
00415 }
00417 int
00418 String::compareTo(const String& arg) const
00419 {
00420    return compareTo(arg.c_str());
00421 }
00423 int
00424 String::compareToIgnoreCase(const char* arg) const
00425 {
00426    const char* lhs = "";
00427    if (m_buf)
00428    {
00429       lhs = m_buf->data();
00430    }
00431    return UTF8Utils::compareToIgnoreCase(lhs, arg);
00432 }
00434 int
00435 String::compareToIgnoreCase(const String& arg) const
00436 {
00437    return compareToIgnoreCase(arg.c_str());
00438 }
00439 
00441 String&
00442 String::concat(const char* arg)
00443 {
00444    if (arg && *arg)
00445    {
00446       size_t len = length() + ::strlen(arg);
00447       AutoPtrVec<char> bfr(new char[len+1]);
00448       bfr[0] = 0;
00449       if (m_buf)
00450       {
00451          ::strcpy(bfr.get(), m_buf->data());
00452       }
00453       ::strcat(bfr.get(), arg);
00454       m_buf = new ByteBuf(bfr, len);
00455    }
00456    return *this;
00457 }
00458 
00460 String&
00461 String::concat(char arg)
00462 {
00463    size_t newlen = length() + 1;
00464    AutoPtrVec<char> bfr(new char[newlen+1]);
00465    bfr[0] = 0;
00466    if (m_buf)
00467    {
00468       ::strcpy(bfr.get(), m_buf->data());
00469    }
00470    *(bfr.get()+length()) = arg;
00471    *(bfr.get()+newlen) = 0;
00472    m_buf = new ByteBuf(bfr, newlen);
00473    return *this;
00474 }
00475 
00477 bool
00478 String::endsWith(char arg) const
00479 {
00480    return (m_buf
00481          && m_buf->length()
00482          && m_buf->data()[m_buf->length()-1] == arg);
00483 }
00484 
00486 bool
00487 String::endsWith(const char* arg, EIgnoreCaseFlag ignoreCase) const
00488 {
00489    if (!arg || !*arg)
00490    {
00491       return (length() == 0);
00492    }
00493 
00494    if (!m_buf)
00495    {
00496       return false;
00497    }
00498 
00499    int ndx = static_cast<int>(length() - ::strlen(arg));
00500    if (ndx < 0)
00501    {
00502       return false;
00503    }
00504 
00505    return (ignoreCase)
00506       ? (UTF8Utils::compareToIgnoreCase(m_buf->data()+ndx, arg) == 0)
00507       : (::strcmp(m_buf->data()+ndx, arg) == 0);
00508 }
00510 bool
00511 String::equals(const char* arg) const
00512 {
00513    return(compareTo(arg) == 0);
00514 }
00516 bool
00517 String::equals(const String& arg) const
00518 {
00519    return equals(arg.c_str());
00520 }
00522 bool
00523 String::equalsIgnoreCase(const char* arg) const
00524 {
00525    return(compareToIgnoreCase(arg) == 0);
00526 }
00528 bool
00529 String::equalsIgnoreCase(const String& arg) const
00530 {
00531    return equalsIgnoreCase(arg.c_str());
00532 }
00534 UInt32
00535 String::hashCode() const
00536 {
00537    UInt32 hash = 0;
00538    size_t len = length();
00539    for (size_t i = 0; i < len; i++)
00540    {
00541       // Don't need to check if m_buf is null, because if it is, len == 0,
00542       // and this loop won't be executed.
00543       const char temp = m_buf->data()[i];
00544       hash = (hash << 4) + (temp * 13);
00545       UInt32 g = hash & 0xf0000000;
00546       if (g)
00547       {
00548          hash ^= (g >> 24);
00549          hash ^= g;
00550       }
00551    }
00552    return hash;
00553 }
00555 size_t
00556 String::indexOf(char ch, size_t fromIndex) const
00557 {
00558    //if (fromIndex < 0)
00559    //{
00560    // fromIndex = 0;
00561    //}
00562    size_t cc = npos;
00563    if (fromIndex < length())
00564    {
00565       // Don't need to check m_buf for NULL, because if length() == 0,
00566       // this code won't be executed.
00567       const char* p = String::strchr(m_buf->data()+fromIndex, ch);
00568       if (p)
00569       {
00570          cc = p - m_buf->data();
00571       }
00572    }
00573    return cc;
00574 }
00576 size_t
00577 String::indexOf(const char* arg, size_t fromIndex) const
00578 {
00579    size_t cc = npos;
00580    if (fromIndex < length())
00581    {
00582       // Don't need to check m_buf for NULL, because if length() == 0,
00583       // this code won't be executed
00584       char* p(0);
00585       if (arg && *arg)
00586       {
00587          p = ::strstr(m_buf->data()+fromIndex, arg);
00588       }
00589       else
00590       {
00591          p = m_buf->data()+fromIndex;
00592       }
00593 
00594       if (p != NULL)
00595       {
00596          cc = static_cast<size_t>(p - m_buf->data());
00597       }
00598    }
00599    return cc;
00600 }
00602 size_t
00603 String::lastIndexOf(char ch, size_t fromIndex) const
00604 {
00605    if (fromIndex == npos)
00606    {
00607       if ((fromIndex = length()-1) == npos)
00608       {
00609          return npos;
00610       }
00611    }
00612    size_t cc = npos;
00613    if (fromIndex < length())
00614    {
00615       for (size_t i = fromIndex; i != npos; i--)
00616       {
00617          // Don't need to check m_buf for NULL, because if length() == 0,
00618          // this code won't be executed.
00619          if (m_buf->data()[i] == ch)
00620          {
00621             cc = i;
00622             break;
00623          }
00624       }
00625    }
00626    return cc;
00627 }
00629 size_t
00630 String::lastIndexOf(const char* arg, size_t fromIndex) const
00631 {
00632    if (fromIndex == npos || fromIndex >= length())
00633    {
00634       if (static_cast<int>(fromIndex = length()-1) < 0)
00635       {
00636          return npos;
00637       }
00638    }
00639 
00640    int arglen = (arg) ? ::strlen(arg) : 0;
00641    if (static_cast<int>(fromIndex -= arglen - 1) < 0)
00642    {
00643       return npos;
00644    }
00645    if (!arg)
00646    {
00647       return length() - 1;
00648    }
00649    while (fromIndex != npos)
00650    {
00651       // Don't need to check m_buf for NULL, because if length() == 0,
00652       // this code won't be executed.
00653       if (::strncmp(m_buf->data()+fromIndex, arg, arglen) == 0)
00654       {
00655          break;
00656       }
00657       fromIndex--;
00658    }
00659    return fromIndex;
00660 }
00662 bool
00663 String::startsWith(char arg) const
00664 {
00665    return (m_buf
00666          && m_buf->length()
00667          && m_buf->data()[0] == arg);
00668 }
00669 
00671 bool
00672 String::startsWith(const char* arg, EIgnoreCaseFlag ignoreCase) const
00673 {
00674    bool cc = false;
00675    if (!arg || !*arg) // treat NULL as identical to the empty string
00676    {
00677       return true; // the empty string is a prefix of any string
00678    }
00679 
00680    size_t arglen = ::strlen(arg);
00681    if (arglen <= length())
00682    {
00683       // Don't need to check m_buf for NULL, because if length() == 0,
00684       // this code won't be executed.
00685       if (ignoreCase == E_CASE_INSENSITIVE)
00686       {
00687          cc = (strncmpi(m_buf->data(), arg, arglen) == 0);
00688       }
00689       else
00690       {
00691          cc = (::strncmp(m_buf->data(), arg, arglen) == 0);
00692       }
00693    }
00694    return cc;
00695 }
00697 String
00698 String::substring(size_t beginIndex, size_t len) const
00699 {
00700    String nil;
00701    size_t count = len;
00702    size_t l = length();
00703    if (0 == l)
00704    {
00705       return nil;
00706    }
00707    if (beginIndex >= l)
00708    {
00709       return nil;
00710    }
00711    else if (0 == len)
00712    {
00713       return nil;
00714    }
00715    else if (len == npos)
00716    {
00717       count = l - beginIndex;
00718    }
00719    if (count + beginIndex > l)
00720    {
00721       count = l - beginIndex;
00722    }
00723    // Don't need to check m_buf for NULL, because if length() == 0,
00724    // this code won't be executed.
00725    return String(static_cast<const char*>(m_buf->data()+beginIndex), count);
00726 }
00728 bool
00729 String::isSpaces() const
00730 {
00731    if (!m_buf)
00732    {
00733       return true;
00734    }
00735    char* p = m_buf->data();
00736    while (isspace(*p) && *p != '\0')
00737    {
00738       p++;
00739    }
00740    return (*p == '\0');
00741 }
00743 String&
00744 String::ltrim()
00745 {
00746    if (!m_buf)
00747    {
00748       return *this;
00749    }
00750    char* s1 = m_buf->data();
00751    while (isspace(*s1) && *s1 != '\0')
00752    {
00753       s1++;
00754    }
00755    if (s1 == m_buf->data())
00756    {
00757       return *this;
00758    }
00759    *this = String(s1);
00760    return *this;
00761 }
00763 String&
00764 String::rtrim()
00765 {
00766    if (length() == 0)
00767    {
00768       return *this;
00769    }
00770    char* s1 = m_buf->data() + (length()-1);
00771    while (isspace(*s1) && s1 >= m_buf->data())
00772    {
00773       s1--;
00774    }
00775    if (s1 == (m_buf->data() + (length()-1)))
00776    {
00777       return *this;
00778    }
00779    if (s1 < m_buf->data())
00780    {
00781       *this = String();
00782       return *this;
00783    }
00784    size_t len = (s1 - m_buf->data()) + 1;
00785    *this = String(m_buf->data(), len);
00786    return *this;
00787 }
00789 String&
00790 String::trim()
00791 {
00792    if (length() == 0)
00793    {
00794       return *this;
00795    }
00796    char* s1 = m_buf->data();
00797    while (isspace(*s1) && *s1 != '\0')
00798    {
00799       s1++;
00800    }
00801    if (*s1 == '\0')
00802    {
00803       // String is all spaces
00804       *this = String();
00805       return *this;
00806    }
00807    const char* p2 = String::strchr(s1, '\0');
00808    const char* s2 = p2 - 1;
00809    while (isspace(*s2))
00810    {
00811       s2--;
00812    }
00813    if (s1 == m_buf->data() && s2 == p2)
00814    {
00815       // String has no leading or trailing spaces
00816       return *this;
00817    }
00818    size_t len = (s2 - s1) + 1;
00819    *this = String(s1, len);
00820    return *this;
00821 }
00823 String&
00824 String::erase()
00825 {
00826    m_buf = 0;
00827    return *this;
00828 }
00830 String&
00831 String::erase(size_t idx, size_t len)
00832 {
00833    if ( idx >= length() )
00834    {
00835       return *this;
00836    }
00837    if (len == npos)
00838    {
00839       *this = substring(0, idx);
00840    }
00841    else
00842    {
00843       *this = substring(0, idx) + substring(idx + len);
00844    }
00845    return *this;
00846 }
00848 String&
00849 String::toLowerCase()
00850 {
00851    if (m_buf)
00852    {
00853       if (!UTF8Utils::toLowerCaseInPlace(m_buf->data()))
00854       {
00855          *this = UTF8Utils::toLowerCase(m_buf->data());
00856       }
00857    }
00858    return *this;
00859 }
00860 
00862 String&
00863 String::toUpperCase()
00864 {
00865    if (m_buf)
00866    {
00867       if (!UTF8Utils::toUpperCaseInPlace(m_buf->data()))
00868       {
00869          *this = UTF8Utils::toUpperCase(m_buf->data());
00870       }
00871    }
00872    return *this;
00873 }
00875 void
00876 String::readObject(streambuf & istrm)
00877 {
00878    UInt32 len;
00879    BinarySerialization::readLen(istrm, len);
00880    AutoPtrVec<char> bfr(new char[len+1]);
00881    BinarySerialization::read(istrm, bfr.get(), len);
00882    bfr[len] = '\0';
00883    m_buf = new ByteBuf(bfr, len);
00884 }
00886 void
00887 String::writeObject(streambuf & ostrm) const
00888 {
00889    UInt32 len = static_cast<UInt32>(length());
00890    BinarySerialization::writeLen(ostrm, len);
00891    if (len)
00892    {
00893       BinarySerialization::write(ostrm, m_buf->data(), len);
00894    }
00895 }
00897 String&
00898 String::operator= (const String& arg)
00899 {
00900    m_buf = arg.m_buf;
00901    return *this;
00902 }
00904 const char*
00905 String::c_str() const
00906 {
00907    if (m_buf)
00908    {
00909       return m_buf->data();
00910    }
00911    else
00912    {
00913       return "";
00914    }
00915 }
00917 static const char cnullChar = '\0';
00918 const char&
00919 String::operator[] (size_t ndx) const
00920 {
00921 #ifdef BLOCXX_DEBUG
00922    BLOCXX_ASSERT(ndx <= length());
00923 #endif
00924    if (!m_buf) // then length() == 0
00925    {
00926       // Only needed in case ndx == 0, but doesn't hurt if ndx > 0
00927       const_cast<buf_t &>(m_buf) = new ByteBuf("");
00928    }
00929    return *(ndx <= m_buf->length() ? m_buf->data() + ndx : &cnullChar);
00930 }
00932 static char nullChar = '\0';
00933 char&
00934 String::operator[] (size_t ndx)
00935 {
00936 #ifdef BLOCXX_DEBUG
00937    BLOCXX_ASSERT(ndx <= length());
00938 #endif
00939    if (!m_buf) // then length() == 0
00940    {
00941       // Only needed in case ndx == 0, but doesn't hurt if ndx > 0
00942       m_buf = new ByteBuf("");
00943    }
00944    return (ndx <= m_buf->length() ? m_buf->data()[ndx] : nullChar);
00945 }
00947 String
00948 String::toString() const
00949 {
00950    return *this;
00951 }
00953 static inline void
00954 throwStringConversion(const String::buf_t& m_buf, const char* type)
00955 {
00956    BLOCXX_THROW(StringConversionException, Format("Unable to convert \"%1\" into %2", m_buf->data(), type).c_str());
00957 }
00959 static inline void
00960 throwStringConversion(const char* str, const char* type)
00961 {
00962    BLOCXX_THROW(StringConversionException, Format("Unable to convert \"%1\" into %2", str, type).c_str());
00963 }
00965 Char16
00966 String::toChar16() const
00967 {
00968    if (UTF8Length() != 1)
00969    {
00970       throwStringConversion(c_str(), "Char16");
00971    }
00972    return Char16(*this);
00973 }
00974 template <typename T, typename FP>
00975 static inline
00976 T convertToRealType(const String::buf_t& m_buf, const char* type, FP fp)
00977 {
00978    if (m_buf && m_buf->length() > 0)
00979    {
00980       char* endptr(0);
00981       errno = 0;     // errno is thread local
00982       T rv = fp(m_buf->data(), &endptr);
00983       if (*endptr != '\0' || errno == ERANGE || rv == HUGE_VAL || rv == -HUGE_VAL)
00984       {
00985          throwStringConversion(m_buf, type);
00986       }
00987       return rv;
00988    }
00989    else
00990    {
00991       throwStringConversion("", type);
00992    }
00993    return T(); // to make compiler happy
00994 }
00996 Real32
00997 String::toReal32() const
00998 {
00999 #if defined(BLOCXX_REAL32_IS_FLOAT) && defined(BLOCXX_HAVE_STRTOF)
01000    return convertToRealType<Real32>(m_buf, "Real32", &strtof);
01001 #elif defined(BLOCXX_REAL32_IS_DOUBLE) || (defined(BLOCXX_REAL32_IS_FLOAT) && !defined(BLOCXX_HAVE_STRTOF))
01002    return convertToRealType<Real32>(m_buf, "Real32", &strtod);
01003 #endif
01004 }
01006 Real64
01007 String::toReal64() const
01008 {
01009 #if defined(BLOCXX_REAL64_IS_DOUBLE)
01010    return convertToRealType<Real64>(m_buf, "Real64", &strtod);
01011 #elif defined(BLOCXX_REAL64_IS_LONG_DOUBLE)
01012    return convertToRealType<Real64>(m_buf, "Real64", &strtold);
01013 #endif
01014 }
01016 bool
01017 String::toBool() const
01018 {
01019    if (equalsIgnoreCase("true"))
01020    {
01021       return true;
01022    }
01023    else if (equalsIgnoreCase("false"))
01024    {
01025       return false;
01026    }
01027    else
01028    {
01029       throwStringConversion(c_str(), "bool");
01030    }
01031    return false; // to make compiler happy
01032 }
01033 template <typename T, typename FP, typename FPRT>
01034 static inline
01035 T doConvertToIntType(const String::buf_t& m_buf, const char* type, FP fp, int base)
01036 {
01037    // the error detecting code below won't detect an empty string, so
01038    // we have to check for it explicitly.
01039    if (m_buf && m_buf->length() > 0)
01040    {
01041       char* endptr(0);
01042       errno = 0;     // errno is thread local
01043       FPRT v = fp(m_buf->data(), &endptr, base);
01044       T rv = static_cast<T>(v);
01045       if (*endptr != '\0' || errno == ERANGE || FPRT(rv) != v)
01046       {
01047          throwStringConversion(m_buf, type);
01048       }
01049       return rv;
01050    }
01051    else
01052    {
01053       throwStringConversion("", type);
01054    }
01055    return T(); // to make compiler happy
01056 }
01057 typedef unsigned long int (*strtoulfp_t)(const char *, char **,int);
01058 typedef long int (*strtolfp_t)(const char *, char **,int);
01059 typedef unsigned long long int (*strtoullfp_t)(const char *, char **,int);
01060 typedef long long int (*strtollfp_t)(const char *, char **,int);
01061 template <typename T>
01062 static inline
01063 T convertToUIntType(const String::buf_t& m_buf, const char* msg, int base)
01064 {
01065    return doConvertToIntType<T, strtoulfp_t, unsigned long int>(m_buf, msg, &strtoul, base);
01066 }
01067 template <typename T>
01068 static inline
01069 T convertToIntType(const String::buf_t& m_buf, const char* msg, int base)
01070 {
01071    return doConvertToIntType<T, strtolfp_t, long int>(m_buf, msg, &strtol, base);
01072 }
01073 template <typename T>
01074 static inline
01075 T convertToUInt64Type(const String::buf_t& m_buf, const char* msg, int base)
01076 {
01077    return doConvertToIntType<T, strtoullfp_t, unsigned long long int>(m_buf, msg, &String::strtoull, base);
01078 }
01079 template <typename T>
01080 static inline
01081 T convertToInt64Type(const String::buf_t& m_buf, const char* msg, int base)
01082 {
01083    return doConvertToIntType<T, strtollfp_t, long long int>(m_buf, msg, &String::strtoll, base);
01084 }
01086 UInt8
01087 String::toUInt8(int base) const
01088 {
01089    return convertToUIntType<UInt8>(m_buf, "UInt8", base);
01090 }
01092 Int8
01093 String::toInt8(int base) const
01094 {
01095    return convertToIntType<Int8>(m_buf, "Int8", base);
01096 }
01098 UInt16
01099 String::toUInt16(int base) const
01100 {
01101    return convertToUIntType<UInt16>(m_buf, "UInt16", base);
01102 }
01104 Int16
01105 String::toInt16(int base) const
01106 {
01107    return convertToIntType<Int16>(m_buf, "Int16", base);
01108 }
01110 UInt32
01111 String::toUInt32(int base) const
01112 {
01113    return convertToUIntType<UInt32>(m_buf, "UInt32", base);
01114 }
01116 Int32
01117 String::toInt32(int base) const
01118 {
01119    return convertToIntType<Int32>(m_buf, "Int32", base);
01120 }
01122 UInt64
01123 String::toUInt64(int base) const
01124 {
01125    return convertToUInt64Type<UInt64>(m_buf, "UInt64", base);
01126 }
01128 Int64
01129 String::toInt64(int base) const
01130 {
01131    return convertToInt64Type<Int64>(m_buf, "Int64", base);
01132 }
01134 unsigned int
01135 String::toUnsignedInt(int base) const
01136 {
01137    return convertToUIntType<unsigned int>(m_buf, "unsigned int", base);
01138 }
01140 int
01141 String::toInt(int base) const
01142 {
01143    return convertToIntType<int>(m_buf, "int", base);
01144 }
01146 StringArray
01147 String::tokenize(const char* delims, EReturnDelimitersFlag returnDelimitersAsTokens, EEmptyTokenReturnFlag returnEmptyTokens) const
01148 {
01149    StringArray ra;
01150    if (empty())
01151    {
01152       return ra;
01153    }
01154    if (delims == 0)
01155    {
01156       ra.append(*this);
01157       return ra;
01158    }
01159    // Don't need to check m_buf for NULL, because if length() == 0,
01160    // this code won't be executed.
01161    char* pstr = m_buf->data();
01162    AutoPtrVec<char> data(new char[m_buf->length()+1]);
01163    data[0] = 0;
01164    int i = 0;
01165    bool last_was_delim = false;
01166    while (*pstr)
01167    {
01168       if (String::strchr(delims, *pstr))
01169       {
01170          if (data[0] != 0)
01171          {
01172             ra.append(String(data.get()));
01173             data[0] = 0;
01174          }
01175          if ( (returnEmptyTokens == E_RETURN_EMPTY_TOKENS) && last_was_delim )
01176          {
01177             ra.append(String());
01178          }
01179          if ( returnDelimitersAsTokens == E_RETURN_DELIMITERS || returnDelimitersAsTokens == E_RETURN_TOKENS )
01180          {
01181             ra.append(String(*pstr));
01182          }        
01183          i = 0;
01184          last_was_delim = true;
01185       }
01186       else
01187       {
01188          last_was_delim = false;
01189          data[i++] = *pstr;
01190          data[i] = 0;
01191       }
01192       pstr++;
01193    }
01194    if (data[0] != 0)
01195    {
01196       ra.append(String(data.get()));
01197    }
01198    return ra;
01199 }
01200 
01202 #ifdef BLOCXX_HAVE_STRTOLL
01203 long long int
01204 String::strtoll(const char* nptr, char** endptr, int base)
01205 {
01206    return ::strtoll(nptr, endptr, base);
01207 }
01208 #else
01209 #ifndef LLONG_MAX
01210 #if BLOCXX_SIZEOF_LONG_LONG_INT == 8
01211 #define LLONG_MAX 9223372036854775807LL
01212 #else
01213 #define LLONG_MAX 2147483647LL
01214 #endif
01215 #define LLONG_MIN (-LLONG_MAX - 1LL)
01216 #endif
01217 long long int
01218 String::strtoll(const char* nptr, char** endptr, int base)
01219 {
01220    const char *s;
01221    long long acc, cutoff;
01222    int c;
01223    int neg, any, cutlim;
01224    // Skip white space and pick up leading +/- sign if any.
01225    // If base is 0, allow 0x for hex and 0 for octal, else
01226    // assume decimal; if base is already 16, allow 0x.
01227    s = nptr;
01228    do
01229    {
01230       c = (unsigned char) *s++;
01231    } while (isspace(c));
01232    if (c == '-')
01233    {
01234       neg = 1;
01235       c = *s++;
01236    }
01237    else
01238    {
01239       neg = 0;
01240       if (c == '+')
01241       {
01242          c = *s++;
01243       }
01244    }
01245    if ((base == 0 || base == 16)
01246       && c == '0'
01247       && (*s == 'x' || *s == 'X'))
01248    {
01249       c = s[1];
01250       s += 2;
01251       base = 16;
01252    }
01253    if (base == 0)
01254    {
01255       base = c == '0' ? 8 : 10;
01256    }
01257    // Compute the cutoff value between legal numbers and illegal
01258    // numbers.  That is the largest legal value, divided by the
01259    // base.  An input number that is greater than this value, if
01260    // followed by a legal input character, is too big.  One that
01261    // is equal to this value may be valid or not; the limit
01262    // between valid and invalid numbers is then based on the last
01263    // digit.  For instance, if the range for longs is
01264    // [-2147483648..2147483647] and the input base is 10,
01265    // cutoff will be set to 214748364 and cutlim to either
01266    // 7 (neg==0) or 8 (neg==1), meaning that if we have accumulated
01267    // a value > 214748364, or equal but the next digit is > 7 (or 8),
01268    // the number is too big, and we will return a range error.
01269    //
01270    // Set any if any `digits' consumed; make it negative to indicate
01271    // overflow.
01272    cutoff = neg ? LLONG_MIN : LLONG_MAX;
01273    cutlim = static_cast<int>(cutoff % base);
01274    cutoff /= base;
01275    if (neg)
01276    {
01277       if (cutlim > 0)
01278       {
01279          cutlim -= base;
01280          cutoff += 1;
01281       }
01282       cutlim = -cutlim;
01283    }
01284    for (acc = 0, any = 0;; c = (unsigned char) *s++)
01285    {
01286       if (isdigit(c))
01287       {
01288          c -= '0';
01289       }
01290       else if (isalpha(c))
01291       {
01292          c -= isupper(c) ? 'A' - 10 : 'a' - 10;
01293       }
01294       else
01295       {
01296          break;
01297       }
01298       if (c >= base)
01299       {
01300          break;
01301       }
01302       if (any < 0)
01303       {
01304          continue;
01305       }
01306       if (neg)
01307       {
01308          if (acc < cutoff || acc == cutoff && c > cutlim)
01309          {
01310             any = -1;
01311             acc = LLONG_MIN;
01312             errno = ERANGE;
01313          }
01314          else
01315          {
01316             any = 1;
01317             acc *= base;
01318             acc -= c;
01319          }
01320       }
01321       else
01322       {
01323          if (acc > cutoff || acc == cutoff && c > cutlim)
01324          {
01325             any = -1;
01326             acc = LLONG_MAX;
01327             errno = ERANGE;
01328          }
01329          else
01330          {
01331             any = 1;
01332             acc *= base;
01333             acc += c;
01334          }
01335       }
01336    }
01337    if (endptr != 0)
01338    {
01339       *endptr = (char *) (any ? s - 1 : nptr);
01340    }
01341    return(acc);
01342 }
01343 #endif   // #ifdef BLOCXX_HAVE_STRTOLL
01344 
01346 #ifdef BLOCXX_HAVE_STRTOULL
01347 unsigned long long int
01348 String::strtoull(const char* nptr, char** endptr, int base)
01349 {
01350    return ::strtoull(nptr, endptr, base);
01351 }
01352 #else
01353 #ifndef ULLONG_MAX
01354 #if BLOCXX_SIZEOF_LONG_LONG_INT == 8
01355 #define ULLONG_MAX 18446744073709551615ULL
01356 #else
01357 #define ULLONG_MAX 4294967295ULL
01358 #endif
01359 #endif
01360 unsigned long long int
01361 String::strtoull(const char* nptr, char** endptr, int base)
01362 {
01363    const char *s;
01364    unsigned long long acc, cutoff, cutlim;
01365    unsigned int c;
01366    int neg, any;
01367    s = nptr;
01368    do
01369    {
01370       c = (unsigned char) *s++;
01371    } while (isspace(c));
01372    if (c == '-')
01373    {
01374       neg = 1;
01375       c = *s++;
01376    }
01377    else
01378    {
01379       neg = 0;
01380       if (c == '+')
01381       {
01382          c = *s++;
01383       }
01384    }
01385    if ((base == 0 || base == 16)
01386       && c == '0'
01387       && (*s == 'x' || *s == 'X'))
01388    {
01389       c = s[1];
01390       s += 2;
01391       base = 16;
01392    }
01393    if (base == 0)
01394    {
01395       base = c == '0' ? 8 : 10;
01396    }
01397    cutoff = ULLONG_MAX / (unsigned long long)base;
01398    cutlim = ULLONG_MAX % (unsigned long long)base;
01399    for (acc = 0, any = 0;; c = (unsigned char) *s++)
01400    {
01401       if (isdigit(c))
01402       {
01403          c -= '0';
01404       }
01405       else if (isalpha(c))
01406       {
01407          c -= isupper(c) ? 'A' - 10 : 'a' - 10;
01408       }
01409       else
01410       {
01411          break;
01412       }
01413       if (c >= (unsigned int)base)
01414       {
01415          break;
01416       }
01417       if (any < 0)
01418       {
01419          continue;
01420       }
01421       if (acc > cutoff || acc == cutoff && c > cutlim)
01422       {
01423          any = -1;
01424          acc = ULLONG_MAX;
01425          errno = ERANGE;
01426       }
01427       else
01428       {
01429          any = 1;
01430          acc *= (unsigned long)base;
01431          acc += c;
01432       }
01433    }
01434    if (neg && any > 0)
01435    {
01436 #ifdef BLOCXX_WIN32
01437 #pragma warning (push)
01438 #pragma warning (disable: 4146)
01439 #endif
01440 
01441       acc = -acc;
01442 
01443 #ifdef BLOCXX_WIN32
01444 #pragma warning (pop)
01445 #endif
01446 
01447    }
01448    if (endptr != 0)
01449    {
01450       *endptr = (char *) (any ? s - 1 : nptr);
01451    }
01452    return(acc);
01453 }
01454 #endif   // #ifdef BLOCXX_HAVE_STRTOULL
01455 
01456 String
01457 operator + (const String& s1, const String& s2)
01458 {
01459    String rstr(s1);
01460    rstr += s2;
01461    return rstr;
01462 }
01464 String
01465 operator + (const char* p, const String& s)
01466 {
01467    String rstr(p);
01468    rstr += s;
01469    return rstr;
01470 }
01472 String
01473 operator + (const String& s, const char* p)
01474 {
01475    String rstr(s);
01476    rstr += p;
01477    return rstr;
01478 }
01480 String
01481 operator + (char c, const String& s)
01482 {
01483    String rstr(c);
01484    rstr += s;
01485    return rstr;
01486 }
01488 String
01489 operator + (const String& s, char c)
01490 {
01491    String rstr(s);
01492    rstr += String(c);
01493    return rstr;
01494 }
01496 ostream&
01497 operator<< (ostream& ostr, const String& arg)
01498 {
01499    ostr.write(arg.c_str(), arg.length());
01500    return ostr;
01501 }
01503 // static
01504 String
01505 String::getLine(istream& is)
01506 {
01507    StringBuffer rv(80);
01508    rv.getLine(is);
01509    return rv.releaseString();
01510 }
01512 // STATIC
01513 const char*
01514 String::strchr(const char* theStr, int c)
01515 {
01516    const char* tmpChar = theStr;
01517    for (; *tmpChar && *tmpChar != c; tmpChar++)
01518    {
01519       // empty
01520    }
01521    return ((*tmpChar) == c ? tmpChar : 0);
01522 }
01523 
01524 } // end namespace BLOCXX_NAMESPACE
01525