blocxx

IConv.cpp

Go to the documentation of this file.
00001 /*******************************************************************************
00002 * Copyright (C) 2005 Novell, Inc. All rights reserved.
00003 *
00004 * Redistribution and use in source and binary forms, with or without
00005 * modification, are permitted provided that the following conditions are met:
00006 *
00007 *  - Redistributions of source code must retain the above copyright notice,
00008 *    this list of conditions and the following disclaimer.
00009 *
00010 *  - Redistributions in binary form must reproduce the above copyright notice,
00011 *    this list of conditions and the following disclaimer in the documentation
00012 *    and/or other materials provided with the distribution.
00013 *
00014 *  - Neither the name of Novell, Inc., nor the names of its
00015 *    contributors may be used to endorse or promote products derived from this
00016 *    software without specific prior written permission.
00017 *
00018 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS''
00019 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00020 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
00021 * ARE DISCLAIMED. IN NO EVENT SHALL Novell, Inc., OR THE 
00022 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 
00023 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
00024 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; 
00025 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
00026 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR 
00027 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 
00028 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00029 *******************************************************************************/
00034 #include "blocxx/IConv.hpp"
00035 
00036 #if defined(BLOCXX_HAVE_ICONV_SUPPORT)
00037 #include "blocxx/Assertion.hpp"
00038 #include "blocxx/Format.hpp"
00039 #include "blocxx/Exec.hpp"
00040 
00041 #include <cwchar>
00042 #include <cwctype>
00043 
00044 #include <errno.h>
00045 
00046 namespace BLOCXX_NAMESPACE
00047 {
00048 
00049 // -------------------------------------------------------------------
00050 IConv_t::IConv_t()
00051    : m_iconv(iconv_t(-1))
00052 {
00053 }
00054 
00055 
00056 // -------------------------------------------------------------------
00057 IConv_t::IConv_t(const String &fromEncoding, const String &toEncoding)
00058 {
00059    m_iconv = ::iconv_open(toEncoding.c_str(), fromEncoding.c_str());
00060    if( m_iconv == iconv_t(-1))
00061    {
00062       BLOCXX_THROW(StringConversionException,
00063                    Format("Unable to convert from \"%1\" to \"%2\"",
00064                           fromEncoding, toEncoding).c_str());
00065    }
00066 }
00067 
00068 
00069 // -------------------------------------------------------------------
00070 IConv_t::~IConv_t()
00071 {
00072    close();
00073 }
00074 
00075 
00076 // -------------------------------------------------------------------
00077 bool
00078 IConv_t::open(const String &fromEncoding, const String &toEncoding)
00079 {
00080    close();
00081    m_iconv = ::iconv_open(toEncoding.c_str(), fromEncoding.c_str());
00082    return ( m_iconv != iconv_t(-1));
00083 }
00084 
00085 
00086 // -------------------------------------------------------------------
00087 size_t
00088 IConv_t::convert(char **istr, size_t *ibytesleft,
00089                char **ostr, size_t *obytesleft)
00090 {
00091 #if defined(BLOCXX_ICONV_INBUF_CONST)
00092    BLOCXX_ASSERT(istr != NULL); 
00093    const char *ptr = *istr;
00094    int ret = ::iconv(m_iconv, &ptr, ibytesleft, ostr, obytesleft); 
00095    *istr = const_cast<char*>(ptr); 
00096    return ret; 
00097 #else
00098    return ::iconv(m_iconv, istr, ibytesleft, ostr, obytesleft);
00099 #endif
00100 }
00101 
00102 
00103 // -------------------------------------------------------------------
00104 bool
00105 IConv_t::close()
00106 {
00107    bool ret = true;
00108    int  err = errno;
00109 
00110    if( m_iconv != iconv_t(-1))
00111    {
00112       if( ::iconv_close(m_iconv) == -1)
00113          ret = false;
00114       m_iconv = iconv_t(-1);
00115    }
00116 
00117    errno = err;
00118    return ret;
00119 }
00120 
00121 
00122 // *******************************************************************
00123 namespace IConv
00124 {
00125 
00126 // -------------------------------------------------------------------
00127 static inline void
00128 mayThrowStringConversionException()
00129 {
00130    switch( errno)
00131    {
00132       case E2BIG:
00133       break;
00134 
00135       case EILSEQ:
00136          BLOCXX_THROW(StringConversionException,
00137          "Invalid character or multibyte sequence in the input");
00138       break;
00139 
00140       case EINVAL:
00141       default:
00142          BLOCXX_THROW(StringConversionException,
00143          "Incomplete multibyte sequence in the input");
00144       break;
00145    }
00146 }
00147 
00148 // -------------------------------------------------------------------
00149 String
00150 fromByteString(const String &enc, const char *str, size_t len)
00151 {
00152    if( !str || len == 0)
00153       return String();
00154 
00155    IConv_t      iconv(enc, "UTF-8"); // throws error
00156    String       out;
00157    char         obuf[4097];
00158    char        *optr;
00159    size_t       olen;
00160 
00161    char        *sptr = (char *)str;
00162    size_t       slen = len;
00163 
00164    while( slen > 0)
00165    {
00166       obuf[0] = '\0';
00167       optr = (char *)obuf;
00168       olen = sizeof(obuf) - sizeof(obuf[0]);
00169 
00170       size_t ret = iconv.convert(&sptr, &slen, &optr, &olen);
00171       if( ret == size_t(-1))
00172       {
00173          mayThrowStringConversionException();
00174       }
00175       *optr = '\0';
00176       out  += obuf;
00177    }
00178 
00179    return out;
00180 }
00181 
00182 
00183 // -------------------------------------------------------------------
00184 String
00185 fromByteString(const String &enc, const std::string  &str)
00186 {
00187    return fromByteString(enc, str.c_str(), str.length());
00188 }
00189 
00190 
00191 #ifdef BLOCXX_HAVE_STD_WSTRING
00192 // -------------------------------------------------------------------
00193 String
00194 fromWideString(const String &enc, const std::wstring &str)
00195 {
00196    if( str.empty())
00197       return String();
00198 
00199    IConv_t      iconv(enc, "UTF-8"); // throws error
00200    String       out;
00201    char         obuf[4097];
00202    char        *optr;
00203    size_t       olen;
00204 
00205    char        *sptr = (char *)str.c_str();
00206    size_t       slen = str.length() * sizeof(wchar_t);
00207 
00208    while( slen > 0)
00209    {
00210       obuf[0] = '\0';
00211       optr = (char *)obuf;
00212       olen = sizeof(obuf) - sizeof(obuf[0]);
00213 
00214       size_t ret = iconv.convert(&sptr, &slen, &optr, &olen);
00215       if( ret == size_t(-1))
00216       {
00217          mayThrowStringConversionException();
00218       }
00219       *optr = '\0';
00220       out  += obuf;
00221    }
00222 
00223    return out;
00224 }
00225 #endif
00226 
00227 // -------------------------------------------------------------------
00228 std::string
00229 toByteString(const String &enc, const String &utf8)
00230 {
00231    if( utf8.empty())
00232       return std::string();
00233 
00234    IConv_t      iconv("UTF-8", enc); // throws error
00235    std::string  out;
00236    char         obuf[4097];
00237    char        *optr;
00238    size_t       olen;
00239 
00240    char        *sptr = (char *)utf8.c_str();
00241    size_t       slen = utf8.length();
00242 
00243    while( slen > 0)
00244    {
00245       obuf[0] = '\0';
00246       optr = (char *)obuf;
00247       olen = sizeof(obuf) - sizeof(obuf[0]);
00248 
00249       size_t ret = iconv.convert(&sptr, &slen, &optr, &olen);
00250       if( ret == size_t(-1))
00251       {
00252          mayThrowStringConversionException();
00253       }
00254       *optr = '\0';
00255       out  += obuf;
00256    }
00257 
00258    return out;
00259 }
00260 
00261 #ifdef BLOCXX_HAVE_STD_WSTRING
00262 // -------------------------------------------------------------------
00263 std::wstring
00264 toWideString(const String &enc, const String &utf8)
00265 {
00266    if( utf8.empty())
00267       return std::wstring();
00268 
00269    IConv_t      iconv("UTF-8", enc); // throws error
00270    std::wstring out;
00271    wchar_t      obuf[1025];
00272    char        *optr;
00273    size_t       olen;
00274 
00275    char        *sptr = (char *)utf8.c_str();
00276    size_t       slen = utf8.length();
00277 
00278    while( slen > 0)
00279    {
00280       obuf[0] = L'\0';
00281       optr = (char *)obuf;
00282       olen = sizeof(obuf) - sizeof(obuf[0]);
00283 
00284       size_t ret = iconv.convert(&sptr, &slen, &optr, &olen);
00285       if( ret == size_t(-1))
00286       {
00287          mayThrowStringConversionException();
00288       }
00289       *((wchar_t *)optr) = L'\0';
00290       out += obuf;
00291    }
00292 
00293    return out;
00294 }
00295 #endif
00296 
00297 
00298 #if 0
00299 // -------------------------------------------------------------------
00300 StringArray
00301 encodings()
00302 {
00303    StringArray   command;
00304    String        output;
00305    int           status = -1;
00306 
00307    command.push_back("/usr/bin/iconv");
00308    command.push_back("--list");
00309 
00310    try
00311    {
00312       Exec::executeProcessAndGatherOutput(command, output, status);
00313    }
00314    catch(...)
00315    {
00316    }
00317 
00318    if(status == 0)
00319    {
00320       return output.tokenize("\r\n");
00321    }
00322    return StringArray();
00323 }
00324 #endif
00325 
00326 
00327 }  // End of IConv namespace
00328 }  // End of BLOCXX_NAMESPACE
00329 
00330 #endif // BLOCXX_HAVE_ICONV_SUPPORT
00331 
00332 /* vim: set ts=8 sts=8 sw=8 ai noet: */
00333