blocxx
|
00001 /******************************************************************************* 00002 * Copyright (C) 2005 Novell, Inc. All rights reserved. 00003 * 00004 * Redistribution and use in source and binary forms, with or without 00005 * modification, are permitted provided that the following conditions are met: 00006 * 00007 * - Redistributions of source code must retain the above copyright notice, 00008 * this list of conditions and the following disclaimer. 00009 * 00010 * - Redistributions in binary form must reproduce the above copyright notice, 00011 * this list of conditions and the following disclaimer in the documentation 00012 * and/or other materials provided with the distribution. 00013 * 00014 * - Neither the name of Novell, Inc., nor the names of its 00015 * contributors may be used to endorse or promote products derived from this 00016 * software without specific prior written permission. 00017 * 00018 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' 00019 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 00020 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 00021 * ARE DISCLAIMED. IN NO EVENT SHALL Novell, Inc., OR THE 00022 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 00023 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 00024 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; 00025 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 00026 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR 00027 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 00028 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00029 *******************************************************************************/ 00034 #include "blocxx/IConv.hpp" 00035 00036 #if defined(BLOCXX_HAVE_ICONV_SUPPORT) 00037 #include "blocxx/Assertion.hpp" 00038 #include "blocxx/Format.hpp" 00039 #include "blocxx/Exec.hpp" 00040 00041 #include <cwchar> 00042 #include <cwctype> 00043 00044 #include <errno.h> 00045 00046 namespace BLOCXX_NAMESPACE 00047 { 00048 00049 // ------------------------------------------------------------------- 00050 IConv_t::IConv_t() 00051 : m_iconv(iconv_t(-1)) 00052 { 00053 } 00054 00055 00056 // ------------------------------------------------------------------- 00057 IConv_t::IConv_t(const String &fromEncoding, const String &toEncoding) 00058 { 00059 m_iconv = ::iconv_open(toEncoding.c_str(), fromEncoding.c_str()); 00060 if( m_iconv == iconv_t(-1)) 00061 { 00062 BLOCXX_THROW(StringConversionException, 00063 Format("Unable to convert from \"%1\" to \"%2\"", 00064 fromEncoding, toEncoding).c_str()); 00065 } 00066 } 00067 00068 00069 // ------------------------------------------------------------------- 00070 IConv_t::~IConv_t() 00071 { 00072 close(); 00073 } 00074 00075 00076 // ------------------------------------------------------------------- 00077 bool 00078 IConv_t::open(const String &fromEncoding, const String &toEncoding) 00079 { 00080 close(); 00081 m_iconv = ::iconv_open(toEncoding.c_str(), fromEncoding.c_str()); 00082 return ( m_iconv != iconv_t(-1)); 00083 } 00084 00085 00086 // ------------------------------------------------------------------- 00087 size_t 00088 IConv_t::convert(char **istr, size_t *ibytesleft, 00089 char **ostr, size_t *obytesleft) 00090 { 00091 #if defined(BLOCXX_ICONV_INBUF_CONST) 00092 BLOCXX_ASSERT(istr != NULL); 00093 const char *ptr = *istr; 00094 int ret = ::iconv(m_iconv, &ptr, ibytesleft, ostr, obytesleft); 00095 *istr = const_cast<char*>(ptr); 00096 return ret; 00097 #else 00098 return ::iconv(m_iconv, istr, ibytesleft, ostr, obytesleft); 00099 #endif 00100 } 00101 00102 00103 // ------------------------------------------------------------------- 00104 bool 00105 IConv_t::close() 00106 { 00107 bool ret = true; 00108 int err = errno; 00109 00110 if( m_iconv != iconv_t(-1)) 00111 { 00112 if( ::iconv_close(m_iconv) == -1) 00113 ret = false; 00114 m_iconv = iconv_t(-1); 00115 } 00116 00117 errno = err; 00118 return ret; 00119 } 00120 00121 00122 // ******************************************************************* 00123 namespace IConv 00124 { 00125 00126 // ------------------------------------------------------------------- 00127 static inline void 00128 mayThrowStringConversionException() 00129 { 00130 switch( errno) 00131 { 00132 case E2BIG: 00133 break; 00134 00135 case EILSEQ: 00136 BLOCXX_THROW(StringConversionException, 00137 "Invalid character or multibyte sequence in the input"); 00138 break; 00139 00140 case EINVAL: 00141 default: 00142 BLOCXX_THROW(StringConversionException, 00143 "Incomplete multibyte sequence in the input"); 00144 break; 00145 } 00146 } 00147 00148 // ------------------------------------------------------------------- 00149 String 00150 fromByteString(const String &enc, const char *str, size_t len) 00151 { 00152 if( !str || len == 0) 00153 return String(); 00154 00155 IConv_t iconv(enc, "UTF-8"); // throws error 00156 String out; 00157 char obuf[4097]; 00158 char *optr; 00159 size_t olen; 00160 00161 char *sptr = (char *)str; 00162 size_t slen = len; 00163 00164 while( slen > 0) 00165 { 00166 obuf[0] = '\0'; 00167 optr = (char *)obuf; 00168 olen = sizeof(obuf) - sizeof(obuf[0]); 00169 00170 size_t ret = iconv.convert(&sptr, &slen, &optr, &olen); 00171 if( ret == size_t(-1)) 00172 { 00173 mayThrowStringConversionException(); 00174 } 00175 *optr = '\0'; 00176 out += obuf; 00177 } 00178 00179 return out; 00180 } 00181 00182 00183 // ------------------------------------------------------------------- 00184 String 00185 fromByteString(const String &enc, const std::string &str) 00186 { 00187 return fromByteString(enc, str.c_str(), str.length()); 00188 } 00189 00190 00191 #ifdef BLOCXX_HAVE_STD_WSTRING 00192 // ------------------------------------------------------------------- 00193 String 00194 fromWideString(const String &enc, const std::wstring &str) 00195 { 00196 if( str.empty()) 00197 return String(); 00198 00199 IConv_t iconv(enc, "UTF-8"); // throws error 00200 String out; 00201 char obuf[4097]; 00202 char *optr; 00203 size_t olen; 00204 00205 char *sptr = (char *)str.c_str(); 00206 size_t slen = str.length() * sizeof(wchar_t); 00207 00208 while( slen > 0) 00209 { 00210 obuf[0] = '\0'; 00211 optr = (char *)obuf; 00212 olen = sizeof(obuf) - sizeof(obuf[0]); 00213 00214 size_t ret = iconv.convert(&sptr, &slen, &optr, &olen); 00215 if( ret == size_t(-1)) 00216 { 00217 mayThrowStringConversionException(); 00218 } 00219 *optr = '\0'; 00220 out += obuf; 00221 } 00222 00223 return out; 00224 } 00225 #endif 00226 00227 // ------------------------------------------------------------------- 00228 std::string 00229 toByteString(const String &enc, const String &utf8) 00230 { 00231 if( utf8.empty()) 00232 return std::string(); 00233 00234 IConv_t iconv("UTF-8", enc); // throws error 00235 std::string out; 00236 char obuf[4097]; 00237 char *optr; 00238 size_t olen; 00239 00240 char *sptr = (char *)utf8.c_str(); 00241 size_t slen = utf8.length(); 00242 00243 while( slen > 0) 00244 { 00245 obuf[0] = '\0'; 00246 optr = (char *)obuf; 00247 olen = sizeof(obuf) - sizeof(obuf[0]); 00248 00249 size_t ret = iconv.convert(&sptr, &slen, &optr, &olen); 00250 if( ret == size_t(-1)) 00251 { 00252 mayThrowStringConversionException(); 00253 } 00254 *optr = '\0'; 00255 out += obuf; 00256 } 00257 00258 return out; 00259 } 00260 00261 #ifdef BLOCXX_HAVE_STD_WSTRING 00262 // ------------------------------------------------------------------- 00263 std::wstring 00264 toWideString(const String &enc, const String &utf8) 00265 { 00266 if( utf8.empty()) 00267 return std::wstring(); 00268 00269 IConv_t iconv("UTF-8", enc); // throws error 00270 std::wstring out; 00271 wchar_t obuf[1025]; 00272 char *optr; 00273 size_t olen; 00274 00275 char *sptr = (char *)utf8.c_str(); 00276 size_t slen = utf8.length(); 00277 00278 while( slen > 0) 00279 { 00280 obuf[0] = L'\0'; 00281 optr = (char *)obuf; 00282 olen = sizeof(obuf) - sizeof(obuf[0]); 00283 00284 size_t ret = iconv.convert(&sptr, &slen, &optr, &olen); 00285 if( ret == size_t(-1)) 00286 { 00287 mayThrowStringConversionException(); 00288 } 00289 *((wchar_t *)optr) = L'\0'; 00290 out += obuf; 00291 } 00292 00293 return out; 00294 } 00295 #endif 00296 00297 00298 #if 0 00299 // ------------------------------------------------------------------- 00300 StringArray 00301 encodings() 00302 { 00303 StringArray command; 00304 String output; 00305 int status = -1; 00306 00307 command.push_back("/usr/bin/iconv"); 00308 command.push_back("--list"); 00309 00310 try 00311 { 00312 Exec::executeProcessAndGatherOutput(command, output, status); 00313 } 00314 catch(...) 00315 { 00316 } 00317 00318 if(status == 0) 00319 { 00320 return output.tokenize("\r\n"); 00321 } 00322 return StringArray(); 00323 } 00324 #endif 00325 00326 00327 } // End of IConv namespace 00328 } // End of BLOCXX_NAMESPACE 00329 00330 #endif // BLOCXX_HAVE_ICONV_SUPPORT 00331 00332 /* vim: set ts=8 sts=8 sw=8 ai noet: */ 00333