当前位置:网站首页>Tars source code analysis, Part 10
Tars source code analysis, Part 10
2022-07-04 06:37:00 【Tao song remains the same】
Converting between GBK and UTF-8 is a topic you can never get around. How do you do it?
I guess you have fallen into this pit before. I certainly have.
It is actually quite simple. Let's take a look at the code:
#include <iconv.h>
#include <errno.h>
#include <string.h>
#include "util/tc_encoder.h"
namespace tars
{
void TC_Encoder::gbk2utf8(char *sOut, int &iMaxOutLen, const char *sIn, int iInLen)
{
char * pIn = (char*)sIn;
char * pEnd = pIn+iInLen;
char * pOut = sOut;
size_t iLeftLen;
size_t iGbkLen;
iconv_t cd;
if (iInLen > iMaxOutLen)
{
throw TC_Encoder_Exception("[TC_Encoder::gbk2utf8] iInLen > iMaxOutLen error : ", errno);
}
cd = iconv_open("UTF-8","GBK");
if (cd == (iconv_t)-1)
{
throw TC_Encoder_Exception("[TC_Encoder::gbk2utf8] iconv_open error : ", errno);
}
iLeftLen = iMaxOutLen;
while(pIn < pEnd)
{
if((unsigned char)(*pIn)==0x80)
{
// Be careful GBK Of 0x80 Convert to UTF-8 When is E2 82 AC
*pOut = 0xe2; pOut++; iLeftLen--;
*pOut = 0x82; pOut++; iLeftLen--;
*pOut = 0xac; pOut++; iLeftLen--;
pIn++;
}
else if((unsigned char)(*pIn)<0x80)
{
// Single byte (GBK: 0x00-0x7F)
*pOut = *pIn;
pIn++;pOut++;iLeftLen--;
}
else
{
// Double byte
iGbkLen=2;
int iRet=iconv(cd, &pIn, (size_t *)&iGbkLen, (char **)&pOut, (size_t *)&iLeftLen);
if(iRet < 0)
{
*pOut = ' '; // Conversion cannot be replaced with spaces
pIn+=2; pOut++; iLeftLen--;
}
}
}
iconv_close(cd);
sOut[iMaxOutLen - iLeftLen] = '\0';
iMaxOutLen = iMaxOutLen - iLeftLen;
}
/**
 * Convert a GBK encoded string to UTF-8.
 *
 * @param sIn  GBK input
 * @return     the UTF-8 text; every double-byte sequence that iconv cannot
 *             convert is replaced by one space
 * @throws TC_Encoder_Exception if iconv_open fails
 */
string TC_Encoder::gbk2utf8(const string &sIn)
{
    // Closes the descriptor on every exit path, including an exception
    // thrown while the result string grows.
    struct IconvGuard
    {
        iconv_t cd;
        explicit IconvGuard(iconv_t c) : cd(c) {}
        ~IconvGuard() { iconv_close(cd); }
    };

    iconv_t cd = iconv_open("UTF-8","GBK");
    if (cd == (iconv_t)-1)
    {
        throw TC_Encoder_Exception("[TC_Encoder::gbk2utf8] iconv_open error", errno);
    }
    IconvGuard guard(cd);

    const string::size_type iLen = sIn.length();
    string sOut;
    sOut.reserve(iLen);

    for (string::size_type pos = 0; pos < iLen; ++pos)
    {
        const unsigned char ch = static_cast<unsigned char>(sIn[pos]);

        if (ch == 0x80)
        {
            // GBK 0x80 is mapped by hand to the UTF-8 sequence E2 82 AC.
            sOut.append("\xe2\x82\xac", 3);
        }
        else if (ch < 0x80)
        {
            // Single byte (GBK 0x00-0x7F) is copied unchanged.
            sOut += sIn[pos];
        }
        else
        {
            // Double byte: convert straight out of the input. A lead byte at
            // the very end of the input is passed alone, fails, and becomes a space.
            size_t sizeGbkLen = (iLen - pos >= 2) ? 2 : 1;
            char *p = const_cast<char*>(sIn.data()) + pos;

            char pOut[8];
            char *o = pOut;
            size_t sizeLeftLen = sizeof(pOut);

            if (iconv(cd, &p, &sizeGbkLen, &o, &sizeLeftLen) == (size_t)-1)
            {
                // It can't be converted, replace it with a space for now.
                sOut += ' ';
            }
            else
            {
                sOut.append(pOut, sizeof(pOut) - sizeLeftLen);
            }

            // Skip the second byte of the pair.
            ++pos;
        }
    }

    return sOut;
}
void TC_Encoder::gbk2utf8(const string &sIn, vector<string> &vtStr)
{
iconv_t cd;
cd = iconv_open("UTF-8","GBK");
if (cd == (iconv_t)-1)
{
throw TC_Encoder_Exception("[TC_Encoder::gbk2utf8] iconv_open error", errno);
}
vtStr.clear();
for(string::size_type pos = 0; pos < sIn.length(); ++pos)
{
string sOut;
if((unsigned char)sIn[pos] == 0x80)
{
// Be careful GBK Of 0x80 Convert to UTF-8 When is E2 82 AC
sOut += 0xe2;
sOut += 0x82;
sOut += 0xac;
}
else if((unsigned char)sIn[pos] < 0x80)
{
// Single byte (GBK: 0x00-0x7F)
sOut += sIn[pos];
}
else
{
// Double byte
size_t iGbkLen = 2;
char pIn[128] = "\0";
strncpy(pIn, sIn.c_str() + pos, iGbkLen);
char *p = pIn;
size_t iLeftLen = 128;
char pOut[128] = "\0";
char *o = pOut;
int iRet = iconv(cd, &p, (size_t *)&iGbkLen, (char **)&o, (size_t *)&iLeftLen);
if(iRet < 0)
{
// It can't be converted , Temporarily replace with spaces
sOut += ' ';
}
else
{
sOut += pOut;
}
++pos;
}
vtStr.push_back(sOut);
}
iconv_close(cd);
}
void TC_Encoder::utf82gbk(char *sOut, int &iMaxOutLen, const char *sIn, int iInLen)
{
iconv_t cd;
cd = iconv_open("GBK","UTF-8");
if (cd == (iconv_t)-1)
{
throw TC_Encoder_Exception("[TC_Encoder::utf82gbk] iconv_open error", errno);
}
char * pIn = (char*)sIn;
size_t sizeLeftLen = iMaxOutLen;
size_t sizeInLen = iInLen;
char* pOut = sOut;
size_t ret = iconv(cd, &pIn, &sizeInLen, (char **)&sOut, &sizeLeftLen);
if (ret == (size_t) - 1)
{
iMaxOutLen = 0;
iconv_close(cd);
throw TC_Encoder_Exception("[TC_Encoder::utf82gbk] iconv error", errno);
return;
}
iconv_close(cd);
pOut[iMaxOutLen - (int)sizeLeftLen] = '\0';
iMaxOutLen = iMaxOutLen - (int)sizeLeftLen;
}
/**
 * Convert a UTF-8 encoded string to GBK.
 *
 * @param sIn  UTF-8 input
 * @return     the GBK bytes; an empty string for empty input
 * @throws TC_Encoder_Exception propagated unchanged from the buffer overload
 */
string TC_Encoder::utf82gbk(const string &sIn)
{
    if (sIn.empty())
    {
        return "";
    }

    // Work buffer of twice the input length plus one byte. The vector frees
    // it on every exit path, so no catch-and-rethrow is needed.
    int iLen = sIn.length() * 2 + 1;
    std::vector<char> buf(iLen);

    // iLen is updated to the number of bytes actually produced.
    utf82gbk(&buf[0], iLen, sIn.c_str(), sIn.length());

    return string(&buf[0], iLen);
}
/**
 * Escape a string: every f becomes the pair (t, u) and every t is doubled.
 * With the default arguments:
 *   \n -> \r\0
 *   \r -> \r\r
 *
 * @param str  text to escape
 * @param f    character to escape
 * @param t    escape character
 * @param u    character that follows t to stand for f
 * @return     the escaped copy of str
 */
string TC_Encoder::transTo(const string& str, char f /*='\n'*/, char t /*= '\r'*/, char u /*= '\0'*/)
{
    // Build the result by appending; inserting into the middle of the string
    // for every escaped character would cost quadratic time.
    string ret;
    ret.reserve(str.length());

    for (string::size_type i = 0; i < str.length(); ++i)
    {
        const char c = str[i];
        if (c == f)
        {
            ret += t;
            ret += u;
        }
        else if (c == t)
        {
            ret += t;
            ret += t;
        }
        else
        {
            ret += c;
        }
    }
    return ret;
}
/**
 * Undo the escaping: the pair (t, u) becomes f, and t followed by any other
 * character becomes a single t. With the default arguments:
 *   \r\0 -> \n
 *   \r\r -> \r
 *
 * @param str  escaped text
 * @param f    character that the pair (t, u) stands for
 * @param t    escape character
 * @param u    character that follows t to stand for f
 * @return     the unescaped copy of str; an escape character at the very end
 *             of the input has no partner and is kept as is
 */
string TC_Encoder::transFrom(const string& str, char f /*= '\n'*/, char t /*= '\r'*/, char u /*= '\0'*/)
{
    string ret;
    ret.reserve(str.length());

    const string::size_type iLen = str.length();
    for (string::size_type i = 0; i < iLen; ++i)
    {
        const char c = str[i];
        if (c != t)
        {
            ret += c;
            continue;
        }

        // Escape character with nothing after it: stop here rather than
        // reading past the end of the input.
        if (i + 1 >= iLen)
        {
            ret += t;
            break;
        }

        // Consume the partner character; it selects f or a literal t.
        ++i;
        ret += (str[i] == u) ? f : t;
    }
    return ret;
}
}
边栏推荐
猜你喜欢
24 magicaccessorimpl can access the debugging of all methods
如何实现视频平台会员多账号登录
regular expression
QT get random color value and set label background color code
How does apscheduler set tasks not to be concurrent (that is, execute the next task after the first one)?
雲原生——上雲必讀之SSH篇(常用於遠程登錄雲服務器)
Variables d'environnement personnalisées uniapp
How to realize multi account login of video platform members
Appium foundation - appium installation (II)
After the festival, a large number of people change careers. Is it still time to be 30? Listen to the experience of the past people
随机推荐
Displaying currency in Indian numbering format
Native Cloud - SSH articles must be read on Cloud (used for Remote Login to Cloud Server)
JSON Web Token----JWT和传统session登录认证对比
C實現貪吃蛇小遊戲
[untitled]
tars源码分析之9
Google Chrome Portable Google Chrome browser portable version official website download method
Mysql 45讲学习笔记(十四)count(*)
2022 where to find enterprise e-mail and which is the security of enterprise e-mail system?
SQL injection SQL lab 11~22
Operator < <> > fool test case
uniapp 自定义环境变量
What is tweeman's law?
1、 Relevant theories and tools of network security penetration testing
Mysql 45讲学习笔记(十三)表数据删掉一半,表文件大小不变
Mysql 45讲学习笔记(七)行锁
Software keywords and process information intercepted by Golden Shield video player
ADC voltage calculation of STM32 single chip microcomputer
7. Agency mode
如何实现视频平台会员多账号登录