当前位置:网站首页>tars源码分析之10
tars源码分析之10
2022-07-04 06:33:00 【涛歌依旧】
GBK和UTF-8是你永远绕不开的话题,怎么转换呢?
而且,我估计你还踩过坑。反正我踩过。
其实很简单,一起来看看代码:
#include <iconv.h>
#include <errno.h>
#include <string.h>
#include <string>
#include <vector>
#include "util/tc_encoder.h"
namespace tars
{
/**
 * Converts a GBK byte sequence to UTF-8 into a caller-supplied buffer.
 *
 * Bytes below 0x80 are copied unchanged, the single byte 0x80 is emitted as
 * E2 82 AC, and everything else is handed to iconv two bytes at a time. A
 * pair that iconv cannot convert is replaced by one space.
 *
 * @param sOut       destination buffer
 * @param iMaxOutLen in: number of bytes that may be written to sOut;
 *                   out: number of UTF-8 bytes produced (terminator excluded).
 *                   Left unchanged when an exception is thrown.
 * @param sIn        GBK input
 * @param iInLen     number of bytes in sIn
 * @throws TC_Encoder_Exception if iInLen > iMaxOutLen, if iconv_open fails,
 *         or if the converted text does not fit into iMaxOutLen bytes
 *
 * NOTE(review): the terminating '\0' is stored at index <bytes produced>,
 * which equals iMaxOutLen when the output fills the buffer exactly — this
 * looks like it needs one spare byte in sOut; confirm against the header.
 */
void TC_Encoder::gbk2utf8(char *sOut, int &iMaxOutLen, const char *sIn, int iInLen)
{
    if (iInLen > iMaxOutLen)
    {
        throw TC_Encoder_Exception("[TC_Encoder::gbk2utf8] iInLen > iMaxOutLen error : ", errno);
    }

    iconv_t cd = iconv_open("UTF-8","GBK");
    if (cd == (iconv_t)-1)
    {
        throw TC_Encoder_Exception("[TC_Encoder::gbk2utf8] iconv_open error : ", errno);
    }

    char * pIn      = const_cast<char*>(sIn);   // iconv() wants a non-const char**
    char * pEnd     = pIn + iInLen;
    char * pOut     = sOut;
    size_t iLeftLen = iMaxOutLen;
    bool   bNoRoom  = false;

    while (pIn < pEnd && !bNoRoom)
    {
        const unsigned char c = static_cast<unsigned char>(*pIn);

        if (c == 0x80)
        {
            // GBK 0x80 becomes the three bytes E2 82 AC in UTF-8
            if (iLeftLen < 3)
            {
                bNoRoom = true;
                break;
            }
            *pOut++ = static_cast<char>(0xe2);
            *pOut++ = static_cast<char>(0x82);
            *pOut++ = static_cast<char>(0xac);
            iLeftLen -= 3;
            ++pIn;
        }
        else if (c < 0x80)
        {
            // single byte (GBK: 0x00-0x7F)
            if (iLeftLen < 1)
            {
                bNoRoom = true;
                break;
            }
            *pOut++ = *pIn++;
            --iLeftLen;
        }
        else
        {
            // double byte; never hand iconv more bytes than the input really has
            char * const pStart = pIn;
            const size_t iAvail = static_cast<size_t>(pEnd - pIn);
            const size_t iStep  = iAvail < 2 ? iAvail : 2;
            size_t iGbkLen      = iStep;

            const size_t iRet = iconv(cd, &pIn, &iGbkLen, &pOut, &iLeftLen);
            if (iRet == (size_t)-1)
            {
                // E2BIG means "output full", not "unconvertible"
                if (errno == E2BIG || iLeftLen < 1)
                {
                    bNoRoom = true;
                    break;
                }
                *pOut++ = ' ';          // cannot be converted: substitute a space
                --iLeftLen;
                pIn = pStart + iStep;   // skip the offending pair
            }
        }
    }

    iconv_close(cd);

    if (bNoRoom)
    {
        throw TC_Encoder_Exception("[TC_Encoder::gbk2utf8] output buffer too small : ", E2BIG);
    }

    const size_t iUsed = iMaxOutLen - iLeftLen;
    sOut[iUsed] = '\0';
    iMaxOutLen  = static_cast<int>(iUsed);
}
/**
 * Converts a GBK string to UTF-8.
 *
 * Bytes below 0x80 are copied unchanged, the single byte 0x80 is emitted as
 * E2 82 AC, and every other byte is converted together with the byte that
 * follows it. A pair iconv cannot convert is replaced by one space.
 *
 * @param sIn GBK input
 * @return the UTF-8 text
 * @throws TC_Encoder_Exception if iconv_open fails
 */
string TC_Encoder::gbk2utf8(const string &sIn)
{
    // Closes the descriptor on every exit path, including an exception
    // thrown while the result string grows.
    struct IconvGuard
    {
        iconv_t cd;
        explicit IconvGuard(iconv_t c) : cd(c) {}
        ~IconvGuard() { iconv_close(cd); }
    };

    iconv_t cd = iconv_open("UTF-8","GBK");
    if (cd == (iconv_t)-1)
    {
        throw TC_Encoder_Exception("[TC_Encoder::gbk2utf8] iconv_open error", errno);
    }
    IconvGuard guard(cd);

    const string::size_type iLen = sIn.length();
    string sOut;
    sOut.reserve(iLen);

    for (string::size_type pos = 0; pos < iLen; ++pos)
    {
        const unsigned char c = static_cast<unsigned char>(sIn[pos]);

        if (c == 0x80)
        {
            // GBK 0x80 becomes the three bytes E2 82 AC in UTF-8
            sOut += static_cast<char>(0xe2);
            sOut += static_cast<char>(0x82);
            sOut += static_cast<char>(0xac);
        }
        else if (c < 0x80)
        {
            // single byte (GBK: 0x00-0x7F)
            sOut += sIn[pos];
        }
        else
        {
            // double byte; a trailing lone lead byte is padded with '\0',
            // which iconv rejects, so it ends up as a space like before
            char szIn[2];
            szIn[0] = sIn[pos];
            szIn[1] = (pos + 1 < iLen) ? sIn[pos + 1] : '\0';

            char   szOut[16];
            char * pSrc        = szIn;
            char * pDst        = szOut;
            size_t sizeGbkLen  = sizeof(szIn);
            size_t sizeLeftLen = sizeof(szOut);

            const size_t iRet = iconv(cd, &pSrc, &sizeGbkLen, &pDst, &sizeLeftLen);
            if (iRet == (size_t)-1)
            {
                // cannot be converted: substitute a space for now
                sOut += ' ';
            }
            else
            {
                sOut.append(szOut, static_cast<string::size_type>(pDst - szOut));
            }
            ++pos;  // the second byte of the pair has been consumed
        }
    }

    return sOut;
}
void TC_Encoder::gbk2utf8(const string &sIn, vector<string> &vtStr)
{
iconv_t cd;
cd = iconv_open("UTF-8","GBK");
if (cd == (iconv_t)-1)
{
throw TC_Encoder_Exception("[TC_Encoder::gbk2utf8] iconv_open error", errno);
}
vtStr.clear();
for(string::size_type pos = 0; pos < sIn.length(); ++pos)
{
string sOut;
if((unsigned char)sIn[pos] == 0x80)
{
//注意GBK的0x80转换为UTF-8时为E2 82 AC
sOut += 0xe2;
sOut += 0x82;
sOut += 0xac;
}
else if((unsigned char)sIn[pos] < 0x80)
{
//单字节(GBK: 0x00-0x7F)
sOut += sIn[pos];
}
else
{
//双字节
size_t iGbkLen = 2;
char pIn[128] = "\0";
strncpy(pIn, sIn.c_str() + pos, iGbkLen);
char *p = pIn;
size_t iLeftLen = 128;
char pOut[128] = "\0";
char *o = pOut;
int iRet = iconv(cd, &p, (size_t *)&iGbkLen, (char **)&o, (size_t *)&iLeftLen);
if(iRet < 0)
{
//转换不了, 暂时替换为空格
sOut += ' ';
}
else
{
sOut += pOut;
}
++pos;
}
vtStr.push_back(sOut);
}
iconv_close(cd);
}
void TC_Encoder::utf82gbk(char *sOut, int &iMaxOutLen, const char *sIn, int iInLen)
{
iconv_t cd;
cd = iconv_open("GBK","UTF-8");
if (cd == (iconv_t)-1)
{
throw TC_Encoder_Exception("[TC_Encoder::utf82gbk] iconv_open error", errno);
}
char * pIn = (char*)sIn;
size_t sizeLeftLen = iMaxOutLen;
size_t sizeInLen = iInLen;
char* pOut = sOut;
size_t ret = iconv(cd, &pIn, &sizeInLen, (char **)&sOut, &sizeLeftLen);
if (ret == (size_t) - 1)
{
iMaxOutLen = 0;
iconv_close(cd);
throw TC_Encoder_Exception("[TC_Encoder::utf82gbk] iconv error", errno);
return;
}
iconv_close(cd);
pOut[iMaxOutLen - (int)sizeLeftLen] = '\0';
iMaxOutLen = iMaxOutLen - (int)sizeLeftLen;
}
/**
 * Converts a UTF-8 string to GBK.
 *
 * @param sIn UTF-8 input; an empty input yields an empty result without
 *            touching iconv
 * @return the GBK text
 * @throws TC_Encoder_Exception propagated unchanged from the buffer-based
 *         utf82gbk overload
 */
string TC_Encoder::utf82gbk(const string &sIn)
{
    if (sIn.empty())
    {
        return "";
    }

    // Same sizing as before: twice the input plus one byte for the terminator.
    int iLen = static_cast<int>(sIn.length() * 2 + 1);

    // The vector releases the scratch buffer on every exit path, so the
    // exception from the overload below propagates with its original type.
    std::vector<char> vBuf(iLen);

    utf82gbk(&vBuf[0], iLen, sIn.c_str(), sIn.length());

    // iLen now holds the number of bytes actually produced
    return string(&vBuf[0], iLen);
}
/**
 * Escapes every occurrence of f so that the result no longer contains it.
 *
 *   f -> t u      (with the defaults: \n -> \r\0)
 *   t -> t t      (with the defaults: \r -> \r\r)
 *
 * All other characters are copied unchanged. The output is built in one
 * linear pass instead of inserting into the middle of the string.
 *
 * @param str text to escape
 * @param f   character to be escaped
 * @param t   escape character
 * @param u   character that follows t to represent f
 * @return the escaped copy of str
 */
string TC_Encoder::transTo(const string& str, char f /*='\n'*/, char t /*= '\r'*/, char u /*= '\0'*/)
{
    string ret;
    ret.reserve(str.length());

    for (string::const_iterator it = str.begin(); it != str.end(); ++it)
    {
        const char c = *it;
        if (c == f)
        {
            ret += t;
            ret += u;
        }
        else if (c == t)
        {
            ret += t;
            ret += t;
        }
        else
        {
            ret += c;
        }
    }

    return ret;
}
/**
 * Reverses the escaping applied by transTo.
 *
 *   t u -> f      (with the defaults: \r\0 -> \n)
 *   t t -> t      (with the defaults: \r\r -> \r)
 *
 * The character after t is always consumed; when it is not u the pair
 * collapses to a single t. A t that is the very last character has nothing
 * after it and is copied through as-is instead of reading past the end of
 * the string.
 *
 * @param str escaped text
 * @param f   character that was escaped
 * @param t   escape character
 * @param u   character that follows t to represent f
 * @return the unescaped copy of str
 */
string TC_Encoder::transFrom(const string& str, char f /*= '\n'*/, char t /*= '\r'*/, char u /*= '\0'*/)
{
    string ret;
    ret.reserve(str.length());

    for (string::const_iterator it = str.begin(); it != str.end(); ++it)
    {
        ret.append(1, *it);
        if (*it != t)
        {
            continue;
        }

        const string::const_iterator next = it + 1;
        if (next == str.end())
        {
            // dangling escape character: nothing left to pair it with
            break;
        }

        if (*next == u)
        {
            ret[ret.length() - 1] = f;
        }
        it = next;  // the second character of the pair has been consumed
    }

    return ret;
}
}
边栏推荐
- Stc8h development (XII): I2C drive AT24C08, at24c32 series EEPROM storage
- MySQL learning notes 3 - JDBC
- [March 3, 2019] MAC starts redis
- [number theory] fast power (Euler power)
- C # symmetric encryption (AES encryption) ciphertext results generated each time, different ideas, code sharing
- Vant --- detailed explanation and use of list component in vant
- Review | categories and mechanisms of action of covid-19 neutralizing antibodies and small molecule drugs
- 7. Agency mode
- C语言中的排序,实现从小到大的数字排序法
- Learning multi-level structural information for small organ segmentation
猜你喜欢

198. House raiding

Arcpy 利用updatelayer函数改变图层的符号系统

C language - Blue Bridge Cup - Snake filling

JSON web token -- comparison between JWT and traditional session login authentication

R statistical mapping - random forest classification analysis and species abundance difference test combination diagram

测试岗的中年危机该如何选择?是坚守还是另寻出路?且看下文

Variables d'environnement personnalisées uniapp

MySQL learning notes 3 - JDBC

Bicolor case

R统计绘图-随机森林分类分析及物种丰度差异检验组合图
随机推荐
2022.7.2-----leetcode.871
Sleep quality today 78 points
Arcpy uses the updatelayer function to change the symbol system of the layer
C realize Snake games
Data analysis notes 09
Invalid bound statement (not found): com. example. mapper. TblUserRecordMapper. login
R统计绘图-随机森林分类分析及物种丰度差异检验组合图
C réaliser des jeux de serpents gourmands
ES6 modularization
QT qtablewidget table column top requirements ideas and codes
如何实现视频平台会员多账号登录
Can the out of sequence message complete TCP three handshakes
C實現貪吃蛇小遊戲
A little understanding of GSLB (global server load balance) technology
uniapp 自定义环境变量
Nexus 6p downgraded from 8.0 to 6.0+root
What is Gibson's law?
颈椎、脚气
MySQL的information_schema数据库
STC8H开发(十二): I2C驱动AT24C08,AT24C32系列EEPROM存储