当前位置:网站首页>tars源码分析之10
tars源码分析之10
2022-07-04 06:33:00 【涛歌依旧】
GBK 和 UTF-8 是你永远绕不开的话题,两者之间怎么转换呢?
而且,我估计你还踩过坑。反正我踩过。
其实很简单,一起来看看代码:
#include <iconv.h>
#include <errno.h>
#include <string.h>
#include "util/tc_encoder.h"
namespace tars
{
void TC_Encoder::gbk2utf8(char *sOut, int &iMaxOutLen, const char *sIn, int iInLen)
{
char * pIn = (char*)sIn;
char * pEnd = pIn+iInLen;
char * pOut = sOut;
size_t iLeftLen;
size_t iGbkLen;
iconv_t cd;
if (iInLen > iMaxOutLen)
{
throw TC_Encoder_Exception("[TC_Encoder::gbk2utf8] iInLen > iMaxOutLen error : ", errno);
}
cd = iconv_open("UTF-8","GBK");
if (cd == (iconv_t)-1)
{
throw TC_Encoder_Exception("[TC_Encoder::gbk2utf8] iconv_open error : ", errno);
}
iLeftLen = iMaxOutLen;
while(pIn < pEnd)
{
if((unsigned char)(*pIn)==0x80)
{
//注意GBK的0x80转换为UTF-8时为E2 82 AC
*pOut = 0xe2; pOut++; iLeftLen--;
*pOut = 0x82; pOut++; iLeftLen--;
*pOut = 0xac; pOut++; iLeftLen--;
pIn++;
}
else if((unsigned char)(*pIn)<0x80)
{
//单字节(GBK: 0x00-0x7F)
*pOut = *pIn;
pIn++;pOut++;iLeftLen--;
}
else
{
//双字节
iGbkLen=2;
int iRet=iconv(cd, &pIn, (size_t *)&iGbkLen, (char **)&pOut, (size_t *)&iLeftLen);
if(iRet < 0)
{
*pOut = ' '; //转换不了替换为空格
pIn+=2; pOut++; iLeftLen--;
}
}
}
iconv_close(cd);
sOut[iMaxOutLen - iLeftLen] = '\0';
iMaxOutLen = iMaxOutLen - iLeftLen;
}
/**
 * Convert a GBK string to UTF-8 and return the result.
 *
 * Bytes below 0x80 are copied as-is, the byte 0x80 is emitted as E2 82 AC,
 * and any other byte is converted together with the byte that follows it via
 * iconv. A pair iconv rejects is replaced by a single space.
 *
 * @param sIn GBK input
 * @return the converted string
 * @throws TC_Encoder_Exception if iconv_open fails
 */
string TC_Encoder::gbk2utf8(const string &sIn)
{
    iconv_t conv = iconv_open("UTF-8","GBK");
    if (conv == (iconv_t)-1)
    {
        throw TC_Encoder_Exception("[TC_Encoder::gbk2utf8] iconv_open error", errno);
    }

    string result;
    const string::size_type total = sIn.length();
    string::size_type idx = 0;

    while (idx < total)
    {
        const unsigned char lead = (unsigned char)sIn[idx];

        if (lead < 0x80)
        {
            // Single byte (GBK: 0x00-0x7F).
            result.append(1, sIn[idx]);
            ++idx;
            continue;
        }

        if (lead == 0x80)
        {
            // GBK 0x80 becomes E2 82 AC in UTF-8.
            result.append("\xe2\x82\xac");
            ++idx;
            continue;
        }

        // Two-byte sequence: copy it into a zeroed scratch buffer and convert
        // it into a second zeroed scratch buffer.
        char gbkBuf[128] = "\0";
        char utfBuf[128] = "\0";
        size_t inBytes = 2;
        size_t outBytes = 128;
        strncpy(gbkBuf, sIn.c_str() + idx, inBytes);

        char *src = gbkBuf;
        char *dst = utfBuf;
        const int rc = iconv(conv, &src, &inBytes, (char **)&dst, &outBytes);
        if (rc >= 0)
        {
            result += utfBuf;
        }
        else
        {
            // Cannot convert; substitute a space for now.
            result += ' ';
        }

        // Both bytes of the pair are consumed either way.
        idx += 2;
    }

    iconv_close(conv);
    return result;
}
void TC_Encoder::gbk2utf8(const string &sIn, vector<string> &vtStr)
{
iconv_t cd;
cd = iconv_open("UTF-8","GBK");
if (cd == (iconv_t)-1)
{
throw TC_Encoder_Exception("[TC_Encoder::gbk2utf8] iconv_open error", errno);
}
vtStr.clear();
for(string::size_type pos = 0; pos < sIn.length(); ++pos)
{
string sOut;
if((unsigned char)sIn[pos] == 0x80)
{
//注意GBK的0x80转换为UTF-8时为E2 82 AC
sOut += 0xe2;
sOut += 0x82;
sOut += 0xac;
}
else if((unsigned char)sIn[pos] < 0x80)
{
//单字节(GBK: 0x00-0x7F)
sOut += sIn[pos];
}
else
{
//双字节
size_t iGbkLen = 2;
char pIn[128] = "\0";
strncpy(pIn, sIn.c_str() + pos, iGbkLen);
char *p = pIn;
size_t iLeftLen = 128;
char pOut[128] = "\0";
char *o = pOut;
int iRet = iconv(cd, &p, (size_t *)&iGbkLen, (char **)&o, (size_t *)&iLeftLen);
if(iRet < 0)
{
//转换不了, 暂时替换为空格
sOut += ' ';
}
else
{
sOut += pOut;
}
++pos;
}
vtStr.push_back(sOut);
}
iconv_close(cd);
}
void TC_Encoder::utf82gbk(char *sOut, int &iMaxOutLen, const char *sIn, int iInLen)
{
iconv_t cd;
cd = iconv_open("GBK","UTF-8");
if (cd == (iconv_t)-1)
{
throw TC_Encoder_Exception("[TC_Encoder::utf82gbk] iconv_open error", errno);
}
char * pIn = (char*)sIn;
size_t sizeLeftLen = iMaxOutLen;
size_t sizeInLen = iInLen;
char* pOut = sOut;
size_t ret = iconv(cd, &pIn, &sizeInLen, (char **)&sOut, &sizeLeftLen);
if (ret == (size_t) - 1)
{
iMaxOutLen = 0;
iconv_close(cd);
throw TC_Encoder_Exception("[TC_Encoder::utf82gbk] iconv error", errno);
return;
}
iconv_close(cd);
pOut[iMaxOutLen - (int)sizeLeftLen] = '\0';
iMaxOutLen = iMaxOutLen - (int)sizeLeftLen;
}
/**
 * Convert a UTF-8 string to GBK and return the result.
 *
 * An empty input returns an empty string without touching iconv. Otherwise a
 * scratch buffer of twice the input length plus one byte is passed to the
 * buffer-based utf82gbk overload.
 *
 * @param sIn UTF-8 input
 * @return the converted string
 * @throws TC_Encoder_Exception propagated unchanged from the buffer-based
 *         overload
 */
string TC_Encoder::utf82gbk(const string &sIn)
{
    if (sIn.empty())
    {
        return "";
    }

    int iLen = (int)(sIn.length() * 2 + 1);

    // The vector owns the scratch buffer, so it is released on every exit
    // path, including any exception thrown by the conversion.
    vector<char> buffer(iLen);
    utf82gbk(&buffer[0], iLen, sIn.c_str(), (int)sIn.length());

    // iLen now holds the number of bytes actually produced.
    return string(&buffer[0], iLen);
}
/**
 * Escape every occurrence of f in str using t as the escape character.
 *
 * With the default arguments:
 *   \n -> \r\0
 *   \r -> \r\r
 *
 * @param str input text
 * @param f   character to escape; written as the pair (t, u)
 * @param t   escape character; a literal t in the input is written as (t, t)
 * @param u   marker written after t to stand for f
 * @return the escaped copy of str
 */
string TC_Encoder::transTo(const string& str, char f /*='\n'*/, char t /*= '\r'*/, char u /*= '\0'*/)
{
    // Build the result by appending rather than inserting into the middle of
    // a copy, which keeps the work linear in the input length.
    string ret;
    ret.reserve(str.length());

    for (string::size_type i = 0; i < str.length(); ++i)
    {
        const char c = str[i];
        if (c == f)
        {
            ret += t;
            ret += u;
        }
        else if (c == t)
        {
            ret += t;
            ret += t;
        }
        else
        {
            ret += c;
        }
    }
    return ret;
}
/**
 * Reverse the escaping applied by transTo.
 *
 * With the default arguments:
 *   \r\0 -> \n
 *   \r\r -> \r
 *
 * Each t in the input consumes the character that follows it: if that
 * character is u the pair becomes f, otherwise the pair becomes a single t.
 * A t that is the very last character of the input is kept as-is.
 *
 * @param str escaped text
 * @param f   character restored from the pair (t, u)
 * @param t   escape character
 * @param u   marker that follows t to stand for f
 * @return the unescaped copy of str
 */
string TC_Encoder::transFrom(const string& str, char f /*= '\n'*/, char t /*= '\r'*/, char u /*= '\0'*/)
{
    string ret;
    ret.reserve(str.length());

    for (string::const_iterator it = str.begin(); it != str.end(); ++it)
    {
        ret.append(1, *it);
        if (*it != t)
        {
            continue;
        }

        // Look at the character after the escape, but never past the end:
        // a trailing t has nothing following it to inspect.
        ++it;
        if (it == str.end())
        {
            break;
        }
        if (*it == u)
        {
            *ret.rbegin() = f;
        }
    }
    return ret;
}
}
边栏推荐
- P26-P34 third_ template
- AWT introduction
- Tf/pytorch/cafe-cv/nlp/ audio - practical demonstration of full ecosystem CPU deployment - Intel openvino tool suite course summary (Part 2)
- Vant --- detailed explanation and use of list component in vant
- QT qtablewidget table column top requirements ideas and codes
- Abap:ooalv realizes the function of adding, deleting, modifying and checking
- 24 magicaccessorimpl can access the debugging of all methods
- ADC voltage calculation of STM32 single chip microcomputer
- 树形dp
- 云原生——上云必读之SSH篇(常用于远程登录云服务器)
猜你喜欢
[backpack DP] backpack problem
如何实现视频平台会员多账号登录
ABAP:OOALV实现增删改查功能
Tf/pytorch/cafe-cv/nlp/ audio - practical demonstration of full ecosystem CPU deployment - Intel openvino tool suite course summary (Part 2)
ES6 modularization
R统计绘图-随机森林分类分析及物种丰度差异检验组合图
70000 words of detailed explanation of the whole process of pad openvino [CPU] - from environment configuration to model deployment
Which water in the environment needs water quality monitoring
AWT common components, FileDialog file selection box
After the festival, a large number of people change careers. Is it still time to be 30? Listen to the experience of the past people
随机推荐
JSON Web Token----JWT和傳統session登錄認證對比
C语言中的函数(详解)
STC8H开发(十二): I2C驱动AT24C08,AT24C32系列EEPROM存储
【MySQL】数据库视图的介绍、作用、创建、查看、删除和修改(附练习题)
JS execution mechanism
Tf/pytorch/cafe-cv/nlp/ audio - practical demonstration of full ecosystem CPU deployment - Intel openvino tool suite course summary (Part 2)
Cloud native - SSH article that must be read on the cloud (commonly used for remote login to ECS)
198. House raiding
How to implement cross domain requests
Functions in C language (detailed explanation)
MySQL learning notes 3 - JDBC
Uniapp custom environment variables
Displaying currency in Indian numbering format
Appium基础 — APPium安装(二)
leetcode 310. Minimum Height Trees
Review | categories and mechanisms of action of covid-19 neutralizing antibodies and small molecule drugs
C # symmetric encryption (AES encryption) ciphertext results generated each time, different ideas, code sharing
金盾视频播放器拦截的软件关键词和进程信息
Tree DP
uniapp 自定義環境變量