当前位置:网站首页>Tar source code analysis Part 10
Tar source code analysis Part 10
2022-07-04 06:37:00 【Tao song remains the same】
gbk and utf8 It's a topic you can never get around , How to transform it ?
and , I guess you've stepped on the pit . I stepped on it anyway .
It's very simple , Let's take a look at the code :
#include <iconv.h>
#include <errno.h>
#include <string.h>
#include "util/tc_encoder.h"
namespace tars
{
void TC_Encoder::gbk2utf8(char *sOut, int &iMaxOutLen, const char *sIn, int iInLen)
{
char * pIn = (char*)sIn;
char * pEnd = pIn+iInLen;
char * pOut = sOut;
size_t iLeftLen;
size_t iGbkLen;
iconv_t cd;
if (iInLen > iMaxOutLen)
{
throw TC_Encoder_Exception("[TC_Encoder::gbk2utf8] iInLen > iMaxOutLen error : ", errno);
}
cd = iconv_open("UTF-8","GBK");
if (cd == (iconv_t)-1)
{
throw TC_Encoder_Exception("[TC_Encoder::gbk2utf8] iconv_open error : ", errno);
}
iLeftLen = iMaxOutLen;
while(pIn < pEnd)
{
if((unsigned char)(*pIn)==0x80)
{
// Be careful GBK Of 0x80 Convert to UTF-8 When is E2 82 AC
*pOut = 0xe2; pOut++; iLeftLen--;
*pOut = 0x82; pOut++; iLeftLen--;
*pOut = 0xac; pOut++; iLeftLen--;
pIn++;
}
else if((unsigned char)(*pIn)<0x80)
{
// Single byte (GBK: 0x00-0x7F)
*pOut = *pIn;
pIn++;pOut++;iLeftLen--;
}
else
{
// Double byte
iGbkLen=2;
int iRet=iconv(cd, &pIn, (size_t *)&iGbkLen, (char **)&pOut, (size_t *)&iLeftLen);
if(iRet < 0)
{
*pOut = ' '; // Conversion cannot be replaced with spaces
pIn+=2; pOut++; iLeftLen--;
}
}
}
iconv_close(cd);
sOut[iMaxOutLen - iLeftLen] = '\0';
iMaxOutLen = iMaxOutLen - iLeftLen;
}
string TC_Encoder::gbk2utf8(const string &sIn)
{
iconv_t cd;
cd = iconv_open("UTF-8","GBK");
if (cd == (iconv_t)-1)
{
throw TC_Encoder_Exception("[TC_Encoder::gbk2utf8] iconv_open error", errno);
}
string sOut;
for(string::size_type pos = 0; pos < sIn.length(); ++pos)
{
if((unsigned char)sIn[pos] == 0x80)
{
// Be careful GBK Of 0x80 Convert to UTF-8 When is E2 82 AC
sOut += 0xe2;
sOut += 0x82;
sOut += 0xac;
}
else if((unsigned char)sIn[pos] < 0x80)
{
// Single byte (GBK: 0x00-0x7F)
sOut += sIn[pos];
}
else
{
// Double byte
size_t sizeGbkLen = 2;
char pIn[128] = "\0";
strncpy(pIn, sIn.c_str() + pos, sizeGbkLen);
char *p = pIn;
size_t sizeLeftLen = 128;
char pOut[128] = "\0";
char *o = pOut;
int iRet = iconv(cd, &p, &sizeGbkLen, (char **)&o, &sizeLeftLen);
if(iRet < 0)
{
// It can't be converted , Temporarily replace with spaces
sOut += ' ';
}
else
{
sOut += pOut;
}
++pos;
}
}
iconv_close(cd);
return sOut;
}
void TC_Encoder::gbk2utf8(const string &sIn, vector<string> &vtStr)
{
iconv_t cd;
cd = iconv_open("UTF-8","GBK");
if (cd == (iconv_t)-1)
{
throw TC_Encoder_Exception("[TC_Encoder::gbk2utf8] iconv_open error", errno);
}
vtStr.clear();
for(string::size_type pos = 0; pos < sIn.length(); ++pos)
{
string sOut;
if((unsigned char)sIn[pos] == 0x80)
{
// Be careful GBK Of 0x80 Convert to UTF-8 When is E2 82 AC
sOut += 0xe2;
sOut += 0x82;
sOut += 0xac;
}
else if((unsigned char)sIn[pos] < 0x80)
{
// Single byte (GBK: 0x00-0x7F)
sOut += sIn[pos];
}
else
{
// Double byte
size_t iGbkLen = 2;
char pIn[128] = "\0";
strncpy(pIn, sIn.c_str() + pos, iGbkLen);
char *p = pIn;
size_t iLeftLen = 128;
char pOut[128] = "\0";
char *o = pOut;
int iRet = iconv(cd, &p, (size_t *)&iGbkLen, (char **)&o, (size_t *)&iLeftLen);
if(iRet < 0)
{
// It can't be converted , Temporarily replace with spaces
sOut += ' ';
}
else
{
sOut += pOut;
}
++pos;
}
vtStr.push_back(sOut);
}
iconv_close(cd);
}
void TC_Encoder::utf82gbk(char *sOut, int &iMaxOutLen, const char *sIn, int iInLen)
{
iconv_t cd;
cd = iconv_open("GBK","UTF-8");
if (cd == (iconv_t)-1)
{
throw TC_Encoder_Exception("[TC_Encoder::utf82gbk] iconv_open error", errno);
}
char * pIn = (char*)sIn;
size_t sizeLeftLen = iMaxOutLen;
size_t sizeInLen = iInLen;
char* pOut = sOut;
size_t ret = iconv(cd, &pIn, &sizeInLen, (char **)&sOut, &sizeLeftLen);
if (ret == (size_t) - 1)
{
iMaxOutLen = 0;
iconv_close(cd);
throw TC_Encoder_Exception("[TC_Encoder::utf82gbk] iconv error", errno);
return;
}
iconv_close(cd);
pOut[iMaxOutLen - (int)sizeLeftLen] = '\0';
iMaxOutLen = iMaxOutLen - (int)sizeLeftLen;
}
string TC_Encoder::utf82gbk(const string &sIn)
{
if(sIn.length() == 0)
{
return "";
}
string sOut;
int iLen = sIn.length() * 2 + 1;
char *pOut = new char[iLen];
try
{
utf82gbk(pOut, iLen, sIn.c_str(), sIn.length());
}
catch (TC_Encoder_Exception& e)
{
delete[] pOut;
throw e;
}
sOut.assign(pOut, iLen);
delete[] pOut;
return sOut;
}
/**
* \n -> \r\0
* \r -> \r\r
*/
string TC_Encoder::transTo(const string& str, char f /*='\n'*/, char t /*= '\r'*/, char u /*= '\0'*/)
{
string ret = str;
for (size_t i = 0; i < ret.length(); ++i)
{
if (ret[i] == f)
{
ret[i] = t;
ret.insert(++i, 1, u);
}
else if (ret[i] == t)
{
ret.insert(++i, 1, t);
}
}
return ret;
}
/**
* \r\0 -> \n
* \r\r -> \r
*/
string TC_Encoder::transFrom(const string& str, char f /*= '\n'*/, char t /*= '\r'*/, char u /*= '\0'*/)
{
string ret = "";
for (string::const_iterator it = str.begin()
; it != str.end()
; ++it)
{
ret.append(1, *it);
if (*it == t)
{
if (*(++it) == u)
{
*ret.rbegin() = f;
}
}
}
return ret;
}
}
边栏推荐
- How does the inner roll break?
- Matlab remainder
- Tar source code analysis 4
- What is a spotlight effect?
- 金盾视频播放器拦截的软件关键词和进程信息
- [backpack DP] backpack problem
- How to implement cross domain requests
- Explain in one sentence what social proof is
- [problem record] 03 connect to MySQL database prompt: 1040 too many connections
- P26-P34 third_ template
猜你喜欢
如何实现视频平台会员多账号登录
MySQL learning notes 3 - JDBC
Arcpy uses the updatelayer function to change the symbol system of the layer
SQL injection SQL lab 11~22
C language - Blue Bridge Cup - Snake filling
Matlab remainder
Native Cloud - SSH articles must be read on Cloud (used for Remote Login to Cloud Server)
雲原生——上雲必讀之SSH篇(常用於遠程登錄雲服務器)
C # symmetric encryption (AES encryption) ciphertext results generated each time, different ideas, code sharing
Sleep quality today 78 points
随机推荐
Learning multi-level structural information for small organ segmentation
Tar source code analysis 6
regular expression
198. House raiding
Option (024) - do all objects have prototypes?
对List进行排序工具类,可以对字符串排序
Code rant: from hard coding to configurable, rule engine, low code DSL complexity clock
Mysql 45讲学习笔记(六)全局锁
采用中微BATG135实现IIC数据/指令交互
740. Delete and get points
2022 wechat enterprise mailbox login entry introduction, how to open and register enterprise wechat enterprise mailbox?
Fast power (template)
Inputstream/outputstream (input and output of file)
Operator < <> > fool test case
Redis面试题集
[Android reverse] function interception (CPU cache mechanism | CPU cache mechanism causes function interception failure)
what the fuck! If you can't grab it, write it yourself. Use code to realize a Bing Dwen Dwen. It's so beautiful ~!
tars源码分析之8
2022 where to find enterprise e-mail and which is the security of enterprise e-mail system?
STM32 单片机ADC 电压计算