当前位置:网站首页>Tar source code analysis Part 10
Tar source code analysis Part 10
2022-07-04 06:37:00 【Tao song remains the same】
gbk and utf8 It's a topic you can never get around , How to transform it ?
and , I guess you've stepped on the pit . I stepped on it anyway .
It's very simple , Let's take a look at the code :
#include <iconv.h>
#include <errno.h>
#include <string.h>
#include "util/tc_encoder.h"
namespace tars
{
void TC_Encoder::gbk2utf8(char *sOut, int &iMaxOutLen, const char *sIn, int iInLen)
{
char * pIn = (char*)sIn;
char * pEnd = pIn+iInLen;
char * pOut = sOut;
size_t iLeftLen;
size_t iGbkLen;
iconv_t cd;
if (iInLen > iMaxOutLen)
{
throw TC_Encoder_Exception("[TC_Encoder::gbk2utf8] iInLen > iMaxOutLen error : ", errno);
}
cd = iconv_open("UTF-8","GBK");
if (cd == (iconv_t)-1)
{
throw TC_Encoder_Exception("[TC_Encoder::gbk2utf8] iconv_open error : ", errno);
}
iLeftLen = iMaxOutLen;
while(pIn < pEnd)
{
if((unsigned char)(*pIn)==0x80)
{
// Be careful GBK Of 0x80 Convert to UTF-8 When is E2 82 AC
*pOut = 0xe2; pOut++; iLeftLen--;
*pOut = 0x82; pOut++; iLeftLen--;
*pOut = 0xac; pOut++; iLeftLen--;
pIn++;
}
else if((unsigned char)(*pIn)<0x80)
{
// Single byte (GBK: 0x00-0x7F)
*pOut = *pIn;
pIn++;pOut++;iLeftLen--;
}
else
{
// Double byte
iGbkLen=2;
int iRet=iconv(cd, &pIn, (size_t *)&iGbkLen, (char **)&pOut, (size_t *)&iLeftLen);
if(iRet < 0)
{
*pOut = ' '; // Conversion cannot be replaced with spaces
pIn+=2; pOut++; iLeftLen--;
}
}
}
iconv_close(cd);
sOut[iMaxOutLen - iLeftLen] = '\0';
iMaxOutLen = iMaxOutLen - iLeftLen;
}
string TC_Encoder::gbk2utf8(const string &sIn)
{
iconv_t cd;
cd = iconv_open("UTF-8","GBK");
if (cd == (iconv_t)-1)
{
throw TC_Encoder_Exception("[TC_Encoder::gbk2utf8] iconv_open error", errno);
}
string sOut;
for(string::size_type pos = 0; pos < sIn.length(); ++pos)
{
if((unsigned char)sIn[pos] == 0x80)
{
// Be careful GBK Of 0x80 Convert to UTF-8 When is E2 82 AC
sOut += 0xe2;
sOut += 0x82;
sOut += 0xac;
}
else if((unsigned char)sIn[pos] < 0x80)
{
// Single byte (GBK: 0x00-0x7F)
sOut += sIn[pos];
}
else
{
// Double byte
size_t sizeGbkLen = 2;
char pIn[128] = "\0";
strncpy(pIn, sIn.c_str() + pos, sizeGbkLen);
char *p = pIn;
size_t sizeLeftLen = 128;
char pOut[128] = "\0";
char *o = pOut;
int iRet = iconv(cd, &p, &sizeGbkLen, (char **)&o, &sizeLeftLen);
if(iRet < 0)
{
// It can't be converted , Temporarily replace with spaces
sOut += ' ';
}
else
{
sOut += pOut;
}
++pos;
}
}
iconv_close(cd);
return sOut;
}
void TC_Encoder::gbk2utf8(const string &sIn, vector<string> &vtStr)
{
iconv_t cd;
cd = iconv_open("UTF-8","GBK");
if (cd == (iconv_t)-1)
{
throw TC_Encoder_Exception("[TC_Encoder::gbk2utf8] iconv_open error", errno);
}
vtStr.clear();
for(string::size_type pos = 0; pos < sIn.length(); ++pos)
{
string sOut;
if((unsigned char)sIn[pos] == 0x80)
{
// Be careful GBK Of 0x80 Convert to UTF-8 When is E2 82 AC
sOut += 0xe2;
sOut += 0x82;
sOut += 0xac;
}
else if((unsigned char)sIn[pos] < 0x80)
{
// Single byte (GBK: 0x00-0x7F)
sOut += sIn[pos];
}
else
{
// Double byte
size_t iGbkLen = 2;
char pIn[128] = "\0";
strncpy(pIn, sIn.c_str() + pos, iGbkLen);
char *p = pIn;
size_t iLeftLen = 128;
char pOut[128] = "\0";
char *o = pOut;
int iRet = iconv(cd, &p, (size_t *)&iGbkLen, (char **)&o, (size_t *)&iLeftLen);
if(iRet < 0)
{
// It can't be converted , Temporarily replace with spaces
sOut += ' ';
}
else
{
sOut += pOut;
}
++pos;
}
vtStr.push_back(sOut);
}
iconv_close(cd);
}
void TC_Encoder::utf82gbk(char *sOut, int &iMaxOutLen, const char *sIn, int iInLen)
{
iconv_t cd;
cd = iconv_open("GBK","UTF-8");
if (cd == (iconv_t)-1)
{
throw TC_Encoder_Exception("[TC_Encoder::utf82gbk] iconv_open error", errno);
}
char * pIn = (char*)sIn;
size_t sizeLeftLen = iMaxOutLen;
size_t sizeInLen = iInLen;
char* pOut = sOut;
size_t ret = iconv(cd, &pIn, &sizeInLen, (char **)&sOut, &sizeLeftLen);
if (ret == (size_t) - 1)
{
iMaxOutLen = 0;
iconv_close(cd);
throw TC_Encoder_Exception("[TC_Encoder::utf82gbk] iconv error", errno);
return;
}
iconv_close(cd);
pOut[iMaxOutLen - (int)sizeLeftLen] = '\0';
iMaxOutLen = iMaxOutLen - (int)sizeLeftLen;
}
string TC_Encoder::utf82gbk(const string &sIn)
{
if(sIn.length() == 0)
{
return "";
}
string sOut;
int iLen = sIn.length() * 2 + 1;
char *pOut = new char[iLen];
try
{
utf82gbk(pOut, iLen, sIn.c_str(), sIn.length());
}
catch (TC_Encoder_Exception& e)
{
delete[] pOut;
throw e;
}
sOut.assign(pOut, iLen);
delete[] pOut;
return sOut;
}
/**
* \n -> \r\0
* \r -> \r\r
*/
string TC_Encoder::transTo(const string& str, char f /*='\n'*/, char t /*= '\r'*/, char u /*= '\0'*/)
{
string ret = str;
for (size_t i = 0; i < ret.length(); ++i)
{
if (ret[i] == f)
{
ret[i] = t;
ret.insert(++i, 1, u);
}
else if (ret[i] == t)
{
ret.insert(++i, 1, t);
}
}
return ret;
}
/**
* \r\0 -> \n
* \r\r -> \r
*/
string TC_Encoder::transFrom(const string& str, char f /*= '\n'*/, char t /*= '\r'*/, char u /*= '\0'*/)
{
string ret = "";
for (string::const_iterator it = str.begin()
; it != str.end()
; ++it)
{
ret.append(1, *it);
if (*it == t)
{
if (*(++it) == u)
{
*ret.rbegin() = f;
}
}
}
return ret;
}
}
边栏推荐
- Explain in one sentence what social proof is
- Practical gadget instructions
- STC8H开发(十二): I2C驱动AT24C08,AT24C32系列EEPROM存储
- [March 3, 2019] MAC starts redis
- Functions in C language (detailed explanation)
- 2022年,或許是未來10年經濟最好的一年,2022年你畢業了嗎?畢業後是怎麼計劃的?
- 手动对list进行分页(参数list ,当前页,页面大小)
- tars源码分析之4
- Tar source code analysis 4
- Displaying currency in Indian numbering format
猜你喜欢

【问题记录】03 连接MySQL数据库提示:1040 Too many connections

Distributed cap theory

Fundamentals of SQL database operation

如何实现视频平台会员多账号登录
![[MySQL] introduction, function, creation, view, deletion and modification of database view (with exercises)](/img/03/2b37e63d0d482d5020b7421ac974cb.jpg)
[MySQL] introduction, function, creation, view, deletion and modification of database view (with exercises)

Native Cloud - SSH articles must be read on Cloud (used for Remote Login to Cloud Server)

云原生——上云必读之SSH篇(常用于远程登录云服务器)

MySQL installation and configuration

C realize Snake games

Variables d'environnement personnalisées uniapp
随机推荐
Displaying currency in Indian numbering format
Notes and notes
what the fuck! If you can't grab it, write it yourself. Use code to realize a Bing Dwen Dwen. It's so beautiful ~!
[March 3, 2019] MAC starts redis
Mysql 45讲学习笔记(七)行锁
QT releases multilingual International Translation
CORS is not intended to protect API endpoints - nikofischer
SQL injection SQL lab 11~22
SQL join, left join, right join usage
tars源码分析之4
内卷怎么破?
Operator < <> > fool test case
Google Chrome Portable Google Chrome browser portable version official website download method
Nexus 6p downgraded from 8.0 to 6.0+root
Another company raised the price of SAIC Roewe new energy products from March 1
C实现贪吃蛇小游戏
C language - Blue Bridge Cup - Snake filling
C language exercises (recursion)
Learning multi-level structural information for small organ segmentation
24 magicaccessorimpl can access the debugging of all methods