当前位置:网站首页>【delphi】判断文件的编码方式(ANSI、Unicode、UTF8、UnicodeBIG)
【delphi】判断文件的编码方式(ANSI、Unicode、UTF8、UnicodeBIG)
2022-06-11 23:02:00 【sensor_WU】
在开发中,经常会碰到文件编码格式不正确,有时也需要进行编码转换,具体的编码原理可以自行查找,这里记录下我的处理方式。根据这个原理稍加修改即可实现编码转换和自动判断。
{判断文件的编码方式,可以实现编码转化
sensor
2018-08-02
}
unit uCODE_Convert;
interface
uses
Winapi.Windows,
System.SysUtils,
System.Variants,
System.Classes;
type
// Encoding kinds this unit can report for a file.
TCODE_TYPE = (ctANSI,ctUnicode,ctUTF8,ctUnicodeBIG);
// Determine the encoding of a file.
// Input:  FileName - name of the file to inspect
// Output: the detected encoding type
function Get_FileCode_TYPE(FileName : string) : TCODE_TYPE;
// Decide whether a byte stream is ANSI encoded. The criterion is that bytes
// >= $80 must occur in runs of even length and that no $00 byte may appear;
// if any of these checks fails the data is not ANSI.
// Input:  M - the file content as a memory stream
// Output: True when the content is ANSI encoded, False otherwise
function is_ANSI_CODE(M : TMemoryStream) : Boolean;
// Decide whether a byte array is UTF-8 encoded.
function is_UTF8_CODE(BB : TBytes) : Boolean;
// Map a TCODE_TYPE value to the corresponding TEncoding instance.
function GetEncodingType(code: TCODE_TYPE): Tencoding;
// Detect the encoding of a file and return it as a TEncoding instance.
function GetFileEncoding(FileName : string) : Tencoding;
implementation
function GetFileEncoding(FileName : string) : Tencoding;
begin
  // Detect the file's code type first, then translate that code type into
  // the matching TEncoding instance.
  Result := GetEncodingType(Get_FileCode_TYPE(FileName));
end;
function GetEncodingType(code: TCODE_TYPE): Tencoding;
begin
  // Translate the unit's own code-type enum into the RTL encoding object.
  // ctANSI and any value outside the enum both end up on the ANSI fallback.
  if code = ctUnicode then
    Result := TEncoding.Unicode
  else if code = ctUTF8 then
    Result := TEncoding.UTF8
  else if code = ctUnicodeBIG then
    Result := TEncoding.BigEndianUnicode
  else
    Result := TEncoding.ANSI;
end;
//判断一个文件的编码格式
// Determine the encoding of a file.
//
// Detection order:
//   1. Byte-order mark: FF FE -> ctUnicode, FE FF -> ctUnicodeBIG,
//      EF BB BF -> ctUTF8.
//   2. Content that passes is_UTF8_CODE -> ctUTF8.
//   3. First $00 byte in an even-length file -> ctUnicode / ctUnicodeBIG,
//      chosen by the byte that precedes the $00.
//   4. Everything else -> ctANSI.
//
// Input:  FileName - name of the file to inspect
// Output: the detected encoding type; ctANSI when the file does not exist,
//         is empty, or no other rule matches.
// Raises: whatever TMemoryStream.LoadFromFile raises if the file cannot be
//         read (the stream is released before the exception propagates).
function Get_FileCode_TYPE(FileName : string) : TCODE_TYPE;
var
  MF : TMemoryStream;
  MB : TBytes;
  Len,i : Int64;
begin
  // ctANSI is the fallback for every path that does not identify
  // something more specific, so the result is never left undefined.
  Result := ctANSI;
  if not FileExists(FileName) then Exit;

  // Load the whole file into a byte array; the stream is always released,
  // even when loading fails.
  MF := TMemoryStream.Create;
  try
    MF.LoadFromFile(FileName);
    Len := MF.Size;
    SetLength(MB,Len);
    if Len > 0 then
    begin
      MF.Position := 0;
      MF.Read(MB[0],Len);
    end;
  finally
    MF.Free;
  end;

  // An empty file carries no evidence at all; avoid indexing MB[0].
  if Len = 0 then Exit;

  // 1. Byte-order marks. Each test is guarded by the file length so that
  //    one- and two-byte files cannot index past the end of the buffer.
  if (Len >= 2) and (MB[0] = $FF) and (MB[1] = $FE) then Exit(ctUnicode);
  if (Len >= 2) and (MB[0] = $FE) and (MB[1] = $FF) then Exit(ctUnicodeBIG);
  if (Len >= 3) and (MB[0] = $EF) and (MB[1] = $BB) and (MB[2] = $BF) then Exit(ctUTF8);

  // 2. No BOM: check whether the content is UTF-8.
  // NOTE(review): content made only of bytes < $80 (including $00) passes
  // this check, so BOM-less UTF-16 text consisting of ASCII characters is
  // reported as ctUTF8 before step 3 is reached - confirm this is intended.
  if is_UTF8_CODE(MB) then Exit(ctUTF8);

  // 3. Look for the first $00 byte to tell ANSI apart from BOM-less Unicode.
  for i := 0 to Len - 1 do
    if MB[i] = 0 then
    begin
      // A $00 byte in an odd-length file cannot be 16-bit Unicode; no better
      // answer is available, so keep the ctANSI fallback.
      if (Len mod 2) <> 0 then Exit;
      // Even length: a leading $00 means the high byte comes first.
      if i = 0 then Exit(ctUnicodeBIG);
      // Otherwise a preceding byte below $80 indicates little-endian
      // Unicode, anything else big-endian.
      if MB[i - 1] < $80 then
        Exit(ctUnicode)
      else
        Exit(ctUnicodeBIG);
    end;

  // 4. No $00 byte found: Result is still ctANSI.
end;
// Decide whether a byte stream is ANSI (double-byte) encoded.
//
// The criterion is that no $00 byte may appear and that every run of
// consecutive bytes >= $80 must have even length, including a run that
// ends at the end of the data.
//
// Input:  M - stream holding the content; it is read from position 0 and
//             its Position is left wherever the read stopped.
// Output: True when the content satisfies the criterion (an empty stream
//         trivially does), False otherwise.
function is_ANSI_CODE(M : TMemoryStream) : Boolean;
var
  MB : TBytes;
  B : Byte;
  Len,i : Int64;
  D80 : Int64;
begin
  Len := M.Size;
  M.Position := 0;
  // Nothing to read: avoid indexing MB[0] on an empty array.
  if Len = 0 then Exit(True);

  SetLength(MB,Len);
  M.Read(MB[0],Len);

  D80 := 0; // length of the current run of bytes >= $80
  for i := 0 to Len - 1 do
  begin
    B := MB[i];
    if B = 0 then Exit(False); // a $00 byte rules out ANSI
    if B >= $80 then
      D80 := D80 + 1
    else
    begin
      // A low byte ends the current run; the run must have been even.
      if (D80 mod 2) <> 0 then Exit(False);
      D80 := 0;
    end;
  end;

  // A run that reaches the end of the data must be even as well; the loop
  // above only validates runs that are terminated by a low byte.
  Result := (D80 mod 2) = 0;
end;
//判断是否是UTF8
// Decide whether a byte array is structurally valid UTF-8.
//
// Every byte is classified by its lead-byte range and must be followed by
// the right number of continuation bytes (10xxxxxx):
//   $00..$7F  single byte
//   $80..$BF  invalid as a lead byte
//   $C0..$DF  lead of a 2-byte sequence
//   $E0..$EF  lead of a 3-byte sequence
//   $F0..$F4  lead of a 4-byte sequence
//   $F5..$FF  never valid in UTF-8
// Overlong forms and surrogate code points are not rejected; only the
// lead/continuation structure is checked.
//
// Input:  BB - bytes to examine
// Output: True when every sequence is well-formed and complete (an empty
//         array and pure 7-bit content both qualify), False otherwise.
function is_UTF8_CODE(BB : TBytes) : Boolean;
var
  B : Byte;
  Len,i : Int64;
  Trail,k : Integer;
begin
  Len := Length(BB);
  i := 0;
  // Walk every byte, including the last one.
  while i < Len do
  begin
    B := BB[i];
    // Number of continuation bytes required by this lead byte. The ranges
    // are mutually exclusive, so exactly one branch applies per lead byte.
    if B < $80 then
      Trail := 0
    else if B < $C0 then
      Exit(False) // a continuation byte cannot start a sequence
    else if B < $E0 then
      Trail := 1
    else if B < $F0 then
      Trail := 2
    else if B <= $F4 then
      Trail := 3
    else
      Exit(False);

    // The whole sequence must fit inside the buffer.
    if i + Trail >= Len then Exit(False);

    // Every trailing byte must match 10xxxxxx.
    for k := 1 to Trail do
      if (BB[i + k] and $C0) <> $80 then Exit(False);

    i := i + Trail + 1;
  end;
  Result := True;
end;
end.
边栏推荐
- 2022安全员-C证判断题模拟考试平台操作
- Recruitment of audio and video quality test and Development Engineer
- Discrete mathematics attention points, priority problems
- Google搜索为什么不能无限分页?
- JsonParseException: Unrecognized token ‘username‘: was expecting表单提交登陆数据报错
- 926. flip string to monotonic increment
- [Day2 intensive literature reading] time in the mind: using space to think about time
- IEEE-754 浮点转换器
- Super Codex from the open source world, the authoritative release of China's open source Codex list!
- Method for debugging wireless data packet capturing of Internet of things zigbee3.0 protocol e18-2g4u04b module
猜你喜欢

Here we go! Dragon lizard community enters PKU classroom

Bit operation in leetcode

Inventory | more than 20 typical security incidents occurred in February, with a loss of nearly $400million

Huawei equipment configuration hovpn

2022年高处安装、维护、拆除操作证考试题库模拟考试平台操作
![[day1/5 literature intensive reading] speed constancy or only slowness: what drives the kappa effect](/img/17/5481a9e05de96eb0a2f89709e6120d.png)
[day1/5 literature intensive reading] speed constancy or only slowness: what drives the kappa effect

NLP - fastText

Games-101 闫令琪 5-6讲 光栅化处理 (笔记整理)
![[solution] solution to asymmetric and abnormal transformation caused by modifying the transform information of sub objects](/img/52/7e741154e4d6e61c5df7e8701ab177.png)
[solution] solution to asymmetric and abnormal transformation caused by modifying the transform information of sub objects

Only three steps are needed to learn how to use low code thingjs to connect with Sen data Dix data
随机推荐
Inventory | more than 20 typical security incidents occurred in February, with a loss of nearly $400million
【Day4 文献精读】Space–time interdependence: Evidence against asymmetric mapping between time and space
[day3 literature intensive reading] Oriental time and space interaction in tau and kappa effects
[Day10 literature extensive reading] temporary cognition can affect spatial cognition more than vice versa: the effect of
[Day2 intensive literature reading] time in the mind: using space to think about time
Point cloud read / write (2): read / write TXT point cloud (space separated | comma separated)
Research Report on development trend and competitive strategy of global customized power supply industry
Jetpack架构组件学习(3)——Activity Results API使用
2022高压电工考试题模拟考试题库及在线模拟考试
Solution to page locking caused by xshell accidentally pressing ctrl+s
2022年低压电工上岗证题目及在线模拟考试
习题6-2 使用函数求特殊a串数列和 (20 分)
Exercise 8-8 judging palindrome string (20 points)
2022年安全员-A证考题模拟考试平台操作
[matlab] second order saving response
Want to be iron man? It is said that many big men use it to get started
Research Report on development trend and competitive strategy of global non directional beacon industry
[bitbear story collection] February MVP hero story open source with love
[day6-7 intensive literature reading] a unifying Bayesian framework accounting for spatiotemporal interactions with a
Exercise 6-6 using a function to output an integer in reverse order (20 points)