/// <summary>
/// Click handler for <c>button1</c>: loads the file whose path is entered in
/// <c>textBox2</c> into <c>textBox1</c> and writes details about the file's
/// detected text encoding to the console.
/// </summary>
/// <param name="sender">The control that raised the event.</param>
/// <param name="e">Event data (not used).</param>
private void button1_Click(object sender, EventArgs e)
{
    string fileName = textBox2.Text;

    // Detect the encoding BEFORE reading the text so the file is decoded with
    // it. Previously the text was always decoded as UTF-8 and the detected
    // encoding was only printed, never used.
    Encoding encoding = GetTextFileEncodingType(fileName);
    textBox1.Text = System.IO.File.ReadAllText(fileName, encoding);

    Console.WriteLine(encoding.GetType());
    Console.WriteLine($"代码页:【{encoding.CodePage}】,描述【{encoding.EncodingName}】");
    Console.WriteLine($"WindowsCodePage:【{encoding.WindowsCodePage}】,WebName:【{encoding.WebName}】,HeaderName:【{encoding.HeaderName}】,BodyName:【{encoding.BodyName}】");

    // Deliberately no Console.ReadLine() here: waiting for console input
    // inside a click handler would block the UI thread.
}
}
/// <summary>
/// Detects the character encoding of a text file.
/// </summary>
/// <remarks>
/// A byte order mark (BOM) at the start of the file decides the result when
/// one is present (UTF-8, UTF-32 LE/BE, UTF-16 LE/BE). Without a BOM the
/// content is passed to <c>IsUTF8Bytes</c>; if that reports valid UTF-8 the
/// result is UTF-8, otherwise <see cref="Encoding.Default"/> is returned.
/// </remarks>
/// <param name="fileName">Path of the file to inspect.</param>
/// <returns>The detected encoding, or <see cref="Encoding.Default"/> as the fallback.</returns>
static Encoding GetTextFileEncodingType(string fileName)
{
    // File.ReadAllBytes opens, reads and closes the file in one call, so the
    // handle is released even when reading fails.
    byte[] buffer = File.ReadAllBytes(fileName);

    // UTF-8 BOM: EF BB BF
    if (buffer.Length >= 3 && buffer[0] == 0xEF && buffer[1] == 0xBB && buffer[2] == 0xBF)
    {
        return Encoding.UTF8;
    }

    // UTF-32 LE BOM: FF FE 00 00. Must be tested before UTF-16 LE because it
    // starts with the same two bytes.
    if (buffer.Length >= 4 && buffer[0] == 0xFF && buffer[1] == 0xFE && buffer[2] == 0x00 && buffer[3] == 0x00)
    {
        return Encoding.UTF32;
    }

    // UTF-32 BE BOM: 00 00 FE FF
    if (buffer.Length >= 4 && buffer[0] == 0x00 && buffer[1] == 0x00 && buffer[2] == 0xFE && buffer[3] == 0xFF)
    {
        return new UTF32Encoding(bigEndian: true, byteOrderMark: true);
    }

    // UTF-16 BE BOM: FE FF. The BOM is only two bytes long; the byte that
    // follows it belongs to the text and must not take part in the check.
    if (buffer.Length >= 2 && buffer[0] == 0xFE && buffer[1] == 0xFF)
    {
        return Encoding.BigEndianUnicode;
    }

    // UTF-16 LE BOM: FF FE
    if (buffer.Length >= 2 && buffer[0] == 0xFF && buffer[1] == 0xFE)
    {
        return Encoding.Unicode;
    }

    // No BOM: fall back to validating the content as UTF-8.
    return IsUTF8Bytes(buffer) ? Encoding.UTF8 : Encoding.Default;
}
/// <summary>
/// Determines whether a byte sequence is well-formed UTF-8. Intended for
/// text that carries no BOM.
/// </summary>
/// <remarks>
/// BOM (Byte Order Mark): a marker at the very start of a text file that the
/// Unicode standard uses to identify which encoding form the file uses.
/// Validation is delegated to a strict <see cref="UTF8Encoding"/>, so
/// truncated sequences, stray continuation bytes, overlong forms and the
/// obsolete 5/6-byte lead bytes are all rejected.
/// </remarks>
/// <param name="data">The bytes to inspect.</param>
/// <returns>
/// <c>true</c> when <paramref name="data"/> is valid UTF-8 (an empty array
/// and pure ASCII both qualify); otherwise <c>false</c>.
/// </returns>
private static bool IsUTF8Bytes(byte[] data)
{
    // throwOnInvalidBytes: true makes the decoder raise an exception on
    // malformed input instead of silently substituting U+FFFD.
    UTF8Encoding strictUtf8 = new UTF8Encoding(encoderShouldEmitUTF8Identifier: false, throwOnInvalidBytes: true);
    try
    {
        // Only the validation side effect matters; the count is discarded.
        strictUtf8.GetCharCount(data);
        return true;
    }
    catch (DecoderFallbackException)
    {
        // Malformed or truncated sequence: report "not UTF-8" rather than
        // letting an exception escape from a yes/no predicate.
        return false;
    }
}
C# 转 VB.NET 代码
- 写回答
- 好问题 0 提建议
- 追加酬金
- 关注问题
- 邀请回答
-
3条回答 默认 最新
- 菜鸟才能学的更多 2023-02-19 21:37关注
''' <summary>
''' Click handler for Button1: loads the file whose path is entered in
''' TextBox2 into TextBox1 and writes details about the file's detected
''' text encoding to the console.
''' </summary>
''' <param name="sender">The control that raised the event.</param>
''' <param name="e">Event data (not used).</param>
Private Sub Button1_Click(sender As Object, e As EventArgs) Handles Button1.Click
    Dim fileName As String = TextBox2.Text

    ' Detect the encoding BEFORE reading the text so the file is decoded with
    ' it, instead of always assuming UTF-8.
    Dim fileEncoding As Encoding = GetTextFileEncodingType(fileName)
    TextBox1.Text = System.IO.File.ReadAllText(fileName, fileEncoding)

    Console.WriteLine(fileEncoding.GetType())
    Console.WriteLine($"代码页:【{fileEncoding.CodePage}】,描述【{fileEncoding.EncodingName}】")
    Console.WriteLine($"WindowsCodePage:【{fileEncoding.WindowsCodePage}】,WebName:【{fileEncoding.WebName}】,HeaderName:【{fileEncoding.HeaderName}】,BodyName:【{fileEncoding.BodyName}】")

    ' Deliberately no Console.ReadLine() here: waiting for console input
    ' inside a click handler would block the UI thread.
End Sub

''' <summary>
''' Detects the character encoding of a text file.
''' </summary>
''' <remarks>
''' A byte order mark (BOM) at the start of the file decides the result when
''' one is present (UTF-8, UTF-32 LE/BE, UTF-16 LE/BE). Without a BOM the
''' content is validated as UTF-8; if that fails Encoding.Default is returned.
''' </remarks>
''' <param name="fileName">Path of the file to inspect.</param>
''' <returns>The detected encoding, or Encoding.Default as the fallback.</returns>
Private Function GetTextFileEncodingType(fileName As String) As Encoding
    ' ReadAllBytes opens, reads and closes the file in one call, so the
    ' handle is released even when reading fails.
    Dim buffer As Byte() = System.IO.File.ReadAllBytes(fileName)

    ' UTF-8 BOM: EF BB BF
    If buffer.Length >= 3 AndAlso buffer(0) = &HEF AndAlso buffer(1) = &HBB AndAlso buffer(2) = &HBF Then
        Return Encoding.UTF8
    End If

    ' UTF-32 LE BOM: FF FE 00 00. Must be tested before UTF-16 LE because it
    ' starts with the same two bytes.
    If buffer.Length >= 4 AndAlso buffer(0) = &HFF AndAlso buffer(1) = &HFE AndAlso buffer(2) = 0 AndAlso buffer(3) = 0 Then
        Return Encoding.UTF32
    End If

    ' UTF-32 BE BOM: 00 00 FE FF
    If buffer.Length >= 4 AndAlso buffer(0) = 0 AndAlso buffer(1) = 0 AndAlso buffer(2) = &HFE AndAlso buffer(3) = &HFF Then
        Return New UTF32Encoding(bigEndian:=True, byteOrderMark:=True)
    End If

    ' UTF-16 BE BOM: FE FF. The BOM is only two bytes long; the byte that
    ' follows it belongs to the text and must not take part in the check.
    If buffer.Length >= 2 AndAlso buffer(0) = &HFE AndAlso buffer(1) = &HFF Then
        Return Encoding.BigEndianUnicode
    End If

    ' UTF-16 LE BOM: FF FE
    If buffer.Length >= 2 AndAlso buffer(0) = &HFF AndAlso buffer(1) = &HFE Then
        Return Encoding.Unicode
    End If

    ' No BOM: fall back to validating the content as UTF-8.
    Return If(IsUTF8Bytes(buffer), Encoding.UTF8, Encoding.Default)
End Function

''' <summary>
''' Determines whether a byte sequence is well-formed UTF-8. Intended for
''' text that carries no BOM (Byte Order Mark, the marker at the start of a
''' text file that identifies its Unicode encoding form).
''' </summary>
''' <param name="data">The bytes to inspect.</param>
''' <returns>
''' True when the data is valid UTF-8 (an empty array and pure ASCII both
''' qualify); otherwise False.
''' </returns>
Private Function IsUTF8Bytes(data As Byte()) As Boolean
    ' throwOnInvalidBytes:=True makes the decoder raise an exception on
    ' malformed input instead of silently substituting U+FFFD. This replaces
    ' the hand-written bit-shifting loop, whose "(curByte <<= 1)" inside an
    ' expression is not valid Visual Basic.
    Dim strictUtf8 As New UTF8Encoding(encoderShouldEmitUTF8Identifier:=False, throwOnInvalidBytes:=True)
    Try
        ' Only the validation side effect matters; the count is discarded.
        strictUtf8.GetCharCount(data)
        Return True
    Catch ex As DecoderFallbackException
        ' Malformed or truncated sequence: report "not UTF-8" rather than
        ' letting an exception escape from a yes/no predicate.
        Return False
    End Try
End Function
本回答被题主选为最佳回答 , 对您是否有帮助呢?解决 1无用
悬赏问题
- ¥20 机器学习能否像多层线性模型一样处理嵌套数据
- ¥20 西门子S7-Graph,S7-300,梯形图
- ¥50 用易语言http 访问不了网页
- ¥50 safari浏览器fetch提交数据后数据丢失问题
- ¥15 matlab不知道怎么改,求解答!!
- ¥15 永磁直线电机的电流环pi调不出来
- ¥15 用stata实现聚类的代码
- ¥15 请问paddlehub能支持移动端开发吗?在Android studio上该如何部署?
- ¥20 docker里部署springboot项目,访问不到扬声器
- ¥15 netty整合springboot之后自动重连失效