这个问题困惑我很久了,以前总是要让用户自己根据网页编码来选择UTF-8还是GB2312 。
今天遇到一个读取文件乱码,突然想起了以前用XMLHTTP获取网页源代码乱码问题,嘿嘿~一同解决了。
跟大家分享一下吧!
主要是对编码的判断,然后进行相对应的转换即可。
代码如下:
- VBScript code复制代码
Option Explicit Private Sub Form_Load() '测试 Text1.Text = GetHtml("http://www.NewXing.com") End Sub Public Function GetHtml(url As String) Dim xmlHttp As Object Set xmlHttp = CreateObject("Microsoft.XMLHTTP") xmlHttp.open "GET", url, True xmlHttp.send (Null) While xmlHttp.ReadyState <> 4 DoEvents Wend GetHtml = BytesToBstr(xmlHttp.responseBody) End Function Private Function BytesToBstr(Bytes) Dim Unicode As String If IsUTF8(Bytes) Then '如果不是UTF-8编码则按照GB2312来处理 Unicode = "UTF-8" Else Unicode = "GB2312" End If Dim objstream As Object Set objstream = CreateObject("ADODB.Stream") With objstream .Type = 1 .Mode = 3 .open .Write Bytes .Position = 0 .Type = 2 .Charset = Unicode BytesToBstr = .ReadText .Close End With End Function '判断网页编码函数 Private Function IsUTF8(Bytes) As Boolean Dim i As Long, AscN As Long, Length As Long Length = UBound(Bytes) + 1 If Length < 3 Then IsUTF8 = False Exit Function ElseIf Bytes(0) = &HEF And Bytes(1) = &HBB And Bytes(2) = &HBF Then IsUTF8 = True Exit Function End If Do While i <= Length - 1 If Bytes(i) < 128 Then i = i + 1 AscN = AscN + 1 ElseIf (Bytes(i) And &HE0) = &HC0 And (Bytes(i + 1) And &HC0) = &H80 Then i = i + 2 ElseIf i + 2 < Length Then If (Bytes(i) And &HF0) = &HE0 And (Bytes(i + 1) And &HC0) = &H80 And (Bytes(i + 2) And &HC0) = &H80 Then i = i + 3 Else IsUTF8 = False Exit Function End If Else IsUTF8 = False Exit Function End If Loop If AscN = Length Then IsUTF8 = False Else IsUTF8 = True End If End Function