« VB RichText控件中解析HTML代码VB 实现获得标准北京时间 »

解决XMLHTTP获取网页中文乱码问题

这个问题困惑我很久了,以前总是要让用户自己根据网页编码来选择UTF-8还是GB2312 。
今天遇到一个读取文件乱码,突然想起了以前用XMLHTTP获取网页源代码乱码问题,嘿嘿~一同解决了。
   跟大家分享一下吧!
主要是对编码的判断,然后进行相对应的转换即可。
代码如下:

VBScript code复制代码
Option Explicit
Private Sub Form_Load()
    '测试
    Text1.Text = GetHtml("http://www.NewXing.com")
End Sub

Public Function GetHtml(url As String)
    Dim xmlHttp As Object
    Set xmlHttp = CreateObject("Microsoft.XMLHTTP")
    xmlHttp.open "GET", url, True
    xmlHttp.send (Null)
    While xmlHttp.ReadyState <> 4
        DoEvents
    Wend
    GetHtml = BytesToBstr(xmlHttp.responseBody)
End Function

Private Function BytesToBstr(Bytes)
    Dim Unicode As String
    If IsUTF8(Bytes) Then '如果不是UTF-8编码则按照GB2312来处理
        Unicode = "UTF-8"
    Else
        Unicode = "GB2312"
    End If

    Dim objstream As Object
    Set objstream = CreateObject("ADODB.Stream")
    With objstream
        .Type = 1
        .Mode = 3
        .open
        .Write Bytes
        .Position = 0
        .Type = 2
        .Charset = Unicode
        BytesToBstr = .ReadText
       .Close
    End With
End Function

 '判断网页编码函数
Private Function IsUTF8(Bytes) As Boolean
        Dim i As Long, AscN As Long, Length As Long
        Length = UBound(Bytes) + 1
        
        If Length < 3 Then
            IsUTF8 = False
            Exit Function
        ElseIf Bytes(0) = &HEF And Bytes(1) = &HBB And Bytes(2) = &HBF Then
            IsUTF8 = True
            Exit Function
        End If

        Do While i <= Length - 1
            If Bytes(i) < 128 Then
                i = i + 1
                AscN = AscN + 1
            ElseIf (Bytes(i) And &HE0) = &HC0 And (Bytes(i + 1) And &HC0) = &H80 Then
                i = i + 2

            ElseIf i + 2 < Length Then
                If (Bytes(i) And &HF0) = &HE0 And (Bytes(i + 1) And &HC0) = &H80 And (Bytes(i + 2) And &HC0) = &H80 Then
                     i = i + 3
                Else
                    IsUTF8 = False
                    Exit Function
                End If
            Else
                IsUTF8 = False
                Exit Function
            End If
        Loop
                
        If AscN = Length Then
            IsUTF8 = False
        Else
            IsUTF8 = True
        End If

End Function

 

  • 相关文章:

发表评论:

◎欢迎参与讨论,请在这里发表您的看法、交流您的观点。

日历

最新评论及回复

最近发表

Copyright © 2009-2011 linhaibo.com. All Rights Reserved .