您好, 欢迎来到 !    登录 | 注册 | | 设为首页 | 收藏本站

使用VBA进行Web抓取(当HTML <> DOM时)

使用VBA进行Web抓取(当HTML <> DOM时)

大纲:

使GET XHR到https://www.wunderground.com/cgi-bin/findweather/getForecast?query=EIDW。

从HTML响应行中提取位置,var query = ‘zmw:’ + ‘00000.271.03969’;并从这些行中提取键var citypage_options = {k: ‘c991975b7f4186c0’, …。

使用位置让GET XHR 00000.271.03969和密钥c991975b7f4186c0来https://api-ak-aws.wunderground.com/api/c991975b7f4186c0/forecast10day/hourly10day/labels/conditions/astronomy10day/lang:zh-CN/units:metric/v:2.0/bestfct:1/q/zmw:00000.271。 03969.json。

使用解析JSON响应(例如,使用VBAJSON解析器),使用Parse()转换所需的数据ToArray(),并作为表格输出到工作表。

实际上,每次打开该网页时,Web浏览器都会执行几乎相同的操作。

您可以使用下面的VBA代码来解析响应并输出结果。将JSON.bas模块导入VBA项目以进行JSON处理。

Sub TestScrapeWunderground()

    Dim sContent As String
    Dim sKey As String
    Dim sLocation As String
    Dim vJSON As Variant
    Dim sState As String
    Dim oDays As Object
    Dim oHours As Object
    Dim vDay As Variant
    Dim vHour As Variant
    Dim aRows() As Variant
    Dim aHeader() As Variant

    ' GET XHR to retrieve location and key
    With CreateObject("MSXML2.ServerXMLHTTP")
        .Open "GET", "https://www.wunderground.com/cgi-bin/findweather/getForecast?query=EIDW", False
        .Send
        sContent = .responseText
    End With
    ' Extract location and key from HTML content
    sLocation = Split(Split(sContent, "var query = 'zmw:' + '", 2)(1), "'", 2)(0)
    sKey = Split(Split(sContent, vbTab & "k: '", 2)(1), "'", 2)(0)
    ' GET XHR to retrieve JSON data
    With CreateObject("MSXML2.ServerXMLHTTP")
        .Open "GET", "https://api-ak-aws.wunderground.com/api/" & sKey & "/forecast10day/hourly10day/labels/conditions/astronomy10day/lang:en/units:metric/v:2.0/bestfct:1/q/zmw:" & sLocation & ".json", False
        .Send
        sContent = .responseText
    End With
    ' Parse JSON response to data structure
    JSON.Parse sContent, vJSON, sState
    ' Populate dictionaries with daily and hourly forecast data
    Set oDays = CreateObject("Scripting.Dictionary")
    Set oHours = CreateObject("Scripting.Dictionary")
    For Each vDay In vJSON("forecast")("days")
        oDays(vDay("summary")) = ""
        For Each vHour In vDay("hours")
            oHours(vHour) = ""
        Next
    Next
    ' Convert daily forecast data to arrays
    JSON.ToArray oDays.Keys(), aRows, aHeader
    ' Output daily forecast data to table
    With Sheets(1)
        .Cells.Delete
        OutputArray .Cells(1, 1), aHeader
        Output2DArray .Cells(2, 1), aRows
        .Columns.AutoFit
    End With
    ' Convert hourly forecast data to arrays
    JSON.ToArray oHours.Keys(), aRows, aHeader
    ' Output hourly forecast data to table
    With Sheets(2)
        .Cells.Delete
        OutputArray .Cells(1, 1), aHeader
        Output2DArray .Cells(2, 1), aRows
        .Columns.AutoFit
    End With
    ' Convert response data to arrays
    JSON.ToArray Array(vJSON("response")), aRows, aHeader
    ' Output response transposed data to table
    With Sheets(3)
        .Cells.Delete
        Output2DArray .Cells(1, 1), WorksheetFunction.Transpose(aHeader)
        Output2DArray .Cells(1, 2), WorksheetFunction.Transpose(aRows)
        .Columns.AutoFit
    End With
    ' Convert current data to arrays
    JSON.ToArray Array(vJSON("current_observation")), aRows, aHeader
    ' Output current transposed data to table
    With Sheets(4)
        .Cells.Delete
        Output2DArray .Cells(1, 1), WorksheetFunction.Transpose(aHeader)
        Output2DArray .Cells(1, 2), WorksheetFunction.Transpose(aRows)
        .Columns.AutoFit
    End With
    ' Populate dictionary with daily astronomy data
    Set oDays = CreateObject("Scripting.Dictionary")
    For Each vDay In vJSON("astronomy")("days")
        oDays(vDay) = ""
    Next
    ' Convert daily astronomy data to arrays
    JSON.ToArray oDays.Keys(), aRows, aHeader
    ' Output daily astronomy transposed data to table
    With Sheets(5)
        .Cells.Delete
        Output2DArray .Cells(1, 1), WorksheetFunction.Transpose(aHeader)
        Output2DArray .Cells(1, 2), WorksheetFunction.Transpose(aRows)
        .Columns.AutoFit
    End With
    ' Convert hourly history data to arrays
    JSON.ToArray vJSON("history")("days")(0)("hours"), aRows, aHeader
    ' Output hourly history data to table
    With Sheets(6)
        .Cells.Delete
        OutputArray .Cells(1, 1), aHeader
        Output2DArray .Cells(2, 1), aRows
        .Columns.AutoFit
    End With
    Msg@R_757_2419@ "Completed"

End Sub

Sub OutputArray(oDstRng As Range, aCells As Variant)

    With oDstRng
        .Parent.Select
        With .Resize( _
                1, _
                UBound(aCells) - LBound(aCells) + 1)
            .NumberFormat = "@"
            .Value = aCells
        End With
    End With

End Sub

Sub Output2DArray(oDstRng As Range, aCells As Variant)

    With oDstRng
        .Parent.Select
        With .Resize( _
                UBound(aCells, 1) - LBound(aCells, 1) + 1, _
                UBound(aCells, 2) - LBound(aCells, 2) + 1)
            .NumberFormat = "@"
            .Value = aCells
        End With
    End With

End Sub

第二个XHR返回JSON数据,以清楚说明如何从其中提取必要的数据,您可以将JSON保存到文件中,复制内容并将其粘贴到任何JSON查看器中以进行进一步研究。我使用在线工具http://jsonviewer.stack.hu,

有6个主要部分,提取了数据的相关部分并将其输出到6个工作表(必须在运行之前手动创建):

Sheet1 - Daily forecast
Sheet2 - Horly forecast
Sheet3 - Response data (transposed)
Sheet4 - Current data (transposed)
Sheet5 - Astronomy (transposed)
Sheet6 - Hourly history data

有了该示例,您可以从该JSON响应中提取所需的数据。

其他 2022/1/1 18:17:58 有502人围观

撰写回答


你尚未登录,登录后可以

和开发者交流问题的细节

关注并接收问题和回答的更新提醒

参与内容的编辑和改进,让解决方法与时俱进

请先登录

推荐问题


联系我
置顶