CodeGuru Home VC++ / MFC / C++ .NET / C# Visual Basic VB Forums Developer.com
Results 1 to 1 of 1
  1. #1
    Join Date
    Aug 2004
    Posts
    61

    VBA - HTML element extraction

    I am pulling my hair out on why I am seeing different results in the VBA routines below. One method works perfectly (Internet Explorer code), the other (MSXML2.XMLHTTP code) does not work. I really hope that someone, more knowledgeable than myself, can tell me why. I found this routine (Internet Explorer code) on the web, I am trying to make it work using MSXML2.XMLHTTP, not having any luck.

    Please let me know why it doesn't work. I ran through the loop, it can't find the XPath element data that is shown in Firefox?? Any ideas?


    Internet Explorer Code (works):

    Code:
    Private Sub cmdGetQuote_Click()
        Dim ie As InternetExplorer
        Dim elem As HTMLBaseElement
        Dim url As String
    
        url = "https://www.sec.gov/Archives/edgar/data/789019/000119312516550254/d256147d10q.htm"
    
        Set ie = New InternetExplorer
        ie.Visible = True
        ie.navigate url
        Do While ie.Busy Or ie.readyState <> READYSTATE_COMPLETE
            DoEvents
        Loop
    
        Set elem = getXPathElement("/html/body/document/type/sequence/filename/description/text/table[7]/tbody/tr[22]/td[4]/font/b", ie.document)
        Debug.Print elem.innerText
    
        Set ie = Nothing
    End Sub

    MSXML2.XMLHTTP code (Does not work!):

    Code:
    Private Sub cmdGetQuote_Click()
        Dim sLoadFile As String
        Dim oHTML As Object
        Dim XMLHTTP As Object
        Dim elem As HTMLBaseElement
    
        sLoadFile = "https://www.sec.gov/Archives/edgar/data/789019/000119312516550254/d256147d10q.htm"
    
        Set oHTML = CreateObject("htmlfile")
        Set XMLHTTP = CreateObject("MSXML2.XMLHTTP")
        XMLHTTP.Open "GET", sLoadFile, False
        XMLHTTP.send ""
        
        oHTML.body.innerHTML = XMLHTTP.responseText
        
        Set elem = getXPathElement("/html/body/document/type/sequence/filename/description/text/table[7]/tbody/tr[22]/td[4]/font/b", oHTML)
        Debug.Print elem.innerText
    End Sub


    This is the common "getXPathElement" function that gets called by both routines above:

    Code:
    Public Function getXPathElement(sXPath As String, objElement As HTMLBaseElement) As HTMLBaseElement
        Dim sXPathArray() As String
    
        Dim sNodeName As String
        Dim sNodeNameIndex As String
        Dim sRestOfXPath As String
        Dim lNodeIndex As Long
        Dim lCount As Long
    
        ' Split the xpath statement
        sXPathArray = Split(sXPath, "/")
        sNodeNameIndex = sXPathArray(1)
        If Not InStr(sNodeNameIndex, "[") > 0 Then
            sNodeName = sNodeNameIndex
            lNodeIndex = 1
        Else
            sXPathArray = Split(sNodeNameIndex, "[")
            sNodeName = sXPathArray(0)
            lNodeIndex = CLng(Left(sXPathArray(1), Len(sXPathArray(1)) - 1))
        End If
        sRestOfXPath = Right(sXPath, Len(sXPath) - (Len(sNodeNameIndex) + 1))
    
        Set getXPathElement = Nothing
        For lCount = 0 To objElement.childNodes().Length - 1
            If UCase(objElement.childNodes().Item(lCount).nodeName) = UCase(sNodeName) Then
                If lNodeIndex = 1 Then
                    If sRestOfXPath = "" Then
                        Set getXPathElement = objElement.childNodes().Item(lCount)
                    Else
                        Set getXPathElement = getXPathElement(sRestOfXPath, objElement.childNodes().Item(lCount))
                    End If
                End If
                lNodeIndex = lNodeIndex - 1
            End If
        Next lCount
    End Function
    Last edited by 99bobster99; September 21st, 2016 at 11:20 AM.

Posting Permissions

  • You may not post new threads
  • You may not post replies
  • You may not post attachments
  • You may not edit your posts
  •  





Click Here to Expand Forum to Full Width

Featured