Home Referencing Cells from Spreadsheet and Populating Corresponding Cells
Reply: 1

Referencing Cells from Spreadsheet and Populating Corresponding Cells

josephkane
1#
josephkane Published in 2018-01-13 06:37:58Z

Edit: More information - the objective of this program is to pull from an existing list of names, search the website, and bring back the corresponding NPI numbers. Thanks to user @omegastripes I was advised to shift my focus to XHR. My question is regarding, how to populate the search with the names of the providers, and loop so that it will return the NPI's in the next cells over in the spread sheet for the remaining providers.

Related, what to do in the event nothing populates from the search

original post: Title - Do you want to continue? Internet Explorer pop up - VBA

Internet Security pop up prevents my code from continuing. Normally I would disable this request but my computer security access is limited due to using a work computer.

My question, is there a way to click "Yes" on this pop up using VBA?

Here is my code so far.

Sub GetNpi()

Dim ie As Object

'create a new instance of ie
Set ie = New InternetExplorer
ie.Visible = True

'goes to site
ie.navigate "npinumberlookup.org"
Do While ie.readyState <> READYSTATE_COMPLETE
     Loop

Set ieDoc = ie.document

'select search box last name and Fill in Search Box
ie.document.getElementById("last").Focus
ie.document.getElementById("last").Value = "testlastname"

'select search box first name and Fill in Search Box
ie.document.getElementById("first").Focus
ie.document.getElementById("first").Value = "testfirstname"

Do While ie.readyState <> READYSTATE_COMPLETE
     Loop

'select state drop down box enter TX
ie.document.getElementById("pracstate").Focus
ie.document.getElementById("pracstate").Value = "TX"

'click submit button
ie.document.getElementById("submit").Click

omegastripes
2#
omegastripes Reply to 2018-01-15 21:18:26Z

Update

Try the below code to retrieve NPI for the names from the worksheet (specify last name, first name and state):

Option Explicit

Sub TestListNPI()

    ' Prefix type + func
    ' Type: s - string, l - long, a - array
    ' Func: q - query, r - result
    Dim i As Long
    Dim j As Long
    Dim k As Long
    Dim sqLN As String
    Dim sqFN As String
    Dim aqFN
    Dim sqSt As String
    Dim arHdr
    Dim arRows
    Dim srMsg As String
    Dim srLN  As String
    Dim srFN As String
    Dim arFN
    Dim lrMNQty As Long
    Dim sOutput As String

    i = 2
    With Sheets(1)
        Do
            sqLN = .Cells(i, 1)
            If sqLN = "" Then Exit Do
            .Cells(i, 4) = "..."
            sqFN = .Cells(i, 2).Value
            aqFN = Split(sqFN)
            sqSt = "" & .Cells(i, 3)
            GetNPIData sqLN, aqFN(0), sqSt, arHdr, arRows, srMsg
            If srMsg = "OK" Then
                With CreateObject("Scripting.Dictionary")
                    For j = 0 To UBound(arRows, 1)
                        Do
                            srLN = arRows(j, 1)
                            If LCase(srLN) <> LCase(sqLN) Then Exit Do ' Last names should match
                            srFN = arRows(j, 3)
                            arFN = Split(srFN)
                            If LCase(arFN(0)) <> LCase(aqFN(0)) Then Exit Do ' First names should match
                            lrMNQty = UBound(arFN)
                            If UBound(aqFN) < lrMNQty Then lrMNQty = UBound(aqFN)
                            For k = 1 To lrMNQty
                                Select Case True
                                    Case LCase(arFN(k)) = LCase(aqFN(k)) ' Full match
                                    Case Len(arFN(k)) = 1 And LCase(arFN(k)) = LCase(Left(aqFN(k), 1)) ' First letter match
                                    Case Len(arFN(k)) = 2 And Right(arFN(k), 1) = "." And LCase(Left(arFN(k), 1)) = LCase(Left(aqFN(k), 1)) ' First letter with dot match
                                    Case Else ' No matches
                                        Exit Do
                                End Select
                            Next
                            .Add arRows(j, 0), arRows(j, 1) & " " & arRows(j, 3)
                        Loop Until True
                    Next
                    Select Case .Count
                        Case 0
                            sOutput = "No matches"
                        Case 1
                            sOutput = .Keys()(0)
                        Case Else
                            sOutput = Join(.Items(), vbCrLf)
                    End Select
                End With
            Else
                sOutput = srMsg
            End If
            .Cells(i, 4) = sOutput
            DoEvents
            i = i + 1
        Loop
    End With
    MsgBox "Completed"

End Sub

Sub GetNPIData(sLastName, sFirstName, sState, aResultHeader, aResultRows, sStatus)

    Dim sContent As String
    Dim i As Long
    Dim j As Long
    Dim aHeader() As String
    Dim aRows() As String

    ' Retrieve HTML content via XHR
    With CreateObject("MSXML2.XMLHTTP")
        .Open "POST", "http://npinumberlookup.org/getResults.php", False
        .SetRequestHeader "content-type", "application/x-www-form-urlencoded"
        .Send _
            "last=" & EncodeUriComponent(sLastName) & _
            "&first=" & EncodeUriComponent(sFirstName) & _
            "&pracstate=" & EncodeUriComponent(sState) & _
            "&npi=" & _
            "&submit=Search" ' Setup request parameters
        sContent = .ResponseText
    End With
    ' Parse with RegEx
    Do ' For break
        With CreateObject("VBScript.RegExp")
            .Global = True
            .MultiLine = True
            .IgnoreCase = True
            ' Minor HTML simplification
            .Pattern = "<(?!/td|/tr|/th|td|tr|th|a href)[^>]*>|&nbsp;|\r|\n|\t"
            sContent = .Replace(sContent, "")
            .Pattern = "<a [^>]*href=""([^""]*)"".*?</td>"
            sContent = .Replace(sContent, "$1</td>")
            .Pattern = "<(\w+)\b[^>]+>"
            sContent = .Replace(sContent, "<$1>")
           ' Extract header
            .Pattern = "<tr>((?:<th>.*?</th>)+)</tr>"
            With .Execute(sContent)
                If .Count <> 1 Then
                    sStatus = "No header"
                    Exit Do
                End If
            End With
            .Pattern = "<th>(.*?)</th>"
            With .Execute(sContent)
                ReDim aHeader(0, 0 To .Count - 1)
                For i = 0 To .Count - 1
                    aHeader(0, i) = .Item(i).SubMatches(0)
                Next
            End With
            aResultHeader = aHeader
           ' Extract data
            .Pattern = "<tr>((?:<td>.*?</td>)+)</tr>"
            With .Execute(sContent)
                If .Count = 0 Then
                    sStatus = "No rows"
                    Exit Do
                End If
                ReDim aRows(0 To .Count - 1, 0)
                For i = 0 To .Count - 1
                    aRows(i, 0) = .Item(i).SubMatches(0)
                Next
            End With
            .Pattern = "<td>(.*?)</td>"
            For i = 0 To UBound(aRows, 1)
                With .Execute(aRows(i, 0))
                    For j = 0 To .Count - 1
                        If UBound(aRows, 2) < j Then ReDim Preserve aRows(UBound(aRows, 1), j)
                        aRows(i, j) = Trim(.Item(j).SubMatches(0))
                    Next
                End With
            Next
            aResultRows = aRows
        End With
        sStatus = "OK"
    Loop Until True

End Sub

Function EncodeUriComponent(sText)
    Static oHtmlfile As Object
    If oHtmlfile Is Nothing Then
        Set oHtmlfile = CreateObject("htmlfile")
        oHtmlfile.parentWindow.execScript "function encode(s) {return encodeURIComponent(s)}", "jscript"
    End If
    EncodeUriComponent = oHtmlfile.parentWindow.encode(sText)
End Function

The output for me is as follows:

For multiply entries all names are output in the last column instead of NPI.

Some explanation of the code. Generally RegEx's aren't recommended for HTML parsing, so there is disclaimer. Data being processed in this case is quite simple that is why it is parsed with RegEx. About RegEx: introduction (especially syntax), introduction JS, VB flavor. Simplification makes HTML code suitable for parsing in some degree. Patterns:

  • <(?!/td|/tr|/th|td|tr|th|a href)[^>]*>|&nbsp;|\r|\n|\t is for removing white-spaces, and all tags but table markup and links by replacing with "".
  • <a [^>]*href="([^"]*)".*?</td> keeps link address by replacing with $1</td>.
  • <(\w+)\b[^>]+> removes all unnecessary tag attributes by replacing with <$1>.
  • <tr>((?:<th>.*?</th>)+)</tr> matches each table header row.
  • <th>(.*?)</th> matches each header cell.
  • <tr>((?:<td>.*?</td>)+)</tr> matches each table data row.
  • <td>(.*?)</td> matches each data cell.

Look into how does the HTML content is changed on each step of replacemnets.

Initial answer

Avoid pop up appearing instead of bothering with it.

Make sure you are using secure HTTP protocol https://npinumberlookup.org.

You may even not use IE for webscraping at all, XHR is better choice, as it is more reliable and fast, though it requires some knowledge and experience. Here is the simple example of that:

Option Explicit

Sub Test()

    Dim sContent As String
    Dim i As Long
    Dim j As Long
    Dim aHeader() As String
    Dim aRows() As String

    ' Retrieve HTML content via XHR
    With CreateObject("MSXML2.XMLHTTP")
        .Open "POST", "http://npinumberlookup.org/getResults.php", False
        .SetRequestHeader "content-type", "application/x-www-form-urlencoded"
        .Send _
            "last=smith" & _
            "&first=michael" & _
            "&pracstate=NC" & _
            "&npi=" & _
            "&submit=Search" ' Setup request parameters
        sContent = .ResponseText
    End With
    ' Parse with RegEx
    Do ' For break
        With CreateObject("VBScript.RegExp")
            .Global = True
            .MultiLine = True
            .IgnoreCase = True
            ' Minor HTML simplification
            .Pattern = "<(?!/td|/tr|/th|td|tr|th|a href)[^>]*>|&nbsp;|\r|\n|\t"
            sContent = .Replace(sContent, "")
            .Pattern = "<a [^>]*href=""([^""]*)"".*?</td>"
            sContent = .Replace(sContent, "$1</td>")
            .Pattern = "<(\w+)\b[^>]+>"
            sContent = .Replace(sContent, "<$1>")
           ' Extract header
            .Pattern = "<tr>((?:<th>.*?</th>)+)</tr>"
            With .Execute(sContent)
                If .Count <> 1 Then
                    MsgBox "No header found"
                    Exit Do
                End If
            End With
            .Pattern = "<th>(.*?)</th>"
            With .Execute(sContent)
                ReDim aHeader(0, 0 To .Count - 1)
                For i = 0 To .Count - 1
                    aHeader(0, i) = .Item(i).SubMatches(0)
                Next
            End With
           ' Extract data
            .Pattern = "<tr>((?:<td>.*?</td>)+)</tr>"
            With .Execute(sContent)
                If .Count = 0 Then
                    MsgBox "No rows found"
                    Exit Do
                End If
                ReDim aRows(0 To .Count - 1, 0)
                For i = 0 To .Count - 1
                    aRows(i, 0) = .Item(i).SubMatches(0)
                Next
            End With
            .Pattern = "<td>(.*?)</td>"
            For i = 0 To UBound(aRows, 1)
                With .Execute(aRows(i, 0))
                    For j = 0 To .Count - 1
                        If UBound(aRows, 2) < j Then ReDim Preserve aRows(UBound(aRows, 1), j)
                        aRows(i, j) = .Item(j).SubMatches(0)
                    Next
                End With
            Next
        End With
    Loop Until True
    ' Output
    With ThisWorkbook.Sheets(1)
        .Cells.Delete
         Output2DArray .Cells(1, 1), aHeader
         Output2DArray .Cells(2, 1), aRows
         .Columns.AutoFit
    End With
    MsgBox "Completed"

End Sub

Sub Output2DArray(oDstRng As Range, aCells As Variant)

    With oDstRng
        .Parent.Select
        With .Resize( _
                UBound(aCells, 1) - LBound(aCells, 1) + 1, _
                UBound(aCells, 2) - LBound(aCells, 2) + 1)
            .NumberFormat = "@"
            .Value = aCells
        End With
    End With

End Sub

All the data in the code could be easily obtained from browser developer tools on network tab after you click submit, as an example:

The above code returns the output for me as follows:

You need to login account before you can post.

About| Privacy statement| Terms of Service| Advertising| Contact us| Help| Sitemap|
Processed in 0.41091 second(s) , Gzip On .

© 2016 Powered by mzan.com design MATCHINFO