DuKiccoの雑記

My Life Is Myself

【Yahoo検索結果をエクセルにテーブル形式で抽出する】【エクセル2013,VBA】

Yahoo検索結果をWebクエリを利用してエクセルにテーブル形式で抽出する。

手順
1.Yahoo検索結果をWebクエリによりシート1へ表示
↓Yahoo検索結果(エクセル vba)のWebクエリ
f:id:DuKicco:20161120230522j:plain

2.シート1の内容(いつもの検索結果の並び)をシート2へテーブル形式で並べ直す。
↓並び替え後
f:id:DuKicco:20161120230509j:plain


support.office.com


Sub Yahoo検索結果取得V1()

Application.ScreenUpdating = False
Application.DisplayAlerts = False
    
Dim i As Long, j As Long, k As Long,  arr As Variant, flag As Boolean, HLA As String, Title As String, spage As Long
Dim URLstr As String, SN1 As String
    

    '検索ワード
    sWord = InputBox("検索ワードを入力", "入力", "excel vba")
    
    arr = Split(sWord)
    
    '検索ワードをエンコード
    sWord = UrlEncode(arr(0))
    For i = 1 To UBound(arr)
    
        sWord = sWord & "+" & UrlEncode(arr(i))
    
    Next
    
    'MsgBox sWord
    
    k = 2
    
    '並び替え先シートを作成
    Call SheetAdd("Result")
    SN1 = ActiveSheet.Name
    
    With ActiveSheet
        .Cells(1, 1) = "No"
        .Cells(1, 1).ColumnWidth = 5
        .Cells(1, 2) = "Title"
        .Cells(1, 2).ColumnWidth = 20
        .Cells(1, 3) = "URL"
        .Cells(1, 3).ColumnWidth = 20
        .Cells(1, 4) = "本文"
        .Cells(1, 4).ColumnWidth = 20
    End With
    
    'Webクエリの出力先シートを作成
    Call SheetAdd("Temporary")
    SN2 = ActiveSheet.Name
    
    
    '表示を開始する件数
    spage = 1
    'Yahoo検索URL
    URLstr = "https://search.yahoo.co.jp/search?p=" & sWord & "&aq=-1&ei=UTF-8&fr=top_ga1_sa&b=" & spage
    
    'Webクエリの出力
    With ActiveSheet.QueryTables.Add( _
        Connection:="URL;" & URLstr, _
        Destination:=Range("A1"))
        
        .Name = "Result"
        .WebSelectionType = xlEntirePage
        .WebFormatting = xlWebFormattingAll
        .BackgroundQuery = False
        .Refresh
    End With
    
    
    'MsgBox URLstr
    
    flag = False
    'HyperLinkAddress
    HLA = ""
    'Hyperlinks.TextToDisplay
    Title = ""
    

    '"Webクエリ"シートを2行目から読み込み
    For i = 2 To ActiveSheet.UsedRange.Rows.Count
        
        
        '次へで終了位置と認識
        If InStr(Sheets(SN2).Cells(i, 1), "次へ>") > 0 Then
        
            Exit For
        
        End If
        
        
        If Cells(i, 1).Hyperlinks.Count > 0 Then
            On Error Resume Next
                'HyperLinkAddressに「yahoo.yahoofs,javascript」など含まれるものは対象外と判断
                If InStr(Sheets(SN2).Cells(i, 1).Hyperlinks(1).Address, "yahoo.co.jp") > 0 Or InStr(Sheets(SN2).Cells(i, 1).Hyperlinks(1).Address, "cache.yahoofs.jp") > 0 Or InStr(Sheets(SN2).Cells(i, 1).Hyperlinks(1).Address, "javascript") > 0 Then
                Else
                Title = Sheets(SN2).Cells(i, 1).Hyperlinks(1).TextToDisplay
                HLA = Sheets(SN2).Cells(i, 1).Hyperlinks(1).Address
                
                '開始位置の判断
                If Title = "" Or HLA = "" Then
                    flag = False
                    Else
                    
                    flag = True
                End If
                
                End If
            On Error GoTo 0
            
            Else
            
            tst = Sheets(SN2).Cells(i, 1)
            'Debug.Print tst

            If flag = True Then
                
                If Sheets(SN1).Cells(k - 1, 2).Value <> Title Then
                
                Sheets(SN1).Cells(k, 1) = k - 1
                Sheets(SN1).Hyperlinks.Add anchor:=Sheets(SN1).Cells(k, 2), _
                    Address:=HLA, _
                    TextToDisplay:=Title
                Sheets(SN1).Cells(k, 3) = HLA
                Sheets(SN1).Cells(k, 4) = tst
                
                    k = k + 1
                End If
            End If

            'Debug.Print i & HLA & "_" & Title & "_"; tst
        End If
        
        
    Next i
    

    Application.DisplayAlerts = True
    Application.ScreenUpdating = True
    
    MsgBox "完了"
    
End Sub

' URL Encode(64bit)
Function UrlEncode(ByVal TString As String) As String
  Dim TempObj As Object
  Dim TempEl As Object
  
  TString = Replace(TString, "\", "\\")
  TString = Replace(TString, "'", "\'")
  
  Set TempObj = CreateObject("htmlfile")
  Set TempEl = TempObj.createElement("span")
  
  TempEl.setAttribute "id", "result"
  TempObj.appendChild TempEl
  TempObj.parentWindow.execScript "document.getElementById('result').innerText = encodeURIComponent('" & TString & "');", "JScript"
  UrlEncode = TempEl.innerText
  
End Function

Sub SheetAdd(SNHead As String)
'指定された文字ではじまるのシート名がかぶらぬよう連番で加える

Dim mySheet As Worksheet
Dim i As Long, tst As String, flag As Boolean
    
    
'Max100Sheet
For i = 1 To 100
    
    flag = True
    
    tst = SNHead & i
    
        For Each mySheet In Worksheets
            If mySheet.Name = tst Then '名前がかぶったらfalse
                flag = False
            End If
        Next
    
        If flag Then
            Worksheets.Add
            ActiveSheet.Name = tst

            Exit Sub
        End If
 Next



End Sub