' wget_img.vbs
Call Main()
Sub Main()
' CMD 模式
If Not (LCase(Right(WScript.FullName,11)) = LCase("CScript.exe")) Then
CreateObject("WScript.Shell").Run "cscript.exe //nologo """ WScript.ScriptFullName """", 1, False
WScript.Quit(1)
End If
Dim wso, strMeDir
Set wso = WScript.CreateObject("WScript.Shell")
strMeDir = Left(WScript.ScriptFullName, InStrRev(WScript.ScriptFullName,"\")-1)
' 啟動 wget下載網站所有頁面到本腳本目錄的 720.hao2046.net 文件夾
WScript.Echo "1. 啟動 wget下載網站所有頁面到本腳本目錄的 720.hao2046.net 文件夾 ……"
wso.Run "wget -r -p -k -c -x -A=jpg,htm,html 720.hao2046.net -P """ strMeDir """", 1, True
' 掃描 720.hao2046.net 文件夾中所有文件
WScript.Echo "2. 掃描 720.hao2046.net 文件夾中所有文件 ……"
Dim strFolderspec, strHTML, strURL
Dim arr() : ReDim Preserve arr(0)
strFolderspec = strMeDir "\720.hao2046.net"
Call ScanFolder(arr, strFolderspec)
' 建立正則表達式。
Dim regEx
Set regEx = CreateObject("VBScript.RegExp") ' 建立正則表達式。
regEx.IgnoreCase = True ' 設置是否區(qū)分大小寫。
regEx.Global = True ' 設置全局替換。
regEx.MultiLine = True ' 設置多行匹配模式
' 查找所有文件
WScript.Echo "3. 讀取 720.hao2046.net 文件夾中的所有網頁,匹配圖片 URL 地址 ……"
For i = 0 To UBound(arr)
If LCase(Right(arr(i),5)) = ".html" Or LCase(Right(arr(i),4)) = ".htm" Then
' 讀取文件,匹配圖片 URL 地址
strHTML = ReadPfile(arr(i), "gb2312")
regEx.Pattern = "src=['""]http://\S+\.jpg['""]"
Set Matches = regEx.Execute(strHTML) ' 執(zhí)行搜索。
For Each Match in Matches ' 遍歷匹配集合。
If Not Match.Value = "" Then
regEx.Pattern = "(src=['""])*(['""])*"
strURL = strURL regEx.Replace(Match.Value, "") vbCrLf
End If
Next
End If
Next
' 保存所有圖片 URL 地址
WScript.Echo "4. 保存所有圖片 URL 地址到 url-img.txt 文件 ……"
Call SavePfile(strMeDir "\url-img.txt", "utf-8", strURL)
' 啟動 wget 下載圖片到本腳本 img 目錄
WScript.Echo "5. 啟動 wget 下載 url-img.txt 指定的圖片到本腳本 img 目錄 ……"
wso.Run "wget -c -x -t 5 -i """ strMeDir "\url-img.txt"" -P """ strMeDir "\img""", 1, True
Msgbox "完成!"
End Sub
'===========================================================================================
'按編碼讀取txt文件內容
Function ReadPfile(ByVal FileName, ByVal FileCode)
Dim objStream
Set objStream = CreateObject("ADODB.Stream")
'
With objStream
.Type = 2
.Mode = 3
.open
.Charset = FileCode '不同編碼時自己換,Chinese (Simplified) (GB2312),中文 GBK ,繁體中文 Big5 ,日文 EUC-JP ,韓文 EUC-KR,charset=UTF-8(國際化編碼),ANSI,Unicode,unicode big endian
.LoadFromFile FileName
ReadPfile = .ReadText
.Close
End With
Set objStream = Nothing
End Function
'===========================================================================================
'保存文件為unicode格式文本
Function SavePfile(ByVal FileName, ByVal FileCode, ByVal TextString)
Dim objStream
Set objStream = CreateObject("ADODB.Stream")
With objStream
.Type = 2
.Mode = 3
.Charset = FileCode '不同編碼時自己換,Chinese (Simplified) (GB2312),中文 GBK ,繁體中文 Big5 ,日文 EUC-JP ,韓文 EUC-KR,charset=UTF-8(國際化編碼),ANSI,Unicode,unicode big endian
.open
.WriteText TextString
.SaveToFile FileName, 2
.Close
End With
Set objStream = Nothing
End Function
' Dim arr() : ReDim Preserve arr(0)
' Call ScanFolder(arr, "V:\")
Sub ScanFolder(ByRef arr, ByVal strFolderspec)
On Error Resume Next
Dim fso, objFolder
Set fso = Createobject("Scripting.FileSystemObject")
Set objFolder = fso.getfolder(strFolderspec)
ReDim Preserve arr(UBound(arr)+1)
arr(UBound(arr)) = strFolderspec "\"
For Each subFile In objFolder.files
ReDim Preserve arr(UBound(arr)+1)
arr(UBound(arr)) = subFile.path
Next
For Each subFolder In objFolder.subfolders
ScanFolder arr, subFolder.path
Next
Set fso = NoThing
Set objFolder = NoThing
End Sub
' findstr_html.vbs
Call Main()
Sub Main()
' CMD 模式
If Not (LCase(Right(WScript.FullName,11)) = LCase("CScript.exe")) Then
CreateObject("WScript.Shell").Run "cscript.exe //nologo """ WScript.ScriptFullName """", 1, False
WScript.Quit(1)
End If
Dim strMeDir
strMeDir = Left(WScript.ScriptFullName, InStrRev(WScript.ScriptFullName,"\")-1)
Dim regEx, strHTML, strURL
' 掃描文件夾
Dim arr() : ReDim Preserve arr(0)
Call ScanFolder(arr, strMeDir "\720.hao2046.net")
If UBound(arr) = 0 Then
WScript.Echo strMeDir "\720.hao2046.net" ", Not Found!"
Exit Sub
End If
' 建立正則表達式。
Set regEx = CreateObject("VBScript.RegExp") ' 建立正則表達式。
regEx.IgnoreCase = True ' 設置是否區(qū)分大小寫。
regEx.Global = True ' 設置全局替換。
regEx.MultiLine = True ' 設置多行匹配模式
Do
strPattern = InputBox("請輸入要匹配的正則表達式:","查找所有網頁文件","123456")
strInfo = strPattern vbCrLf "Not Found!"
For i = 0 To UBound(arr)
If LCase(Right(arr(i),5)) = ".html" Or LCase(Right(arr(i),4)) = ".htm" Then
'WScript.Echo arr(i)
strHTML = ReadPfile(arr(i), "gb2312")
If InStr(strHTML, strPattern)>0 Then
strInfo = strPattern vbCrLf arr(i) vbCrLf
Exit For
Else
'regEx.Pattern = "src=['""]http://\S+\.jpg['""]"
regEx.Pattern = strPattern
Set Matches = regEx.Execute(strHTML) ' 執(zhí)行搜索。
For Each Match in Matches ' 遍歷匹配集合。
If Not Match.Value = "" Then
'regEx.Pattern = "(src=['""])*(['""])*"
'strURL = strURL regEx.Replace(Match.Value, "") vbCrLf
strInfo = strPattern vbCrLf arr(i) vbCrLf
Exit For
End If
Next
End If
End If
Next
WScript.Echo strInfo
Loop
End Sub
'===========================================================================================
'按編碼讀取txt文件內容
Function ReadPfile(ByVal FileName, ByVal FileCode)
Dim objStream
Set objStream = CreateObject("ADODB.Stream")
'
With objStream
.Type = 2
.Mode = 3
.open
.Charset = FileCode '不同編碼時自己換,Chinese (Simplified) (GB2312),中文 GBK ,繁體中文 Big5 ,日文 EUC-JP ,韓文 EUC-KR,charset=UTF-8(國際化編碼),ANSI,Unicode,unicode big endian
.LoadFromFile FileName
ReadPfile = .ReadText
.Close
End With
Set objStream = Nothing
End Function
' Dim arr() : ReDim Preserve arr(0)
' Call ScanFolder(arr, "V:\")
Sub ScanFolder(ByRef arr, ByVal strFolderspec)
On Error Resume Next
Dim fso, objFolder
Set fso = Createobject("Scripting.FileSystemObject")
Set objFolder = fso.getfolder(strFolderspec)
ReDim Preserve arr(UBound(arr)+1)
arr(UBound(arr)) = strFolderspec "\"
For Each subFile In objFolder.files
ReDim Preserve arr(UBound(arr)+1)
arr(UBound(arr)) = subFile.path
Next
For Each subFolder In objFolder.subfolders
ScanFolder arr, subFolder.path
Next
Set fso = NoThing
Set objFolder = NoThing
End Sub