Welcome to OStack Knowledge Sharing Community for programmer and developer-Open, Learning and Share
Welcome To Ask or Share your Answers For Others

Categories

0 votes
664 views
in Technique[技术] by (71.8m points)

performance - Optimize Speed of Recursive File Search in Subdirectories?

I'm looking for the fastest-performing method to recursively search subdirectories for a filepattern using an Excel macro. Excel VBA seems to be rather slow at this.

Things I've tried so far (some based on other stackoverflow suggestions):

  • Exclusive use of Dir to recurse through subdirectories and search for the filepattern in each folder. (slowest)
  • Iterating through FileSystemObject Folders using Folder.Files collection, checking each file against filepattern. (better, but still slow)
  • Iterating through FileSystemObject Folders, and then using Dir to check each folder for the filepattern (fastest so far, but this is still taking several seconds per file and I'd like to optimize if possible)

I looked in to My.Computer.FileSystem.GetFiles, which seems like it would be perfect (allows you to specify a wildcard pattern and search subfolders with a single command) - but it doesn't appear to be supported in Excel VBA from what I can tell, only in VB.

I'm currently using the FindFile Sub below, which has the best performance so far. If anyone has suggestions for how to further improve this, I would be very grateful!

Option Explicit
Private Declare Function GetTickCount Lib "kernel32" () As Long


Function Recurse(sPath As String, targetName As String) As String

    Dim FSO As New FileSystemObject
    Dim myFolder As Folder
    Dim mySubFolder As Folder
    Dim myFile As File

    On Error Resume Next
    Set myFolder = FSO.GetFolder(sPath)
    If Err.Number <> 0 Then
        MsgBox "Error accessing " & sPath & ". The macro will abort."
        Err.Clear
        Exit Function
    End If
    On Error GoTo 0

    Dim foundFolderPath As String
    Dim foundFileName As String
    foundFolderPath = ""
    foundFileName = ""

    For Each mySubFolder In myFolder.SubFolders

        foundFileName = Dir(mySubFolder.Path & "" & targetName & "*")
        If foundFileName <> vbNullString Then
            foundFolderPath = mySubFolder.Path & "" & foundFileName
        End If

        If foundFolderPath <> vbNullString Then
            Recurse = foundFolderPath
            Exit Function
        End If

        foundFolderPath = Recurse(mySubFolder.Path, targetName)

        If foundFolderPath <> vbNullString Then
            Recurse = foundFolderPath
            Exit Function
        End If
    Next

End Function


Sub FindFile()

    Dim start As Long
    start = GetTickCount()

    Dim targetName As String
    Dim targetPath As String
    targetName = Range("A1").Value 'Target file name without extension
    targetPath = "C:Example" & Range("B1").Value 'Subfolder name

    Dim target As String
    target = Recurse(targetPath, targetName)

    Dim finish As Long
    finish = GetTickCount()

    MsgBox "found: " & target & vbNewLine & vbNewLine & (finish - start) & " milliseconds"

End Sub

Updated File Search Function Based on Accepted Answer

This version of FindFile() performs about twice as fast as the method I originally pasted in the question above. As discussed in the posts below, this should work for 32 or 64-bit versions of Excel 2010 and newer.

Option Explicit

Private Declare PtrSafe Function FindClose Lib "kernel32" (ByVal hFindFile As LongPtr) As Long
Private Declare PtrSafe Function FindFirstFileW Lib "kernel32" (ByVal lpFileName As LongPtr, ByVal lpFindFileData As LongPtr) As LongPtr
Private Declare PtrSafe Function FindNextFileW Lib "kernel32" (ByVal hFindFile As LongPtr, ByVal lpFindFileData As LongPtr) As LongPtr

Private Type FILETIME
  dwLowDateTime  As Long
  dwHighDateTime As Long
End Type

Const MAX_PATH  As Long = 260
Const ALTERNATE As Long = 14

' Can be used with either W or A functions
' Pass VarPtr(wfd) to W or simply wfd to A
Private Type WIN32_FIND_DATA
  dwFileAttributes As Long
  ftCreationTime   As FILETIME
  ftLastAccessTime As FILETIME
  ftLastWriteTime  As FILETIME
  nFileSizeHigh    As Long
  nFileSizeLow     As Long
  dwReserved0      As Long
  dwReserved1      As Long
  cFileName        As String * MAX_PATH
  cAlternate       As String * ALTERNATE
End Type

Private Const FILE_ATTRIBUTE_DIRECTORY As Long = 16 '0x10
Private Const INVALID_HANDLE_VALUE As LongPtr = -1

Function Recurse(folderPath As String, fileName As String)
    Dim fileHandle    As LongPtr
    Dim searchPattern As String
    Dim foundPath     As String
    Dim foundItem     As String
    Dim fileData      As WIN32_FIND_DATA

    searchPattern = folderPath & "*"

    foundPath = vbNullString
    fileHandle = FindFirstFileW(StrPtr(searchPattern), VarPtr(fileData))
    If fileHandle <> INVALID_HANDLE_VALUE Then
        Do
            foundItem = Left$(fileData.cFileName, InStr(fileData.cFileName, vbNullChar) - 1)

            If foundItem = "." Or foundItem = ".." Then 'Skip metadirectories
            'Found Directory
            ElseIf fileData.dwFileAttributes And FILE_ATTRIBUTE_DIRECTORY Then
                foundPath = Recurse(folderPath & "" & foundItem, fileName)
            'Found File
            'ElseIf StrComp(foundItem, fileName, vbTextCompare) = 0 Then 'these seem about equal
            ElseIf InStr(1, foundItem, fileName, vbTextCompare) > 0 Then 'for performance
                foundPath = folderPath & "" & foundItem
            End If

            If foundPath <> vbNullString Then
                Recurse = foundPath
                Exit Function
            End If

        Loop While FindNextFileW(fileHandle, VarPtr(fileData))
    End If

    'No Match Found
    Recurse = vbNullString
End Function

Sub FindFile()

    Dim targetName As String
    Dim targetPath As String
    targetName = Range("A4").Value
    targetPath = "C:Example" & Range("B4").Value

    Dim target As String
    target = Recurse(targetPath, targetName)

    MsgBox "found: " & target

End Sub
See Question&Answers more detail:os

与恶龙缠斗过久,自身亦成为恶龙;凝视深渊过久,深渊将回以凝视…
Welcome To Ask or Share your Answers For Others

1 Answer

0 votes
by (71.8m points)

I had a similar performance issue with I resolved with win API functions as suggested above, my problem was slightly different to yours as I didn't need to recursively search a directory tree, I just pulled filenames from a given folder into a collection, but you can probably adapt my code:

'for windows API call to FindFirstFileEx
Private Const INVALID_HANDLE_VALUE = -1
Private Const MAX_PATH = 260

Private Type FILETIME
    dwLowDateTime   As Long
    dwHighDateTime  As Long
End Type

Private Type WIN32_FIND_DATA
    dwFileAttributes    As Long
    ftCreationTime      As FILETIME
    ftLastAccessTime    As FILETIME
    ftLastWriteTime     As FILETIME
    nFileSizeHigh       As Long
    nFileSizeLow        As Long
    dwReserved0         As Long
    dwReserved1         As Long
    cFileName           As String * MAX_PATH
    cAlternate          As String * 14
End Type

Private Const FIND_FIRST_EX_CASE_SENSITIVE  As Long = 1
'MSDN: "This value is not supported until Windows Server 2008 R2 and Windows 7."
Private Const FIND_FIRST_EX_LARGE_FETCH     As Long = 2

Private Enum FINDEX_SEARCH_OPS
    FindExSearchNameMatch
    FindExSearchLimitToDirectories
    FindExSearchLimitToDevices
End Enum

Private Enum FINDEX_INFO_LEVELS
    FindExInfoStandard
    FindExInfoBasic 'MSDN: "This value is not supported until Windows Server 2008 R2 and Windows 7."
    FindExInfoMaxInfoLevel
End Enum

Private Declare Function FindFirstFileEx Lib "kernel32" Alias "FindFirstFileExA" ( _
ByVal lpFileName As String, ByVal fInfoLevelId As Long, lpFindFileData As WIN32_FIND_DATA, _
    ByVal fSearchOp As Long, ByVal lpSearchFilter As Long, ByVal dwAdditionalFlags As Long) As Long
Private Declare Function FindNextFile Lib "kernel32" Alias "FindNextFileA" ( _
    ByVal hFindFile As Long, lpFindFileData As WIN32_FIND_DATA) As Long
Private Declare Function FindClose Lib "kernel32" (ByVal hFindFile As Long) As Long


Private Function GetFileNames(ByVal sPath As String) As Collection

    Dim fileInfo    As WIN32_FIND_DATA  'buffer for file info
    Dim hFile       As Long             'file handle
    Dim colFiles    As New Collection

    sPath = sPath & "*.*"

    hFile = FindFirstFileEx(sPath & vbNullChar, FindExInfoBasic, fileInfo, FindExSearchNameMatch, 0&, FIND_FIRST_EX_LARGE_FETCH)

    If hFile <> INVALID_HANDLE_VALUE Then
        Do While FindNextFile(hFile, fileInfo)
            colFiles.Add Left(fileInfo.cFileName, InStr(fileInfo.cFileName, vbNullChar) - 1)
        Loop

        FindClose hFile
    End If

    Set GetFileNames = colFiles

End Function

与恶龙缠斗过久,自身亦成为恶龙;凝视深渊过久,深渊将回以凝视…
Welcome to OStack Knowledge Sharing Community for programmer and developer-Open, Learning and Share
Click Here to Ask a Question

2.1m questions

2.1m answers

60 comments

57.0k users

...