在线时间:8:00-16:00
迪恩网络APP
随时随地掌握行业动态
扫描二维码
关注迪恩网络微信公众号
★★★★★★★★★★★★★★★★★★★★★★★★★★★★★★★★★★★★★★★★ Given a list of directory info including directory path, and all the files with contents in this directory, you need to find out all the groups of duplicate files in the file system in terms of their paths. A group of duplicate files consists of at least two files that have exactly the same content. A single directory info string in the input list has the following format:
It means there are n files ( The output is a list of group of duplicate file paths. For each group, it contains all the file paths of the files that have the same content. A file path is a string that has the following format:
Example 1: Input: ["root/a 1.txt(abcd) 2.txt(efgh)", "root/c 3.txt(abcd)", "root/c/d 4.txt(efgh)", "root 4.txt(efgh)"] Output: [["root/a/2.txt","root/c/d/4.txt","root/4.txt"],["root/a/1.txt","root/c/3.txt"]] Note:
Follow-up beyond contest:
给定一个目录信息列表,包括目录路径,以及该目录中的所有包含内容的文件,您需要找到文件系统中的所有重复文件组的路径。一组重复的文件至少包括二个具有完全相同内容的文件。 输入列表中的单个目录信息字符串的格式如下:
这意味着有 n 个文件( 该输出是重复文件路径组的列表。对于每个组,它包含具有相同内容的文件的所有文件路径。文件路径是具有下列格式的字符串:
示例 1: 输入: ["root/a 1.txt(abcd) 2.txt(efgh)", "root/c 3.txt(abcd)", "root/c/d 4.txt(efgh)", "root 4.txt(efgh)"] 输出: [["root/a/2.txt","root/c/d/4.txt","root/4.txt"],["root/a/1.txt","root/c/3.txt"]] 注:
超越竞赛的后续行动:
584ms 1 class Solution { 2 func findDuplicate(_ paths: [String]) -> [[String]] { 3 var ctp = [String: [String]]() 4 for path in paths { 5 let comps = path.split(separator: " ") 6 let dir = comps[0] 7 for file in comps[1...] { 8 let (fullPath, contents) = analyzeFile(file, dir) 9 ctp[contents, default: []].append(fullPath) 10 } 11 } 12 return Array(ctp.values).filter { $0.count > 1 } 13 } 14 15 private func analyzeFile(_ file: String.SubSequence, _ dir: String.SubSequence) -> (String, String) { 16 let startIndex = file.index(of: "(")! 17 let endIndex = file.index(before: file.endIndex) 18 let contents = String(file[file.index(after: startIndex)..<endIndex]) 19 let path = String(file[file.startIndex..<startIndex]) 20 return ("\(dir)/\(path)", contents) 21 } 22 } 700ms 1 class Solution { 2 func findDuplicate(_ paths: [String]) -> [[String]] { 3 var map = [String: [String]]() 4 for path in paths { 5 let contents = extractContent(path) 6 for content in contents { 7 map[content[1], default: [String]()].append(content[0]) 8 } 9 } 10 var result = [[String]]() 11 for key in map.keys { 12 if let tmpPaths = map[key] { 13 if tmpPaths.count > 1 { 14 result.append(tmpPaths) 15 } 16 } 17 } 18 return result 19 } 20 21 private func extractContent(_ str: String) -> [[String]] { 22 let arr = str.split(separator: " ") 23 let root = arr[0] 24 var result = [[String]]() 25 for i in 1 ..< arr.count { 26 let str = arr[i] 27 let left = str.firstIndex(of: "(")! 28 let right = str.lastIndex(of: ")")! 29 let filename = String(str[..<left]) 30 let content = String(str[left ... right]) 31 result.append(["\(root)/\(filename)", content]) 32 } 33 return result 34 } 35 } 908ms 1 class Solution { 2 3 typealias Content = String 4 typealias FilePath = String 5 typealias ExtractedContentPath = (content: Content, filepath: FilePath) 6 7 func findDuplicate(_ paths: [String]) -> [[String]] { 8 9 let contentFileTable: [Content: [FilePath]] = paths.lazy 10 .flatMap { self.extractedContentPaths($0) } 11 .reduce(into: [Content: [FilePath]]()){ (dict: inout [Content: [FilePath]], extractedContentPath: ExtractedContentPath) in 12 dict[extractedContentPath.content, default: [FilePath]()].append(extractedContentPath.filepath) 13 } 14 15 return contentFileTable.values.filter { $0.count > 1 } 16 17 } 18 19 private func extractedContentPaths(_ input: String) -> [ExtractedContentPath] { 20 let tokens = input.components(separatedBy: .whitespaces) 21 let directory = tokens.first! 22 return tokens.indices.dropFirst() 23 .lazy.map { extractedContentPath(from: tokens[$0], directory: directory) } 24 } 25 26 private func extractedContentPath(from fileAndContent: String, directory: String) -> ExtractedContentPath { 27 let tokens = fileAndContent.dropLast().components(separatedBy: "(") 28 return ExtractedContentPath(content: tokens.last!, filepath: "\(directory)/\(tokens.first!)") 29 } 30 } 992ms 1 class Solution { 2 func findDuplicate(_ paths: [String]) -> [[String]] { 3 var groups = [String: Array<String>]() 4 5 for info in paths { 6 let list = parse(info: info) 7 8 for file in list { 9 groups[file.1, default: Array<String>()].append(file.0) 10 } 11 } 12 13 var result = [[String]]() 14 15 for group in Array(groups.values) { 16 if group.count > 1 { 17 result.append(group) 18 } 19 } 20 21 return result 22 } 23 24 func parse(info: String) -> [(String, String)] { 25 var components = info.components(separatedBy: " ") 26 let path = components.removeFirst() 27 28 var result = [(String, String)]() 29 30 let splitCharSet = CharacterSet(charactersIn: "()") 31 while !components.isEmpty { 32 33 let entry = components.removeFirst() 34 let parts = entry.components(separatedBy: splitCharSet) 35 36 let file = "\(path)/\(parts[0])" 37 let contents = parts[1] 38 39 result.append((file, contents)) 40 } 41 42 return result 43 } 44 } 1232ms 1 class Solution { 2 func findDuplicate(_ paths: [String]) -> [[String]] { 3 var fileMapPaths = [String: [String]]() 4 paths.forEach { (str) in 5 let arrStrs = str.split(separator: " ") 6 let dir = arrStrs[0] 7 for i in 1..<arrStrs.count { 8 let fileInfo = arrStrs[i] 9 let subArrStr = fileInfo.split(separator: "(") 10 let md5 = String(subArrStr[1]) 11 let file = String(subArrStr[0].prefix(subArrStr[0].count)) 12 let filePath = dir + "/" + file 13 var mapPaths = fileMapPaths[md5] ?? [String]() 14 mapPaths.append(filePath) 15 fileMapPaths[md5] = mapPaths 16 } 17 } 18 let ans = fileMapPaths.values.filter { $0.count > 1} 19 return ans 20 } 21 } 1264ms 1 class Solution { 2 func findDuplicate(_ paths: [String]) -> [[String]] { 3 //create a dictionary with [content: [filepath]], output the value count which is equal and greater than 2 4 var contentToFiles = [String: [String]]() 5 for path in paths { 6 let params = path.split(separator: " ") 7 guard let dir = params.first else { 8 continue 9 } 10 for i in 1 ..< params.count { 11 let fileParams = params[i].split(separator: "(") 12 guard let fileName = fileParams.first, let fileContentWithExtraInfo = fileParams.last else { 13 continue 14 } 15 let fileContent = String(describing: fileContentWithExtraInfo.dropLast()) 16 let filePath = String(describing:dir) + "/" + String(describing: fileName ) 17 contentToFiles[fileContent] = contentToFiles[fileContent, default:[]] + [filePath] 18 } 19 } 20 return contentToFiles.values.filter{$0.count >= 2} 21 22 } 23 } Runtime: 1324 ms
Memory Usage: 26.4 MB
1 class Solution { 2 func findDuplicate(_ paths: [String]) -> [[String]] { 3 var ans:[[String]] = [[String]]() 4 var map:[String:[String]] = [String:[String]]() 5 for path in paths 6 { 7 var temp:[String] = path.components(separatedBy:" ") 8 var root:String = temp[0] 9 for str in temp 10 { 11 var begin:Int = str.findLast("(") 12 var end:Int = str.findFirst(")") 13 if begin + 1 < end 14 { 15 var name:String = str.subString(begin+1,end) 16 var s:String = root + "/" + str.subStringTo(begin - 1) 17 if map[name] == nil 18 { 19 map[name] = [s] 20 } 21 else 22 { 23 map[name]!.append(s) 24 } 25 } 26 } 27 } 28 for val in map.values 29 { 30 if val.count > 1 31 { 32 ans.append(val) 33 } 34 } 35 return ans 36 } 37 } 38 39 //String扩展 40 extension String { 41 // 截取字符串:从起始处到index 42 // - Parameter index: 结束索引 43 // - Returns: 子字符串 44 func subStringTo(_ index: Int) -> String { 45 let theIndex = self.index(self.startIndex,offsetBy:min(self.count,index)) 46 return String(self[startIndex...theIndex]) 47 } 48 49 // 截取字符串:指定索引和字符数 50 // - begin: 开始截取处索引 51 // - count: 截取的字符数量 52 func subString(_ begin:Int,_ count:Int) -> String { 53 let start = self.index(self.startIndex, offsetBy: max(0, begin)) 54 let end = self.index(self.startIndex, offsetBy: min(self.count, begin + count)) 55 return String(self[start..<end]) 56 } 57 58 //从0索引处开始查找是否包含指定的字符串,返回Int类型的索引 59 //返回第一次出现的指定子字符串在此字符串中的索引 60 func findFirst(_ sub:String)->Int { 61 var pos = -1 62 if let range = range(of:sub, options: .literal ) { 63 if !range.isEmpty { 64 pos = self.distance(from:startIndex, to:range.lowerBound) 65 } 66 } 67 return pos 68 } 69 70 //从0索引处开始查找是否包含指定的字符串,返回Int类型的索引 71 //返回最后出现的指定子字符串在此字符串中的索引 72 func findLast(_ sub:String)->Int { 73 var pos = -1 74 if let range = range(of:sub, options: .backwards ) { 75 if !range.isEmpty { 76 pos = self.distance(from:startIndex, to:range.lowerBound) 77 } 78 } 79 return pos 80 } 81 } 1344ms 1 class Solution { 2 func findDuplicate(_ paths: [String]) -> [[String]] { 3 var mapping = [String: [String]]() 4 for i in 0..<paths.count { 5 let arr = paths[i].components(separatedBy: " ") 6 let base = arr[0] + "/" 7 for j in 1..<arr.count { 8 let arrSplit = arr[j].components(separatedBy: "(") 9 mapping[arrSplit[1], default:[String]()].append(base + arrSplit[0]) 10 } 11 } 12 return Array(mapping.values).filter{$0.count > 1} 13 } 14 }
|
请发表评论