Daniel Berger
2007-Oct-21 14:42 UTC
[Win32utils-devel] Taking a stab at a pure Ruby Dir.glob
Hi all, Here''s what I''ve come up with so far for a pure Ruby Dir.glob for MS Windows. It almost works. The problem right now is the [] notation, which I''m not translating properly into a regex. I haven''t started on the ''**'' notation yet either, but I figure that''s more of a control flow issue. Feel free to disagree with me and/or provide a solution. :) # mydir.rb - make sure you have the windows-pr library first. require ''windows/file'' require ''windows/error'' require ''windows/unicode'' require ''windows/directory'' require ''windows/process'' require ''windows/handle'' require ''windows/path'' class MyDir extend Windows::Error extend Windows::File extend Windows::Unicode extend Windows::Directory extend Windows::Process extend Windows::Handle extend Windows::Path include Windows::Error include Windows::File include Windows::Handle def self.glob(pattern, flags = 0) raise TypeError unless flags.is_a?(Integer) dirname = File.dirname(pattern) + "\\*" pattern = File.basename(pattern) recursive = false regex = nil #puts "ORIG PATTERN: ''#{pattern}''" # Convert backslashes to literal backslashes pattern.gsub!("\.", "\\.") # Convert 3 or more ''*'' characters to a single ''*'' pattern.gsub!(/\*{3,}/, ''*'') # Convert leading text to ''/^xxx'' format pattern.sub!(/^(\w)/, ''^\1'') # Convert all remaining literal ''?'' to a ''.'' (any single char). pattern.sub!(/^\?/, ''^.'') pattern.tr!(''?'', ''.'') # Convert all ''*'' to ''.*?'' to get the (nongreedy) intended result. pattern.gsub!("*", ".*?") # Convert {x, y} to (x|y) pattern.gsub!(/\{(.*?)\}/, ''(\1)'') pattern.gsub!(/\,\s*/, ''|'') # Convert {x, y} to (x|y) pattern.gsub!(/\{(.*?)\}/, ''(\1)'') pattern.gsub!(/\,\s*/, ''|'') dp = pattern.dup # This is an attempt to convert ''[]'' sets into # a regular expression. # TODO: Fix! dp.scan(/\[(.*?)\]/){ |array| array.each{ |match| start, finish = match.split(''-'') if finish chars = Range.new(start, finish).to_a else chars = start.split('''') end pattern << ''('' + chars.join(''|'') } pattern << '')'' } pattern << ''$'' pattern.gsub!(/\[.*?\]/, '''') puts "NEW PATTERN: ''#{pattern}''" regex = Regexp.new(pattern) fdata = 0.chr * 320 # 580 if wide array = [] hfind = FindFirstFile(dirname, fdata) if hfind == INVALID_HANDLE_VALUE raise ArgumentError, get_last_error end file = fdata[44, MAX_PATH].unpack("Z*")[0] array << file if regex.match(file) while FindNextFile(hfind, fdata) file = fdata[44, MAX_PATH].unpack("Z*")[0] array << file if regex.match(file) end error = GetLastError() FindClose(hfind) if(error != ERROR_NO_MORE_FILES) raise get_last_error(error) end unless flags & File::FNM_DOTMATCH > 0 array.delete_if{ |file| file =~ /^\..*/ } end array end end Here''s a test suite you can use to verify your results. Additions welcome: ######################################################################## # tc_glob.rb # # Test case for the MyDir.glob class method. ######################################################################## $:.unshift Dir.pwd require ''test/unit'' require ''fileutils'' require ''mydir'' class TC_MyDir_Glob_Class < Test::Unit::TestCase # Helper method to get just the basename of the filename def base(files) files.map{ |f| File.basename(f) } end def setup @foo_files = %w/a.c a.cpp b.c b.h g.rb d/ @bar_files = %w/a.c a2.cpp a3.h a4.rb/ FileUtils.mkdir_p(''foo/bar/baz'') Dir.chdir(''foo''){ @foo_files.each{ |f| FileUtils.touch(f) } } Dir.chdir(''foo/bar''){ @bar_files.each{ |f| FileUtils.touch(f) } } end def test_glob_pattern assert_equal(%w/a.c a.cpp b.c b.h bar d g.rb/, base(MyDir.glob(''foo/{*}''))) assert_equal(%w/g.rb/, base(MyDir.glob(''foo/{*.rb}''))) assert_equal(%w/a.cpp g.rb/, base(MyDir.glob(''foo/*.{rb,cpp}''))) assert_equal(%w/a.cpp g.rb/, base(MyDir.glob(''foo/*.{rb,cp}*''))) assert_equal([], base(MyDir.glob(''foo/*.{}''))) end def test_glob_char_list assert_equal(%w/d/, base(MyDir.glob(''foo/[a-d]''))) assert_equal(%w/a.c a.cpp/, base(MyDir.glob(''foo/[a]*''))) assert_equal(%w/a.c a.cpp b.c b.h bar d/, base(MyDir.glob(''foo/[a-d]*''))) assert_equal(%w/d g.rb/, base(MyDir.glob(''foo/[^a-b]*''))) if WINDOWS assert_equal(%w/a.c a.cpp b.c b.h bar d g.rb/, base(MyDir.glob(''foo/[A-Z]*''))) else assert_equal([], base(MyDir.glob(''foo/[A-Z]*''))) end end def test_glob_char_list_edge_cases assert_equal([], MyDir.glob(''foo/[]'')) assert_equal([''d''], base(MyDir.glob(''foo/[^]''))) end def test_glob_question_mark assert_equal(%w/a.c/, base(MyDir.glob(''foo/a.?''))) assert_equal(%w/a.cpp/, base(MyDir.glob(''foo/a.c?p''))) assert_equal(%w/a.c b.c b.h bar/, base(MyDir.glob(''foo/???''))) assert_equal(%w/a.c b.c b.h/, base(MyDir.glob(''foo/?.?''))) end def test_glob_basic assert_respond_to(MyDir, :glob) assert_nothing_raised{ MyDir.glob("*") } end def test_glob_valid_metacharacters assert_nothing_raised{ MyDir.glob("**") } assert_nothing_raised{ MyDir.glob("foo.*") } assert_nothing_raised{ MyDir.glob("foo.?") } assert_nothing_raised{ MyDir.glob("*.[^r]*") } assert_nothing_raised{ MyDir.glob("*.[a-z][a-z]") } assert_nothing_raised{ MyDir.glob("*.{rb,h}") } assert_nothing_raised{ MyDir.glob("*.\t") } end def test_glob_star assert_equal(%w/a.c a.cpp b.c b.h bar d g.rb/, base(MyDir.glob(''foo/*''))) assert_equal(%w/a.c a.cpp b.c b.h bar d g.rb/, base(MyDir.glob(''foo/****''))) assert_equal(%w/a.c b.c/, base(MyDir.glob(''foo/*.c''))) assert_equal(%w/a.c a.cpp/, base(MyDir.glob(''foo/a*''))) assert_equal(%w/a.c a.cpp/, base(MyDir.glob(''foo/a*c*''))) assert_equal(%w/a.cpp/, base(MyDir.glob(''foo/a*p*''))) assert_equal([], MyDir.glob(''x*'')) end def test_glob_double_star assert_equal(%w/a.c a.cpp b.c b.h bar d g.rb/, base(MyDir.glob(''foo/**''))) assert_equal(%w/a.c b.c a.c/, base(MyDir.glob(''**/*.c''))) assert_equal(%w/a.c b.c a.c/, base(MyDir.glob(''foo/**/*.c''))) assert_equal(%w/a.c a.cpp a.c a2.cpp a3.h a4.rb/, base(MyDir.glob(''foo/**/a*''))) assert_equal([], MyDir.glob(''**/x*'')) end def test_glob_flags assert_nothing_raised{ MyDir.glob("*", File::FNM_DOTMATCH) } assert_nothing_raised{ MyDir.glob("*", File::FNM_NOESCAPE) } assert_nothing_raised{ MyDir.glob("*", File::FNM_PATHNAME) } assert_nothing_raised{ MyDir.glob("*", File::FNM_CASEFOLD) } end def test_glob_expected_errors assert_raises(TypeError){ MyDir.glob("*", "*") } end def teardown @foo_files = nil @bar_files = nil FileUtils.rm_rf(''foo'') end end Please feel free to take a stab at this. Good luck! Regards, Dan PS - I took a look at Python''s fnmatch.py code. It''s of no use - they don''t support ''**'' or ''{}'' notation.