Dave, thank you so much for the 0.11 release(s). You have solved many
problems for me. As part of my appreciation for your good works, I am
offering up for public consideration a silly little class that I wrote.
(Code is below.) This class offers a simplified Hash-like interface to
(a very restricted subset of) Ferret. Hence I call it FerretHash.
FerretHash comes with its very own pet Ferret bug. Run the crude unit
test to see the problem. (Long story short, it looks like term
frequency, as reported by IndexReader#terms, does not take deletions
into account.)
require 'rubygems'
require 'ferret'
require 'fileutils'
require 'tempfile'
# A simplified, Hash-like interface to a very restricted subset of Ferret.
#
# Every entry is one Ferret document with two fields:
#   :key   - indexed untokenized, not stored (used only for lookup)
#   :value - stored, not indexed (returned on lookup)
#
# A single IndexWriter is kept open between operations. Lookups open a
# short-lived IndexReader; deletions go through such a reader as well, and
# the writer is closed around the deletion and reopened afterwards.
#
# Known limitation: #keys and #each_key insist that every term in the :key
# field has a frequency of exactly 1 and raise otherwise. NOTE(review): this
# presumably trips once a key has been deleted or overwritten, if the term
# frequencies reported by IndexReader#terms still count deleted documents --
# confirm against the Ferret version in use.
class FerretHash
  include Enumerable

  # name:: directory of the index to open (created if needed). When omitted,
  #        a fresh temporary path is used.
  def initialize(name=nil)
    # Only invent a temporary location when the caller did not supply one.
    unless name
      tf=Tempfile.new("ferrethash_#{$$}")
      name=tf.path
      # Only the unique name is wanted; close! closes and unlinks the
      # placeholder file so the path is free for the index.
      tf.close!
    end
    @name=name
    open_writer
  end

  # Opens the IndexWriter for this index unless one is already open.
  def open_writer
    return if @writer
    # a schema for the hash...
    fis=Ferret::Index::FieldInfos.new
    fis.add_field(:key, :index=>:untokenized, :store=>:no,
                  :term_vector=>:no)
    fis.add_field(:value, :index=>:no, :store=>:yes,
                  :term_vector=>:no)
    @writer=Ferret::Index::IndexWriter.new(:path=>@name,
      :field_infos=>fis, :create_if_needed=>true, :analyzer=>nil)
  end

  # Closes the IndexWriter, if any. Safe to call repeatedly.
  def close_writer
    @writer.close if @writer
    @writer=nil
  end

  # Closes the writer and forgets the index path. The index itself stays on
  # disk; the object is not usable for further operations afterwards.
  def close
    close_writer
    @name=nil
  end

  # Closes the hash and removes its index directory from disk. Returns nil.
  def destroy
    name=@name
    close
    # FileUtils instead of shelling out to `rm -r`, so paths containing
    # spaces or shell metacharacters are handled correctly.
    FileUtils.rm_rf(name) if name
    nil
  end

  # Path of the underlying index, or nil once closed.
  def path
    @name
  end

  # Returns the value stored under +key+, or nil when there is none.
  # Raises RuntimeError if more than one document carries +key+.
  def [](key)
    with_searcher do |reader, searcher|
      docnum=find_doc(searcher, key)
      # docnum may legitimately be 0, which is truthy in Ruby.
      docnum && reader[docnum][:value]
    end
  end

  # Removes +key+ and returns the value it held, or nil if it was absent.
  # Raises RuntimeError if more than one document carries +key+.
  def delete(key)
    result=nil
    begin
      with_searcher do |reader, searcher|
        docnum=find_doc(searcher, key)
        if docnum
          # The writer is closed before the reader modifies the index.
          close_writer
          result=reader[docnum][:value]
          reader.delete docnum
          reader.commit
        end
      end
    ensure
      # Always leave a writer open, even if the deletion raised.
      open_writer
    end
    result
  end

  # Stores +value+ under +key+, replacing any previous entry, and commits.
  def []=(key,value)
    delete key
    @writer << {:key=>key, :value=>value}
    @writer.commit
    value
  end

  # Adds an entry without removing a previous one for +key+ and without
  # committing. The caller is responsible for key uniqueness and for
  # calling #sync.
  def set_fast!(key, value)
    @writer << {:key=>key, :value=>value}
  end

  # Commits pending writer changes.
  def sync
    @writer.commit
  end

  # Returns all keys as an Array. See the class comment for the
  # frequency check this inherits from #each_key.
  def keys
    result=[]
    each_key{|term| result << term }
    result
  end

  # Returns the stored values of all non-deleted documents.
  def values
    with_reader do |reader|
      live=(0...reader.max_doc).reject{|n| reader.deleted? n }
      live.map{|n| reader[n][:value] }
    end
  end

  # Yields every term of the :key field. Raises RuntimeError when a term's
  # reported frequency is not exactly 1. Returns self.
  def each_key
    with_reader do |reader|
      reader.terms(:key).extend(Enumerable).each do |term,freq|
        unless freq==1
          fail "key #{term.inspect} has term frequency #{freq}, expected 1"
        end
        yield term
      end
    end
    self
  end

  # Yields each key together with its value.
  def each
    each_key{|k| yield k,self[k] }
  end

  private

  # Opens an IndexReader on the index, yields it, and closes it even when
  # the block raises. Returns the block's value.
  def with_reader
    reader=Ferret::Index::IndexReader.new(@name)
    begin
      yield reader
    ensure
      reader.close
    end
  end

  # Like with_reader, but additionally yields a Searcher built on the
  # reader. The searcher is closed before the reader.
  def with_searcher
    with_reader do |reader|
      searcher=Ferret::Search::Searcher.new(reader)
      begin
        yield reader, searcher
      ensure
        searcher.close
      end
    end
  end

  # Returns the document number holding +key+, or nil when no document
  # matches. Raises RuntimeError when several documents match.
  def find_doc(searcher, key)
    td=searcher.search(Ferret::Search::TermQuery.new(:key, key), :limit=>1)
    case td.total_hits
    when 0 then nil
    when 1 then td.hits.first.doc
    else fail "#{td.total_hits} documents found for key #{key.inspect}"
    end
  end
end
# Crude self-test, run only when this file is executed directly.
# Fills a temporary FerretHash with 13 entries, reads them back, overwrites
# one entry and checks keys/values again. Each failed check raises a
# RuntimeError naming the check.
if __FILE__==$0
  fh=FerretHash.new
  begin
    keys=("a".."m").to_a
    vals=("n".."z").to_a
    pairs=keys.zip(vals)

    pairs.each{|k,v| fh[k]=v }
    pairs.each{|k,v|
      fail "lookup of #{k.inspect} after insert" unless fh[k]==v
    }
    fail "keys after insert" unless fh.keys.sort==keys
    fail "values after insert" unless fh.values.sort==vals

    # Overwriting an existing key deletes the old document first.
    fh["a"]="N"
    fail "lookup after overwrite" unless fh["a"]=="N"
    fail "keys after overwrite" unless fh.keys.sort==keys
    fail "values after overwrite" unless fh.values.sort==["N"]+vals[1..-1]
  ensure
    # Remove the temporary index even when a check above failed.
    fh.destroy
  end
end