On Mon, Oct 30, 2006 at 10:32:12PM +0800, Fabrice Colin wrote:
> Should the colon character only be used if the term being prefixed starts
> with a capital ?
Yes, since the QueryParser can't easily know if a particular prefix should
have a colon in general, only in specific cases.
If you always want a colon, just specify it explicitly in the second
argument to QueryParser::add_boolean_prefix.
> I have also found that if I search for "dir:/home/fabrice", the Query
> object will have terms "dir", "home" and "fabrice" instead of
> "XDIR:/home/fabrice" or "XDIR/home/fabrice".
>
> How should I prefix terms that start with a non-alphanumeric
> character?
Looking at the code, a prefixed boolean term is taken as ending at the
first space or control character or ')', but the first character must be
alphanumeric because of how the test for a prefixed string or
sub-expression is handled.
Try the attached patch which allows any non-space, non-control first
character other than ')'.
Cheers,
Olly
-------------- next part --------------
Index: queryparser/queryparser.lemony
===================================================================
--- queryparser/queryparser.lemony (revision 7378)
+++ queryparser/queryparser.lemony (working copy)
@@ -276,47 +276,44 @@
if (mode == DEFAULT && !prefixes.empty()) {
// Check for fieldname prefixes (e.g. title:historical).
AccentNormalisingItor p = find_if(it, end, C_isnotalnum);
- if (p != end && *p == ':' && ++p != end) {
- unsigned char ch = *p;
- if (C_isalnum(ch) ||
- ((flags & FLAG_PHRASE) && ch == '"') ||
- ((flags & FLAG_BOOLEAN) && ch == '(')) {
- string field;
- p = it;
- while (*p != ':') field += *p++;
- map<string, BoolAndString>::const_iterator f;
- f = prefixes.find(field);
- if (f != prefixes.end()) {
+ if (p != end && *p == ':' && ++p != end && *p > ' ' && *p != ')') {
+ string field;
+ p = it;
+ while (*p != ':') field += *p++;
+ map<string, BoolAndString>::const_iterator f;
+ f = prefixes.find(field);
+ if (f != prefixes.end()) {
+ unsigned char ch = *++p;
+ bool boolean_filter = f->second.flag;
+ if (boolean_filter || C_isalnum(ch) ||
+ ((flags & FLAG_PHRASE) && ch == '"') ||
+ ((flags & FLAG_BOOLEAN) && ch == '(')) {
+ it = p;
// Can't boolean prefix a subexpression or phrase.
- bool boolean_filter = f->second.flag;
- if (!boolean_filter || C_isalnum(ch)) {
- it = p;
+ if (!boolean_filter && !C_isalnum(ch)) {
+ newprev = ch;
++it;
- if (!C_isalnum(ch)) {
- newprev = ch;
- ++it;
- prefix_stack.push_back(f->second.str);
+ prefix_stack.push_back(f->second.str);
+ if (ch == '(') {
// Prefixed sub-expr: title:(fast NEAR food)
- if (ch == '(') {
- Parse(pParser, BRA, NULL, &state);
- continue;
- }
- // Prefixed phrase: subject:"space flight"
- Parse(pParser, QUOTE, NULL, &state);
- mode = IN_PREFIXED_QUOTES;
+ Parse(pParser, BRA, NULL, &state);
continue;
}
- prefix = f->second.str;
- if (boolean_filter) {
- if (prefix_needs_colon(prefix, *it))
- prefix += ':';
- while (it != end && *it > ' ' && *it != ')')
- prefix += *it++;
- Parse(pParser, BOOLEAN_FILTER,
- new Term(prefix, 0), &state);
- continue;
- }
+ // Prefixed phrase: subject:"space flight"
+ Parse(pParser, QUOTE, NULL, &state);
+ mode = IN_PREFIXED_QUOTES;
+ continue;
}
+ prefix = f->second.str;
+ if (boolean_filter) {
+ if (prefix_needs_colon(prefix, *it))
+ prefix += ':';
+ while (it != end && *it > ' ' && *it != ')')
+ prefix += *it++;
+ Parse(pParser, BOOLEAN_FILTER,
+ new Term(prefix, 0), &state);
+ continue;
+ }
}
}
}