PhraseQuery matches phrases like "the quick brown fox". Most people are familiar with phrase queries having used them in most internet search engines.
Ferret's phrase queries are slightly more advanced. You can match phrases with a slop, i.e. the match isn't exact but it is good enough. The slop is basically the word edit distance of the phrase. For example, "the quick brown fox" with a slop of 1 would match "the quick little brown fox". With a slop of 2 it would match "the brown quick fox".
query = PhraseQuery.new(:content) query << "the" << "quick" << "brown" << "fox" # matches => "the quick brown fox" query.slop = 1 # matches => "the quick little brown fox" |__1__^ query.slop = 2 # matches => "the brown quick _____ fox" ^_____2_____|
Phrase queries can also have multiple terms in a single position. Let's say, for example, that we want to match synonyms for quick like "fast" and "speedy". You could build the query like this:
query = PhraseQuery.new(:content) query << "the" << ["quick", "fast", "speedy"] << ["brown", "red"] << "fox" # matches => "the quick red fox" # matches => "the fast brown fox" query.slop = 1 # matches => "the speedy little red fox"
You can also leave positions blank. Let's say you wanted to match "the quick <> fox" where "<>" could match anything (but not nothing). You'd build this query like this:
query = PhraseQuery.new(:content) query.add_term("the").add_term("quick").add_term("fox", 2) # matches => "the quick yellow fox" # matches => "the quick alkgdhaskghaskjdh fox"
The second parameter to PhraseQuery#add_term is the position increment for the term. It is 1 by default, meaning that every term you add is expected to directly follow the previous term. By setting it to 2 or greater you leave empty positions in the phrase.
There are also some tricks you can do by setting the position increment to 0.
With a little help from your analyzer you can actually tag bold or
italic text for example. If you want more information about this, ask on the mailing list.
Create a new PhraseQuery on the field +field+. You need to add terms to the query before it will do anything of value. See PhraseQuery#add_term.
/*
 * PhraseQuery#initialize(field, slop = <default chosen by phq_new>)
 *
 * Builds the underlying phrase query for +field+ and binds it to +self+.
 *
 * argc/argv - Ruby arguments: one required (field), one optional (slop).
 * self      - the Ruby object being initialized.
 *
 * Returns self.
 *
 * The slop is converted to a C int *before* the query is allocated: the
 * conversion can raise (NUM2INT type- and range-checks its argument), and
 * raising after phq_new() would leave the freshly created query unowned.
 * FIX2INT was replaced by NUM2INT because FIX2INT performs no type check
 * on platforms where int and long have the same width.
 */
static VALUE
frb_phq_init(int argc, VALUE *argv, VALUE self)
{
    VALUE rfield, rslop;
    Query *q;
    int slop = 0;

    rb_scan_args(argc, argv, "11", &rfield, &rslop);

    /* Anything that may raise on the slop happens while nothing is allocated. */
    if (argc == 2) {
        slop = NUM2INT(rslop);
    }

    q = phq_new(frb_field(rfield));
    if (argc == 2) {
        /* Only override the slop when the caller supplied one. */
        ((PhraseQuery *)q)->slop = slop;
    }

    /* Hand ownership of q to the Ruby object; frb_q_free releases it. */
    Frt_Wrap_Struct(self, NULL, &frb_q_free, q);
    object_add(q, self);
    return self;
}
Add a term to the phrase query. By default the position_increment is set to 1 so each term you add is expected to come directly after the previous term. By setting position_increment to 2 you are specifying that the term you just added should occur two terms after the previous term. For example;
phrase_query.add_term("big").add_term("house", 2) # matches => "big brick house" # matches => "big red house" # doesn't match => "big house"
/*
 * PhraseQuery#add_term(term, position_increment = 1)
 *
 * Appends a term, or several alternative terms sharing one position, to
 * the phrase query.
 *
 * term               - a String, or a non-empty Array whose elements are
 *                      strings (or convertible via StringValuePtr).
 * position_increment - optional Integer, defaults to 1.
 *
 * Returns self, so calls can be chained.
 * Raises ArgumentError for an empty Array or for a term that is neither
 * a String nor an Array.
 */
static VALUE
frb_phq_add(int argc, VALUE *argv, VALUE self)
{
    VALUE rterm, rpos_inc;
    int pos_inc = 1;
    GET_Q();

    if (rb_scan_args(argc, argv, "11", &rterm, &rpos_inc) == 2) {
        /* NUM2INT type-checks its argument; FIX2INT does not do so on
         * every platform. */
        pos_inc = NUM2INT(rpos_inc);
    }

    switch (TYPE(rterm)) {
        case T_STRING:
            {
                phq_add_term(q, StringValuePtr(rterm), pos_inc);
                break;
            }
        case T_ARRAY:
            {
                long i;
                VALUE rt;

                if (RARRAY_LEN(rterm) < 1) {
                    rb_raise(rb_eArgError, "Cannot add empty array to a "
                             "PhraseQuery. You must add either a string or "
                             "an array of strings");
                }

                /*
                 * Each element is copied into a local before conversion.
                 * StringValuePtr stores the converted string back through
                 * the address it is given and may call #to_str; pointing
                 * it at the array's own storage is unsafe if that call
                 * resizes the array.
                 */
                rt = rb_ary_entry(rterm, 0);
                /* The first element opens a new position in the phrase... */
                phq_add_term(q, StringValuePtr(rt), pos_inc);

                /* ...the remaining ones are alternatives at that position. */
                for (i = 1; i < RARRAY_LEN(rterm); i++) {
                    rt = rb_ary_entry(rterm, i);
                    phq_append_multi_term(q, StringValuePtr(rt));
                }
                break;
            }
        default:
            rb_raise(rb_eArgError, "You can only add a string or an array of "
                     "strings to a PhraseQuery, not a %s\n",
                     rs2s(rb_obj_as_string(rterm)));
    }
    return self;
}
Add a term to the phrase query. By default the position_increment is set to 1 so each term you add is expected to come directly after the previous term. By setting position_increment to 2 you are specifying that the term you just added should occur two terms after the previous term. For example;
phrase_query.add_term("big").add_term("house", 2) # matches => "big brick house" # matches => "big red house" # doesn't match => "big house"
/*
 * PhraseQuery#<<(term, position_increment = 1)
 *
 * Same implementation as PhraseQuery#add_term: appends a term, or several
 * alternative terms sharing one position, to the phrase query.
 *
 * term               - a String, or a non-empty Array whose elements are
 *                      strings (or convertible via StringValuePtr).
 * position_increment - optional Integer, defaults to 1.
 *
 * Returns self, so calls can be chained.
 * Raises ArgumentError for an empty Array or for a term that is neither
 * a String nor an Array.
 */
static VALUE
frb_phq_add(int argc, VALUE *argv, VALUE self)
{
    VALUE rterm, rpos_inc;
    int pos_inc = 1;
    GET_Q();

    if (rb_scan_args(argc, argv, "11", &rterm, &rpos_inc) == 2) {
        /* NUM2INT type-checks its argument; FIX2INT does not do so on
         * every platform. */
        pos_inc = NUM2INT(rpos_inc);
    }

    switch (TYPE(rterm)) {
        case T_STRING:
            {
                phq_add_term(q, StringValuePtr(rterm), pos_inc);
                break;
            }
        case T_ARRAY:
            {
                long i;
                VALUE rt;

                if (RARRAY_LEN(rterm) < 1) {
                    rb_raise(rb_eArgError, "Cannot add empty array to a "
                             "PhraseQuery. You must add either a string or "
                             "an array of strings");
                }

                /*
                 * Each element is copied into a local before conversion.
                 * StringValuePtr stores the converted string back through
                 * the address it is given and may call #to_str; pointing
                 * it at the array's own storage is unsafe if that call
                 * resizes the array.
                 */
                rt = rb_ary_entry(rterm, 0);
                /* The first element opens a new position in the phrase... */
                phq_add_term(q, StringValuePtr(rt), pos_inc);

                /* ...the remaining ones are alternatives at that position. */
                for (i = 1; i < RARRAY_LEN(rterm); i++) {
                    rt = rb_ary_entry(rterm, i);
                    phq_append_multi_term(q, StringValuePtr(rt));
                }
                break;
            }
        default:
            rb_raise(rb_eArgError, "You can only add a string or an array of "
                     "strings to a PhraseQuery, not a %s\n",
                     rs2s(rb_obj_as_string(rterm)));
    }
    return self;
}
Return the slop set for this phrase query. See the PhraseQuery description for more information on slop
/*
 * PhraseQuery#slop
 *
 * Reads the slop field of the wrapped phrase query and returns it to Ruby
 * as a Fixnum.
 */
static VALUE
frb_phq_get_slop(VALUE self)
{
    PhraseQuery *phq;
    GET_Q();

    /* q is the generic Query handle; view it as the PhraseQuery it wraps. */
    phq = (PhraseQuery *)q;
    return INT2FIX(phq->slop);
}
Set the slop for this phrase query. See the PhraseQuery description for more information on slop.
/*
 * PhraseQuery#slop = slop
 *
 * Stores +rslop+ in the slop field of the wrapped phrase query.
 *
 * rslop - Ruby Integer holding the new slop.
 *
 * Returns self.
 *
 * NUM2INT is used instead of FIX2INT because it type- and range-checks
 * its argument; FIX2INT performs no check on platforms where int and long
 * have the same width, so a non-Integer would be stored as garbage.
 */
static VALUE
frb_phq_set_slop(VALUE self, VALUE rslop)
{
    GET_Q();
    ((PhraseQuery *)q)->slop = NUM2INT(rslop);
    return self;
}
Generated with the Darkfish Rdoc Generator 2.