In Files

Class/Module Index [+]

Quicksearch

Ferret::Search::PhraseQuery

Summary

PhraseQuery matches phrases like "the quick brown fox". Most people are familiar with phrase queries having used them in most internet search engines.

Slop

Ferret's phrase queries are slightly more advanced. You can match phrases with a slop, i.e. the match isn't exact but it is good enough. The slop is basically the word edit distance of the phrase. For example, "the quick brown fox" with a slop of 1 would match "the quick little brown fox". With a slop of 2 it would match "the brown quick fox".

query = PhraseQuery.new(:content)
query << "the" << "quick" << "brown" << "fox"

# matches => "the quick brown fox"

query.slop = 1
# matches => "the quick little brown fox"
                           |__1__^

query.slop = 2
# matches => "the brown quick _____ fox"
                    ^_____2_____|

Multi-PhraseQuery

Phrase queries can also have multiple terms in a single position. Let's say, for example, that we want to match synonyms for quick like "fast" and "speedy". You could build the query like this:

query = PhraseQuery.new(:content)
query << "the" << ["quick", "fast", "speedy"] << ["brown", "red"] << "fox"
# matches => "the quick red fox"
# matches => "the fast brown fox"

query.slop = 1
# matches => "the speedy little red fox"

You can also leave positions blank. Let's say you wanted to match "the quick <> fox" where "<>" could match anything (but not nothing). You'd build this query like this:

query = PhraseQuery.new(:content)
query.add_term("the").add_term("quick").add_term("fox", 2)
# matches => "the quick yellow fox"
# matches => "the quick alkgdhaskghaskjdh fox"

The second parameter to PhraseQuery#add_term is the position increment for the term. It is one by default, meaning that every time you add a term it is expected to directly follow the previous term. By setting it to 2 or greater you leave empty positions in the phrase.

There are also some tricks you can do by setting the position increment to 0. With a little help from your analyzer you can actually tag bold or italic text, for example. If you want more information about this, ask on the mailing list.

Public Class Methods

new(field, slop = 0) → phrase_query click to toggle source

Create a new PhraseQuery on the field field. You need to add terms to the query before it will do anything of value. See PhraseQuery#add_term.

/*
 * PhraseQuery.new(field, slop = 0) -> phrase_query
 *
 * Creates the underlying phrase query for +field+ and attaches it to +self+.
 * The optional second argument sets the query's slop.
 *
 * Raises TypeError/RangeError (via NUM2INT) when slop is given but is not
 * an integer that fits in a C int.
 */
static VALUE
frb_phq_init(int argc, VALUE *argv, VALUE self)
{
    VALUE rfield, rslop;
    Query *q;
    int slop = 0;
    int has_slop;

    /* "11": one required argument (field), one optional argument (slop). */
    has_slop = (rb_scan_args(argc, argv, "11", &rfield, &rslop) == 2);

    /*
     * Convert the slop BEFORE allocating the query: NUM2INT type-checks its
     * argument and may raise, and raising after phq_new() would leave q
     * unowned. FIX2INT performed no such check and produced a garbage slop
     * for non-Fixnum arguments.
     */
    if (has_slop) {
        slop = NUM2INT(rslop);
    }

    q = phq_new(frb_field(rfield));
    if (has_slop) {
        /* Only override the slop chosen by phq_new() when one was passed. */
        ((PhraseQuery *)q)->slop = slop;
    }

    /* Hand q over to the Ruby object; frb_q_free is registered as its free hook. */
    Frt_Wrap_Struct(self, NULL, &frb_q_free, q);
    object_add(q, self);
    return self;
}

Public Instance Methods

add_term(term, position_increment = 1) → phrase_query click to toggle source
phrase_query << term → phrase_query

Add a term to the phrase query. By default the position_increment is set to 1 so each term you add is expected to come directly after the previous term. By setting position_increment to 2 you are specifying that the term you just added should occur two terms after the previous term. For example;

phrase_query.add_term("big").add_term("house", 2)
# matches => "big brick house"
# matches => "big red house"
# doesn't match => "big house"
/*
 * phrase_query.add_term(term, position_increment = 1) -> phrase_query
 * phrase_query << term                                -> phrase_query
 *
 * Appends +term+ to the phrase. +term+ is either a String or a non-empty
 * Array of Strings; for an Array the first element is added with the given
 * position increment and the remaining elements are appended as alternative
 * terms via phq_append_multi_term().
 *
 * Raises ArgumentError for an empty array or an unsupported argument type,
 * and TypeError/RangeError (via NUM2INT) when position_increment is not an
 * integer that fits in a C int. Returns self so calls can be chained.
 */
static VALUE
frb_phq_add(int argc, VALUE *argv, VALUE self)
{
    VALUE rterm, rpos_inc;
    int pos_inc = 1;
    GET_Q();

    if (rb_scan_args(argc, argv, "11", &rterm, &rpos_inc) == 2) {
        /*
         * NUM2INT type-checks the argument; FIX2INT silently produced a
         * garbage increment for anything that was not a Fixnum.
         */
        pos_inc = NUM2INT(rpos_inc);
    }

    switch (TYPE(rterm)) {
        case T_STRING:
            {
                phq_add_term(q, StringValuePtr(rterm), pos_inc);
                break;
            }
        case T_ARRAY:
            {
                long i; /* RARRAY_LEN yields a long, so index with one */
                char *t;
                if (RARRAY_LEN(rterm) < 1) {
                    rb_raise(rb_eArgError, "Cannot add empty array to a "
                             "PhraseQuery. You must add either a string or "
                             "an array of strings");
                }
                /* The first element opens the new position ... */
                t = StringValuePtr(RARRAY_PTR(rterm)[0]);
                phq_add_term(q, t, pos_inc);
                /* ... and every further element is added to that same position. */
                for (i = 1; i < RARRAY_LEN(rterm); i++) {
                    t = StringValuePtr(RARRAY_PTR(rterm)[i]);
                    phq_append_multi_term(q, t);
                }
                break;
            }
        default:
            rb_raise(rb_eArgError, "You can only add a string or an array of "
                     "strings to a PhraseQuery, not a %s\n", 
                     rs2s(rb_obj_as_string(rterm)));
    }
    return self;
}
add_term(term, position_increment = 1) → phrase_query click to toggle source
phrase_query << term → phrase_query

Add a term to the phrase query. By default the position_increment is set to 1 so each term you add is expected to come directly after the previous term. By setting position_increment to 2 you are specifying that the term you just added should occur two terms after the previous term. For example;

phrase_query.add_term("big").add_term("house", 2)
# matches => "big brick house"
# matches => "big red house"
# doesn't match => "big house"
/*
 * phrase_query.add_term(term, position_increment = 1) -> phrase_query
 * phrase_query << term                                -> phrase_query
 *
 * Appends +term+ to the phrase. +term+ is either a String or a non-empty
 * Array of Strings; for an Array the first element is added with the given
 * position increment and the remaining elements are appended as alternative
 * terms via phq_append_multi_term().
 *
 * Raises ArgumentError for an empty array or an unsupported argument type,
 * and TypeError/RangeError (via NUM2INT) when position_increment is not an
 * integer that fits in a C int. Returns self so calls can be chained.
 */
static VALUE
frb_phq_add(int argc, VALUE *argv, VALUE self)
{
    VALUE rterm, rpos_inc;
    int pos_inc = 1;
    GET_Q();

    if (rb_scan_args(argc, argv, "11", &rterm, &rpos_inc) == 2) {
        /*
         * NUM2INT type-checks the argument; FIX2INT silently produced a
         * garbage increment for anything that was not a Fixnum.
         */
        pos_inc = NUM2INT(rpos_inc);
    }

    switch (TYPE(rterm)) {
        case T_STRING:
            {
                phq_add_term(q, StringValuePtr(rterm), pos_inc);
                break;
            }
        case T_ARRAY:
            {
                long i; /* RARRAY_LEN yields a long, so index with one */
                char *t;
                if (RARRAY_LEN(rterm) < 1) {
                    rb_raise(rb_eArgError, "Cannot add empty array to a "
                             "PhraseQuery. You must add either a string or "
                             "an array of strings");
                }
                /* The first element opens the new position ... */
                t = StringValuePtr(RARRAY_PTR(rterm)[0]);
                phq_add_term(q, t, pos_inc);
                /* ... and every further element is added to that same position. */
                for (i = 1; i < RARRAY_LEN(rterm); i++) {
                    t = StringValuePtr(RARRAY_PTR(rterm)[i]);
                    phq_append_multi_term(q, t);
                }
                break;
            }
        default:
            rb_raise(rb_eArgError, "You can only add a string or an array of "
                     "strings to a PhraseQuery, not a %s\n", 
                     rs2s(rb_obj_as_string(rterm)));
    }
    return self;
}
slop → integer click to toggle source

Return the slop set for this phrase query. See the PhraseQuery description for more information on slop

/*
 * phrase_query.slop -> integer
 *
 * Reads the slop field of the wrapped PhraseQuery and returns it as a
 * Ruby Fixnum.
 */
static VALUE
frb_phq_get_slop(VALUE self)
{
    GET_Q();
    PhraseQuery *phq = (PhraseQuery *)q;

    return INT2FIX(phq->slop);
}
slop = slop → slop click to toggle source

Set the slop for this phrase query. See the PhraseQuery description for more information on slop.

/*
 * phrase_query.slop = slop
 *
 * Stores +slop+ in the wrapped PhraseQuery and returns self.
 *
 * Raises TypeError/RangeError (via NUM2INT) when +slop+ is not an integer
 * that fits in a C int.
 */
static VALUE
frb_phq_set_slop(VALUE self, VALUE rslop)
{
    GET_Q();
    /*
     * NUM2INT type-checks the argument; FIX2INT stored a garbage slop for
     * anything that was not a Fixnum.
     */
    ((PhraseQuery *)q)->slop = NUM2INT(rslop);
    return self;
}

[Validate]

Generated with the Darkfish Rdoc Generator 2.