class RMMSeg::Token

A Token holds the text and related position information.

Public Instance Methods

end() → end_pos click to toggle source

Get the end position of this token.

static VALUE tk_end(VALUE self)
    {
        // The end position is kept on the wrapped struct as a ready-made
        // Ruby value, so it can be handed back without conversion.
        const Token *token = static_cast<const Token *>(DATA_PTR(self));
        return token->end;
    }

    static VALUE cToken;

    /*
     * Wrap a native rmmseg::Token in a new RMMSeg::Token Ruby object.
     *
     * base is the start of the buffer that t.text points into; the start
     * and end values stored on the Ruby object are offsets from base.
     */
    static VALUE tk_create(const char* base, const rmmseg::Token &t)
    {
        // A pointer difference is wider than int on 64-bit targets; keep it
        // in a long instead of narrowing it.
        long start = t.text - base;

        // Build the Ruby string before allocating the struct, so that an
        // exception raised by rb_str_new cannot leak the struct.
        // The volatile local is necessary, see
        // http://lifegoo.pluskid.org/?p=348
        volatile VALUE text = rb_str_new(t.text, t.length);

        Token *tk = ALLOC(Token);
        tk->text = text;
        tk->start = INT2FIX(start);
        tk->end = INT2FIX(start + t.length);

        volatile VALUE tok = Data_Wrap_Struct(cToken,
                                (RUBY_DATA_FUNC)tk_mark,
                                (RUBY_DATA_FUNC)tk_free,
                                tk);
        return tok;
    }

    /*********************
     * Algorithm Class
     *********************/
    /*
     * Native state wrapped by an RMMSeg::Algorithm Ruby object.
     */
    struct Algorithm
    {
        VALUE text;             // Ruby string being segmented; marked in algor_mark so it is not garbage collected
        rmmseg::Algorithm *algor;   // segmenter constructed over text's buffer in algor_create
    };

    /*
     * GC mark callback: reports the held source string as reachable.
     */
    static void algor_mark(Algorithm *a)
    {
        VALUE held = a->text;
        rb_gc_mark(held);
    }
    /*
     * GC free callback: destroys the segmenter and releases both
     * allocations made by algor_create.
     */
    static void algor_free(Algorithm *a)
    {
        typedef rmmseg::Algorithm Segmenter;

        if (a->algor != NULL)
        {
            // The segmenter was placement-constructed into malloc'd storage,
            // so its destructor has to be run explicitly before free().
            a->algor->~Segmenter();
            free(a->algor);
        }

        // The wrapper struct comes from ALLOC, so it is released with xfree.
        xfree(a);
    }

    static VALUE cAlgorithm;

    /*
     * Create an Algorithm object to do segmenting on +text+.
     *
     * call-seq:
     *   new(text)    -> algorithm
     *
     * +text+ must be a String or convertible with +to_str+; otherwise a
     * TypeError is raised.
     */
    static VALUE algor_create(VALUE klass, VALUE text)
    {
        // Validate first: RSTRING_PTR/RSTRING_LEN on a non-String would
        // read garbage, and nothing has been allocated yet that could leak.
        StringValue(text);

        Algorithm *algor = ALLOC(Algorithm);
        algor->text = text;

        // The segmenter lives in malloc'd storage because algor_free
        // releases it with free().
        void *mem = malloc(sizeof(rmmseg::Algorithm));
        if (mem == NULL)
        {
            xfree(algor);
            rb_memerror();
        }

        // NOTE(review): the segmenter is handed a raw pointer into text's
        // buffer; mutating the string afterwards would presumably leave it
        // with a stale pointer -- consider holding a frozen copy.
        algor->algor = new(mem) rmmseg::Algorithm(RSTRING_PTR(text),
                                                  RSTRING_LEN(text));

        return Data_Wrap_Struct(klass,
                                (RUBY_DATA_FUNC)algor_mark,
                                (RUBY_DATA_FUNC)algor_free,
                                algor);
    }

    /*
     * Get next token.
     *
     * call-seq:
     *   next_token()   -> token
     *
     * Return +nil+ if no more token available.
     */
    static VALUE algor_next_token(VALUE self)
    {
        Algorithm *wrapper = static_cast<Algorithm *>(DATA_PTR(self));
        rmmseg::Token next = wrapper->algor->next_token();

        // A zero-length token means there is nothing left to return.
        if (next.length != 0)
        {
            // Offsets in the Ruby token are computed relative to the start
            // of the held string's buffer.
            volatile VALUE result = tk_create(RSTRING_PTR(wrapper->text), next);
            return result;
        }
        return Qnil;
    }


    /*
     * Define the RMMSeg module, its Dictionary submodule and the Token and
     * Algorithm classes, and bind their Ruby methods to the C functions in
     * this file.
     */
    void Init_rmmseg()
    {
        mRMMSeg = rb_define_module("RMMSeg");

        /* Manage dictionaries used by rmmseg. */
        mDictionary = rb_define_module_under(mRMMSeg, "Dictionary");
        rb_define_singleton_method(mDictionary, "load_chars", RUBY_METHOD_FUNC(dic_load_chars), 1);
        rb_define_singleton_method(mDictionary, "load_words", RUBY_METHOD_FUNC(dic_load_words), 1);
        rb_define_singleton_method(mDictionary, "add", RUBY_METHOD_FUNC(dic_add), 3);
        rb_define_singleton_method(mDictionary, "has_word?", RUBY_METHOD_FUNC(dic_has_word), 1);

        /* A Token holds the text and related position information. */
        cToken = rb_define_class_under(mRMMSeg, "Token", rb_cObject);
        // Token.new is removed; tokens are built by tk_create.
        rb_undef_method(rb_singleton_class(cToken), "new");
        rb_define_method(cToken, "text", RUBY_METHOD_FUNC(tk_text), 0);
        rb_define_method(cToken, "start", RUBY_METHOD_FUNC(tk_start), 0);
        rb_define_method(cToken, "end", RUBY_METHOD_FUNC(tk_end), 0);

        /* An Algorithm object uses the MMSEG algorithm to do segmenting. */
        cAlgorithm = rb_define_class_under(mRMMSeg, "Algorithm", rb_cObject);
        // "new" is a singleton method taking one argument (the text).
        rb_define_singleton_method(cAlgorithm, "new", RUBY_METHOD_FUNC(algor_create), 1);
        rb_define_method(cAlgorithm, "next_token", RUBY_METHOD_FUNC(algor_next_token), 0);
    }
}
start() → start_pos click to toggle source

Get the start position of this token.

static VALUE tk_start(VALUE self)
    {
        // The start position is kept on the wrapped struct as a ready-made
        // Ruby value, so it can be handed back without conversion.
        const Token *token = static_cast<const Token *>(DATA_PTR(self));
        return token->start;
    }

    /*
     * Get the end position of this token.
     *
     * call-seq:
     *   end()    -> end_pos
     *
     */
    static VALUE tk_end(VALUE self)
    {
        // The end position is kept on the wrapped struct as a ready-made
        // Ruby value, so it can be handed back without conversion.
        const Token *token = static_cast<const Token *>(DATA_PTR(self));
        return token->end;
    }

    static VALUE cToken;

    /*
     * Wrap a native rmmseg::Token in a new RMMSeg::Token Ruby object.
     *
     * base is the start of the buffer that t.text points into; the start
     * and end values stored on the Ruby object are offsets from base.
     */
    static VALUE tk_create(const char* base, const rmmseg::Token &t)
    {
        // A pointer difference is wider than int on 64-bit targets; keep it
        // in a long instead of narrowing it.
        long start = t.text - base;

        // Build the Ruby string before allocating the struct, so that an
        // exception raised by rb_str_new cannot leak the struct.
        // The volatile local is necessary, see
        // http://lifegoo.pluskid.org/?p=348
        volatile VALUE text = rb_str_new(t.text, t.length);

        Token *tk = ALLOC(Token);
        tk->text = text;
        tk->start = INT2FIX(start);
        tk->end = INT2FIX(start + t.length);

        volatile VALUE tok = Data_Wrap_Struct(cToken,
                                (RUBY_DATA_FUNC)tk_mark,
                                (RUBY_DATA_FUNC)tk_free,
                                tk);
        return tok;
    }

    /*********************
     * Algorithm Class
     *********************/
    /*
     * Native state wrapped by an RMMSeg::Algorithm Ruby object.
     */
    struct Algorithm
    {
        VALUE text;             // Ruby string being segmented; marked in algor_mark so it is not garbage collected
        rmmseg::Algorithm *algor;   // segmenter constructed over text's buffer in algor_create
    };

    /*
     * GC mark callback: reports the held source string as reachable.
     */
    static void algor_mark(Algorithm *a)
    {
        VALUE held = a->text;
        rb_gc_mark(held);
    }
    /*
     * GC free callback: destroys the segmenter and releases both
     * allocations made by algor_create.
     */
    static void algor_free(Algorithm *a)
    {
        typedef rmmseg::Algorithm Segmenter;

        if (a->algor != NULL)
        {
            // The segmenter was placement-constructed into malloc'd storage,
            // so its destructor has to be run explicitly before free().
            a->algor->~Segmenter();
            free(a->algor);
        }

        // The wrapper struct comes from ALLOC, so it is released with xfree.
        xfree(a);
    }

    static VALUE cAlgorithm;

    /*
     * Create an Algorithm object to do segmenting on +text+.
     *
     * call-seq:
     *   new(text)    -> algorithm
     *
     * +text+ must be a String or convertible with +to_str+; otherwise a
     * TypeError is raised.
     */
    static VALUE algor_create(VALUE klass, VALUE text)
    {
        // Validate first: RSTRING_PTR/RSTRING_LEN on a non-String would
        // read garbage, and nothing has been allocated yet that could leak.
        StringValue(text);

        Algorithm *algor = ALLOC(Algorithm);
        algor->text = text;

        // The segmenter lives in malloc'd storage because algor_free
        // releases it with free().
        void *mem = malloc(sizeof(rmmseg::Algorithm));
        if (mem == NULL)
        {
            xfree(algor);
            rb_memerror();
        }

        // NOTE(review): the segmenter is handed a raw pointer into text's
        // buffer; mutating the string afterwards would presumably leave it
        // with a stale pointer -- consider holding a frozen copy.
        algor->algor = new(mem) rmmseg::Algorithm(RSTRING_PTR(text),
                                                  RSTRING_LEN(text));

        return Data_Wrap_Struct(klass,
                                (RUBY_DATA_FUNC)algor_mark,
                                (RUBY_DATA_FUNC)algor_free,
                                algor);
    }

    /*
     * Get next token.
     *
     * call-seq:
     *   next_token()   -> token
     *
     * Return +nil+ if no more token available.
     */
    static VALUE algor_next_token(VALUE self)
    {
        Algorithm *wrapper = static_cast<Algorithm *>(DATA_PTR(self));
        rmmseg::Token next = wrapper->algor->next_token();

        // A zero-length token means there is nothing left to return.
        if (next.length != 0)
        {
            // Offsets in the Ruby token are computed relative to the start
            // of the held string's buffer.
            volatile VALUE result = tk_create(RSTRING_PTR(wrapper->text), next);
            return result;
        }
        return Qnil;
    }


    /*
     * Define the RMMSeg module, its Dictionary submodule and the Token and
     * Algorithm classes, and bind their Ruby methods to the C functions in
     * this file.
     */
    void Init_rmmseg()
    {
        mRMMSeg = rb_define_module("RMMSeg");

        /* Manage dictionaries used by rmmseg. */
        mDictionary = rb_define_module_under(mRMMSeg, "Dictionary");
        rb_define_singleton_method(mDictionary, "load_chars", RUBY_METHOD_FUNC(dic_load_chars), 1);
        rb_define_singleton_method(mDictionary, "load_words", RUBY_METHOD_FUNC(dic_load_words), 1);
        rb_define_singleton_method(mDictionary, "add", RUBY_METHOD_FUNC(dic_add), 3);
        rb_define_singleton_method(mDictionary, "has_word?", RUBY_METHOD_FUNC(dic_has_word), 1);

        /* A Token holds the text and related position information. */
        cToken = rb_define_class_under(mRMMSeg, "Token", rb_cObject);
        // Token.new is removed; tokens are built by tk_create.
        rb_undef_method(rb_singleton_class(cToken), "new");
        rb_define_method(cToken, "text", RUBY_METHOD_FUNC(tk_text), 0);
        rb_define_method(cToken, "start", RUBY_METHOD_FUNC(tk_start), 0);
        rb_define_method(cToken, "end", RUBY_METHOD_FUNC(tk_end), 0);

        /* An Algorithm object uses the MMSEG algorithm to do segmenting. */
        cAlgorithm = rb_define_class_under(mRMMSeg, "Algorithm", rb_cObject);
        // "new" is a singleton method taking one argument (the text).
        rb_define_singleton_method(cAlgorithm, "new", RUBY_METHOD_FUNC(algor_create), 1);
        rb_define_method(cAlgorithm, "next_token", RUBY_METHOD_FUNC(algor_next_token), 0);
    }
}
text() → text click to toggle source

Get the text held by this token.

static VALUE tk_text(VALUE self)
    {
        // The text is kept on the wrapped struct as a Ruby string and is
        // returned as-is.
        const Token *token = static_cast<const Token *>(DATA_PTR(self));
        return token->text;
    }

    /*
     * Get the start position of this token.
     *
     * call-seq:
     *   start()    -> start_pos
     *
     */
    static VALUE tk_start(VALUE self)
    {
        // The start position is kept on the wrapped struct as a ready-made
        // Ruby value, so it can be handed back without conversion.
        const Token *token = static_cast<const Token *>(DATA_PTR(self));
        return token->start;
    }

    /*
     * Get the end position of this token.
     *
     * call-seq:
     *   end()    -> end_pos
     *
     */
    static VALUE tk_end(VALUE self)
    {
        // The end position is kept on the wrapped struct as a ready-made
        // Ruby value, so it can be handed back without conversion.
        const Token *token = static_cast<const Token *>(DATA_PTR(self));
        return token->end;
    }

    static VALUE cToken;

    /*
     * Wrap a native rmmseg::Token in a new RMMSeg::Token Ruby object.
     *
     * base is the start of the buffer that t.text points into; the start
     * and end values stored on the Ruby object are offsets from base.
     */
    static VALUE tk_create(const char* base, const rmmseg::Token &t)
    {
        // A pointer difference is wider than int on 64-bit targets; keep it
        // in a long instead of narrowing it.
        long start = t.text - base;

        // Build the Ruby string before allocating the struct, so that an
        // exception raised by rb_str_new cannot leak the struct.
        // The volatile local is necessary, see
        // http://lifegoo.pluskid.org/?p=348
        volatile VALUE text = rb_str_new(t.text, t.length);

        Token *tk = ALLOC(Token);
        tk->text = text;
        tk->start = INT2FIX(start);
        tk->end = INT2FIX(start + t.length);

        volatile VALUE tok = Data_Wrap_Struct(cToken,
                                (RUBY_DATA_FUNC)tk_mark,
                                (RUBY_DATA_FUNC)tk_free,
                                tk);
        return tok;
    }

    /*********************
     * Algorithm Class
     *********************/
    /*
     * Native state wrapped by an RMMSeg::Algorithm Ruby object.
     */
    struct Algorithm
    {
        VALUE text;             // Ruby string being segmented; marked in algor_mark so it is not garbage collected
        rmmseg::Algorithm *algor;   // segmenter constructed over text's buffer in algor_create
    };

    /*
     * GC mark callback: reports the held source string as reachable.
     */
    static void algor_mark(Algorithm *a)
    {
        VALUE held = a->text;
        rb_gc_mark(held);
    }
    /*
     * GC free callback: destroys the segmenter and releases both
     * allocations made by algor_create.
     */
    static void algor_free(Algorithm *a)
    {
        typedef rmmseg::Algorithm Segmenter;

        if (a->algor != NULL)
        {
            // The segmenter was placement-constructed into malloc'd storage,
            // so its destructor has to be run explicitly before free().
            a->algor->~Segmenter();
            free(a->algor);
        }

        // The wrapper struct comes from ALLOC, so it is released with xfree.
        xfree(a);
    }

    static VALUE cAlgorithm;

    /*
     * Create an Algorithm object to do segmenting on +text+.
     *
     * call-seq:
     *   new(text)    -> algorithm
     *
     * +text+ must be a String or convertible with +to_str+; otherwise a
     * TypeError is raised.
     */
    static VALUE algor_create(VALUE klass, VALUE text)
    {
        // Validate first: RSTRING_PTR/RSTRING_LEN on a non-String would
        // read garbage, and nothing has been allocated yet that could leak.
        StringValue(text);

        Algorithm *algor = ALLOC(Algorithm);
        algor->text = text;

        // The segmenter lives in malloc'd storage because algor_free
        // releases it with free().
        void *mem = malloc(sizeof(rmmseg::Algorithm));
        if (mem == NULL)
        {
            xfree(algor);
            rb_memerror();
        }

        // NOTE(review): the segmenter is handed a raw pointer into text's
        // buffer; mutating the string afterwards would presumably leave it
        // with a stale pointer -- consider holding a frozen copy.
        algor->algor = new(mem) rmmseg::Algorithm(RSTRING_PTR(text),
                                                  RSTRING_LEN(text));

        return Data_Wrap_Struct(klass,
                                (RUBY_DATA_FUNC)algor_mark,
                                (RUBY_DATA_FUNC)algor_free,
                                algor);
    }

    /*
     * Get next token.
     *
     * call-seq:
     *   next_token()   -> token
     *
     * Return +nil+ if no more token available.
     */
    static VALUE algor_next_token(VALUE self)
    {
        Algorithm *wrapper = static_cast<Algorithm *>(DATA_PTR(self));
        rmmseg::Token next = wrapper->algor->next_token();

        // A zero-length token means there is nothing left to return.
        if (next.length != 0)
        {
            // Offsets in the Ruby token are computed relative to the start
            // of the held string's buffer.
            volatile VALUE result = tk_create(RSTRING_PTR(wrapper->text), next);
            return result;
        }
        return Qnil;
    }


    /*
     * Define the RMMSeg module, its Dictionary submodule and the Token and
     * Algorithm classes, and bind their Ruby methods to the C functions in
     * this file.
     */
    void Init_rmmseg()
    {
        mRMMSeg = rb_define_module("RMMSeg");

        /* Manage dictionaries used by rmmseg. */
        mDictionary = rb_define_module_under(mRMMSeg, "Dictionary");
        rb_define_singleton_method(mDictionary, "load_chars", RUBY_METHOD_FUNC(dic_load_chars), 1);
        rb_define_singleton_method(mDictionary, "load_words", RUBY_METHOD_FUNC(dic_load_words), 1);
        rb_define_singleton_method(mDictionary, "add", RUBY_METHOD_FUNC(dic_add), 3);
        rb_define_singleton_method(mDictionary, "has_word?", RUBY_METHOD_FUNC(dic_has_word), 1);

        /* A Token holds the text and related position information. */
        cToken = rb_define_class_under(mRMMSeg, "Token", rb_cObject);
        // Token.new is removed; tokens are built by tk_create.
        rb_undef_method(rb_singleton_class(cToken), "new");
        rb_define_method(cToken, "text", RUBY_METHOD_FUNC(tk_text), 0);
        rb_define_method(cToken, "start", RUBY_METHOD_FUNC(tk_start), 0);
        rb_define_method(cToken, "end", RUBY_METHOD_FUNC(tk_end), 0);

        /* An Algorithm object uses the MMSEG algorithm to do segmenting. */
        cAlgorithm = rb_define_class_under(mRMMSeg, "Algorithm", rb_cObject);
        // "new" is a singleton method taking one argument (the text).
        rb_define_singleton_method(cAlgorithm, "new", RUBY_METHOD_FUNC(algor_create), 1);
        rb_define_method(cAlgorithm, "next_token", RUBY_METHOD_FUNC(algor_next_token), 0);
    }
}