/*
 *  call-seq:
 *     StopFilter.new(token_stream) -> token_stream
 *     StopFilter.new(token_stream, ["the", "and", "it"]) -> token_stream
 *
 *  Create a StopFilter which removes *stop-words* from a TokenStream. The
 *  set of stop-words to remove may optionally be supplied.
 *
 *  token_stream:: TokenStream to be filtered
 *  stop_words::   Array of *stop-words* you wish to be filtered out. This
 *                 defaults to a list of English stop-words.
 *                 Ferret::Analysis contains a number of stop-word lists.
 */
static VALUE
frt_stop_filter_init(int argc, VALUE *argv, VALUE self)
{
    VALUE r_inner, r_words;
    TokenStream *inner;
    TokenStream *filter;

    /* "11": exactly one mandatory argument followed by one optional one. */
    rb_scan_args(argc, argv, "11", &r_inner, &r_words);
    inner = frt_get_cwrapped_rts(r_inner);

    if (r_words == Qnil) {
        /* No word list supplied: use the default constructor. */
        filter = stop_filter_new(inner);
    }
    else {
        char **words = get_stopwords(r_words);

        filter = stop_filter_new_with_words(inner, (const char **)words);
        /* Only the array returned by get_stopwords is released here. */
        free(words);
    }

    /*
     * Register the Ruby sub-stream under the address of the filter's sub_ts
     * slot, and (below) the Ruby filter object under the filter pointer.
     * NOTE(review): presumably this lets the mark/free callbacks map the C
     * structs back to their Ruby objects -- confirm against object_add.
     */
    object_add(&(TkFilt(filter)->sub_ts), r_inner);

    Frt_Wrap_Struct(self, &frt_tf_mark, &frt_tf_free, filter);
    object_add(filter, self);

    return self;
}