Parent

Included Modules

Files

Class/Module Index [+]

Quicksearch

Bzip2::Reader

Bzip2::Reader is meant to read streams of bz2 compressed bytes. It behaves like an IO object with many similar methods. It also includes the Enumerable module and each element is a 'line' in the stream.

It can both decompress files:

reader = Bzip2::Reader.open('file')
puts reader.read

reader = Bzip2::Reader.new File.open('file')
puts reader.gets

And it may just decompress raw strings

reader = Bzip2::Reader.new compressed_string
reader = Bzip2::Reader.new Bzip2.compress('compress-me')

Public Class Methods

allocate() click to toggle source

Internally allocates data for a new Reader @private

/*
 * Allocator for the reader class: wraps a new struct bz_file in a Ruby
 * object of class `obj` (marked by bz_file_mark, released with free) and
 * seeds its initial fields.
 *
 * Returns the wrapping Ruby object.
 */
static VALUE bz_reader_s_alloc(VALUE obj) {
    struct bz_file *file;
    VALUE wrapper = Data_Make_Struct(obj, struct bz_file, bz_file_mark, free, file);

    file->state = BZ_OK;
    file->blocks = DEFAULT_BLOCKS;
    /* Install bz_malloc/bz_free as the stream's allocation callbacks. */
    file->bzs.bzfree = bz_free;
    file->bzs.bzalloc = bz_malloc;
    return wrapper;
}
foreach(filename, &block) click to toggle source

Reads a bz2 compressed file and yields each line to the block

Bzip2::Writer.open('file'){ |f| f << "a\n" << "b\n" << "c\n\nd" }
Bzip2::Reader.foreach('file'){ |l| p l }

# Output:
# "a\n"
# "b\n"
# "c\n"
# "\n"
# "d"

@param [String] filename the path to the file to open @yieldparam [String] each line of the file

/*
 * Class-level foreach(filename[, sep]) { |line| ... }.
 *
 * Calls rb_mKernel's id_open method on the filename, wraps the result in a
 * new instance of `obj`, and runs bz_reader_foreach_line over it.
 * bz_reader_close is registered through rb_ensure, and BZ2_RB_CLOSE is set
 * so that close also closes the opened io.
 *
 * Raises ArgumentError when called without a block. Returns nil when the
 * open call returned nil; otherwise returns the value of rb_ensure.
 */
static VALUE bz_reader_s_foreach(int argc, VALUE *argv, VALUE obj) {
    struct foreach_arg ctx;
    struct bz_file *file;
    VALUE path, separator;

    if (!rb_block_given_p()) {
        rb_raise(rb_eArgError, "call out of a block");
    }
    rb_scan_args(argc, argv, "11", &path, &separator);
#ifdef SafeStringValue
    SafeStringValue(path);
#else
    Check_SafeStr(path);
#endif
    /* argc - 1: number of arguments left once the filename is removed. */
    ctx.argc = argc - 1;
    ctx.sep = separator;
    ctx.obj = rb_funcall2(rb_mKernel, id_open, 1, &path);
    if (NIL_P(ctx.obj)) {
        return Qnil;
    }
    /* Replace the raw io with a reader wrapping it. */
    ctx.obj = rb_funcall2(obj, id_new, 1, &ctx.obj);
    Data_Get_Struct(ctx.obj, struct bz_file, file);
    file->flags |= BZ2_RB_CLOSE;
    return rb_ensure(bz_reader_foreach_line, (VALUE)&ctx, bz_reader_close, ctx.obj);
}
new(*args) click to toggle source

Internally allocates data,

@see Bzip2::Writer#initialize @see Bzip2::Reader#initialize @private

/*
 * Generic `new`: calls `allocate` on the receiving class, then runs the
 * instance's initializer with the caller's arguments.
 *
 * Returns the initialized instance.
 */
static VALUE bz_s_new(int argc, VALUE *argv, VALUE obj) {
    VALUE instance;

    instance = rb_funcall2(obj, rb_intern("allocate"), 0, 0);
    rb_obj_call_init(instance, argc, argv);
    return instance;
}
open(filename, &block=nil) → Bzip2::Reader click to toggle source

@param [String] filename the name of the file to read from @yieldparam [Bzip2::Reader] reader the Bzip2::Reader instance

If a block is given, the created Bzip2::Reader instance is yielded to the block and will be closed when the block completes. It is guaranteed via ensure that the reader is closed

If a block is not given, a Bzip2::Reader instance will be returned

Bzip2::Reader.open('file') { |f| puts f.gets }

reader = Bzip2::Reader.open('file')
puts reader.gets
reader.close

@return [Bzip2::Reader, nil]

/*
 * Class-level open(filename, ...) [{ |reader| ... }].
 *
 * Raises ArgumentError when no argument is supplied. Returns nil when the
 * open call returned nil. Without a block the new reader is returned; with
 * a block the reader is yielded and bz_reader_close is run via rb_ensure.
 */
static VALUE bz_reader_s_open(int argc, VALUE *argv, VALUE obj) {
    struct bz_file *file;
    VALUE reader;

    if (argc < 1) {
        rb_raise(rb_eArgError, "invalid number of arguments");
    }
    /*
     * The first argument is overwritten in place with the opened io so the
     * whole argv can be forwarded to id_new unchanged.
     */
    argv[0] = rb_funcall2(rb_mKernel, id_open, 1, argv);
    if (NIL_P(argv[0])) {
        return Qnil;
    }
    reader = rb_funcall2(obj, id_new, argc, argv);
    Data_Get_Struct(reader, struct bz_file, file);
    /* The io was opened here, so closing the reader must close it too. */
    file->flags |= BZ2_RB_CLOSE;
    if (!rb_block_given_p()) {
        return reader;
    }
    return rb_ensure(rb_yield, reader, bz_reader_close, reader);
}
readlines(filename, separator="\n") click to toggle source

Opens the given bz2 compressed file for reading and decompresses the file, returning an array of the lines of the file. A line is denoted by the separator argument.

Bzip2::Writer.open('file'){ |f| f << "a\n" << "b\n" << "c\n\nd" }

Bzip2::Reader.readlines('file')      # => ["a\n", "b\n", "c\n", "\n", "d"]
Bzip2::Reader.readlines('file', 'c') # => ["a\nb\nc", "\n\nd"]

@param [String] filename the path to the file to read @param [String] separator the character to denote a newline in the file @see Bzip2::Reader#readlines @return [Array] an array of lines for the file @raise [Bzip2::Error] if the file is not a valid bz2 compressed file

/*
 * Class-level readlines(filename[, sep]).
 *
 * Calls rb_mKernel's id_open method on the filename, wraps the result in a
 * new instance of `obj`, and runs bz_reader_i_readlines over it.
 * bz_reader_close is registered through rb_ensure, and BZ2_RB_CLOSE is set
 * so that close also closes the opened io.
 *
 * Returns nil when the open call returned nil; otherwise returns the value
 * of rb_ensure.
 */
static VALUE bz_reader_s_readlines(int argc, VALUE *argv, VALUE obj) {
    struct foreach_arg ctx;
    struct bz_file *file;
    VALUE path, separator;

    rb_scan_args(argc, argv, "11", &path, &separator);
#ifdef SafeStringValue
    SafeStringValue(path);
#else
    Check_SafeStr(path);
#endif
    /* argc - 1: number of arguments left once the filename is removed. */
    ctx.argc = argc - 1;
    ctx.sep = separator;
    ctx.obj = rb_funcall2(rb_mKernel, id_open, 1, &path);
    if (NIL_P(ctx.obj)) {
        return Qnil;
    }
    /* Replace the raw io with a reader wrapping it. */
    ctx.obj = rb_funcall2(obj, id_new, 1, &ctx.obj);
    Data_Get_Struct(ctx.obj, struct bz_file, file);
    file->flags |= BZ2_RB_CLOSE;
    return rb_ensure(bz_reader_i_readlines, (VALUE)&ctx, bz_reader_close, ctx.obj);
}

Public Instance Methods

close() click to toggle source

Closes this reader to disallow further reads.

reader = Bzip2::Reader.new File.open('file')
reader.close

reader.closed? # => true

@return [File] the io with which the reader was created. @raise [IOError] if the stream has already been closed

/*
 * Closes the reader: frees the output buffer, ends the decompression
 * stream when its state is BZ_OK, and clears the io reference.
 *
 * When BZ2_RB_CLOSE is set, the underlying io is also closed, provided it
 * responds to id_close and does not already report itself closed through
 * id_closed.
 *
 * Returns nil when BZ2_RB_CLOSE or BZ2_RB_INTERNAL is set, otherwise the io
 * the reader was holding.
 */
static VALUE bz_reader_close(VALUE obj) {
    struct bz_file *file;
    VALUE result;

    Get_BZ2(obj, file);
    if (file->buf) {
        free(file->buf);
        file->buf = 0;
    }
    if (file->state == BZ_OK) {
        BZ2_bzDecompressEnd(&(file->bzs));
    }
    if (file->flags & BZ2_RB_CLOSE) {
        /* Only ask the io whether it is closed if it supports the query. */
        int io_closed = rb_respond_to(file->io, id_closed)
            && RTEST(rb_funcall2(file->io, id_closed, 0, 0));

        if (!io_closed && rb_respond_to(file->io, id_close)) {
            rb_funcall2(file->io, id_close, 0, 0);
        }
    }
    result = (file->flags & (BZ2_RB_CLOSE|BZ2_RB_INTERNAL)) ? Qnil : file->io;
    /* A zeroed io is what bz_reader_closed reports as "closed". */
    file->io = 0;
    return result;
}
close!() click to toggle source

Originally undocumented and had no specs. Appears to work nearly the same as Bzip2::Reader#close...

/*
 * close!: closes the reader like bz_reader_close and, in addition, closes
 * the underlying io even when the reader does not own it (neither
 * BZ2_RB_CLOSE nor BZ2_RB_INTERNAL set).
 *
 * The io is closed only if it responds to id_close and does not already
 * report itself closed through id_closed.
 *
 * Always returns nil. Errors raised by Get_BZ2 or bz_reader_close (both
 * defined elsewhere) propagate to the caller.
 */
static VALUE bz_reader_close_bang(VALUE obj) {
    struct bz_file *bzf;
    VALUE io;
    int closed;

    Get_BZ2(obj, bzf);
    /*
     * bz_reader_close() resets bzf->io to 0, so the handle must be captured
     * before calling it. Reading bzf->io afterwards would test the zeroed
     * value and the io would never be closed.
     */
    io = bzf->io;
    closed = bzf->flags & (BZ2_RB_CLOSE|BZ2_RB_INTERNAL);
    bz_reader_close(obj);
    if (!closed && rb_respond_to(io, id_close)) {
        if (rb_respond_to(io, id_closed)) {
            closed = RTEST(rb_funcall2(io, id_closed, 0, 0));
        }
        if (!closed) {
            rb_funcall2(io, id_close, 0, 0);
        }
    }
    return Qnil;
}
closed() click to toggle source
Alias for: closed?
closed?() click to toggle source

Tests whether this reader has been closed.

@return [Boolean] true if it is or false otherwise.

/*
 * closed?: reports whether the reader still holds an io.
 *
 * Returns false while the io reference is truthy, true otherwise
 * (bz_reader_close zeroes the reference).
 */
static VALUE bz_reader_closed(VALUE obj) {
    struct bz_file *file;

    Data_Get_Struct(obj, struct bz_file, file);
    if (RTEST(file->io)) {
        return Qfalse;
    }
    return Qtrue;
}
Also aliased as: closed, closed
each(sep = "\n", &block) click to toggle source

Iterates over the lines of the stream.

@param [String] sep the byte which separates lines @yieldparam [String] line the next line of the file (including the separator

character)

@see Bzip2::Reader.foreach

/*
 * each / each_line: yields every line obtained from
 * bz_reader_gets_internal until it returns nil.
 *
 * The last argument passed to bz_reader_gets_internal is Qtrue on the first
 * call only and Qfalse afterwards; `td` is scratch storage handed to every
 * call.
 *
 * Returns the receiver.
 */
static VALUE bz_reader_each_line(int argc, VALUE *argv, VALUE obj) {
    int td[ASIZE];
    int first = Qtrue;
    VALUE line;

    for (;;) {
        line = bz_reader_gets_internal(argc, argv, obj, td, first);
        if (NIL_P(line)) {
            break;
        }
        first = Qfalse;
        rb_yield(line);
    }
    return obj;
}
Also aliased as: each_line, each_line
each_byte(&block) click to toggle source

Iterates over the decompressed bytes of the file.

Bzip2::Writer.open('file'){ |f| f << 'asdf' }
reader = Bzip2::Reader.new File.open('file')
reader.each_byte{ |b| puts "#{b} #{b.chr}" }

# Output:
# 97 a
# 115 s
# 100 d
# 102 f

@yieldparam [Integer] byte the decompressed bytes of the file

/*
 * each_byte: yields each value returned by bz_getc, masked to 0..255, as
 * an Integer, stopping when bz_getc returns EOF.
 *
 * Returns the receiver.
 */
static VALUE bz_reader_each_byte(VALUE obj) {
    for (;;) {
        int byte = bz_getc(obj);

        if (byte == EOF) {
            break;
        }
        rb_yield(INT2FIX(byte & 0xff));
    }
    return obj;
}
each_line(*args) click to toggle source
Alias for: each
eof() click to toggle source
Alias for: eof?
eof?() click to toggle source

Tests whether the bzip stream has reached its end (see Bzip2::Reader#eoz?) and then tests that the underlying IO has also reached an eof

@return [Boolean] true if the stream has reached its end or false otherwise.

/*
 * eof?: true only when the bz stream is at its end AND nothing more can be
 * read.
 *
 * A falsy result from bz_reader_eoz (nil or false) is returned unchanged.
 * Otherwise pending compressed input (avail_in != 0) means "not eof"; with
 * no pending input a single byte is probed with bz_reader_getc and, if one
 * was obtained, pushed back with bz_reader_ungetc.
 */
static VALUE bz_reader_eof(VALUE obj) {
    struct bz_file *file;
    VALUE at_eoz, probe;

    at_eoz = bz_reader_eoz(obj);
    if (!RTEST(at_eoz)) {
        return at_eoz;
    }
    Get_BZ2(obj, file);
    if (file->bzs.avail_in) {
        return Qfalse;
    }
    probe = bz_reader_getc(obj);
    if (NIL_P(probe)) {
        return Qtrue;
    }
    /* A byte was available: restore it so the probe is not observable. */
    bz_reader_ungetc(obj, probe);
    return Qfalse;
}
Also aliased as: eof, eof
eoz() click to toggle source
Alias for: eoz?
eoz?() click to toggle source

Test whether the end of the bzip stream has been reached

@return [Boolean] true if the reader is at the end of the bz stream or

+false+ otherwise
/*
 * eoz?: reports whether the end of the bz stream has been reached.
 *
 * Returns nil when either the input (`in`) or the output buffer (`buf`) is
 * unset, true when the state is BZ_STREAM_END and avail_out is zero, and
 * false otherwise.
 */
static VALUE bz_reader_eoz(VALUE obj) {
    struct bz_file *file;

    Get_BZ2(obj, file);
    if (file->in && file->buf) {
        return (file->state == BZ_STREAM_END && !file->bzs.avail_out) ? Qtrue : Qfalse;
    }
    return Qnil;
}
Also aliased as: eoz, eoz
finish() click to toggle source

Originally undocumented and had no specs. Appears to call Bzip2::Reader#read and then mark the stream as finished, but this didn't work for me...

/*
 * finish: if an output buffer exists, invokes the receiver's id_read method
 * with no arguments (result discarded) and frees the buffer. In every case
 * the buffer pointer is cleared and the state is reset to BZ_OK.
 *
 * Always returns nil.
 */
static VALUE bz_reader_finish(VALUE obj) {
    struct bz_file *file;

    Get_BZ2(obj, file);
    if (file->buf != 0) {
        rb_funcall2(obj, id_read, 0, 0);
        /* file->buf is read again here, after the read call has run. */
        free(file->buf);
    }
    file->state = BZ_OK;
    file->buf = 0;
    return Qnil;
}
getc() click to toggle source

Reads one character from the stream, returning the byte read.

reader = Bzip2::Reader.new Bzip2.compress('ab')
reader.getc # => 97
reader.getc # => 98
reader.getc # => nil

@return [Integer, nil] the byte value of the character read or nil if eoz

has been reached
/*
 * getc: reads one byte by calling bz_reader_read with a length of 1.
 *
 * Returns the byte as an Integer in 0..255, or nil when the read returned
 * nil or an empty string.
 */
static VALUE bz_reader_getc(VALUE obj) {
    VALUE one = INT2FIX(1);
    VALUE chunk = bz_reader_read(1, &one, obj);

    if (!NIL_P(chunk) && RSTRING_LEN(chunk) != 0) {
        return INT2FIX(RSTRING_PTR(chunk)[0] & 0xff);
    }
    return Qnil;
}
gets(sep = "\n") click to toggle source

Reads a line from the stream until the separator is reached. This does not throw an exception, but rather returns nil if an eoz/eof error occurs

reader = Bzip2::Reader.new Bzip2.compress("a\nb")
reader.gets # => "a\n"
reader.gets # => "b"
reader.gets # => nil

@return [String, nil] the read data or nil if eoz has been reached @see Bzip2::Reader#readline

/*
 * gets: fetches one line through bz_reader_gets_internal and, when a line
 * was obtained, records it with rb_lastline_set.
 *
 * Returns the line, or nil when none was available.
 */
static VALUE bz_reader_gets_m(int argc, VALUE *argv, VALUE obj) {
    int td[ASIZE];
    VALUE line;

    line = bz_reader_gets_internal(argc, argv, obj, td, Qtrue);
    if (NIL_P(line)) {
        return line;
    }
    rb_lastline_set(line);
    return line;
}
lineno() click to toggle source

Returns the current line number that the stream is at. This number is based on the newline separator being "\n"

reader = Bzip2::Reader.new Bzip2.compress("a\nb")
reader.lineno     # => 0
reader.readline   # => "a\n"
reader.lineno     # => 1
reader.readline   # => "b"
reader.lineno     # => 2

@return [Integer] the current line number

/*
 * lineno: returns the reader's stored line counter as a Ruby Integer.
 */
static VALUE bz_reader_lineno(VALUE obj) {
    struct bz_file *file;

    Get_BZ2(obj, file);
    return INT2NUM(file->lineno);
}
lineno=(num) click to toggle source

Sets the internal line number count that this stream should be set at

reader = Bzip2::Reader.new Bzip2.compress("a\nb")
reader.lineno     # => 0
reader.readline   # => "a\n"
reader.lineno     # => 1
reader.lineno = 0
reader.readline   # => "b"
reader.lineno     # => 1

@note This does not actually rewind or move the stream forward @param [Integer] lineno the line number which the stream should consider

being set at

@return [Integer] the line number provided

/*
 * lineno=: overwrites the reader's stored line counter with `lineno`
 * converted through NUM2INT. Only the counter is written; no stream state
 * is touched here.
 *
 * Returns the value that was passed in.
 */
static VALUE bz_reader_set_lineno(VALUE obj, VALUE lineno) {
    struct bz_file *file;

    Get_BZ2(obj, file);
    file->lineno = NUM2INT(lineno);
    return lineno;
}
read(len = nil) click to toggle source

Read decompressed data from the stream.

Bzip2::Reader.new(Bzip2.compress('ab')).read    # => "ab"
Bzip2::Reader.new(Bzip2.compress('ab')).read(1) # => "a"

@return [String, nil] the decompressed data read or nil if eoz has been

reached

@param [Integer] len the number of decompressed bytes which should be read.

If nothing is specified, the entire stream is read
/*
 * read([len]): returns decompressed data as a String.
 *
 * With no argument (or nil) everything up to BZ_STREAM_END is returned;
 * with a length, at most that many bytes are returned.
 *
 * Raises ArgumentError for a negative length. Returns nil when bz_get_bzf
 * yields no struct, and an empty string when the length is 0. The result is
 * tainted when the receiver is tainted.
 */
static VALUE bz_reader_read(int argc, VALUE *argv, VALUE obj) {
    struct bz_file *file;
    VALUE out, length;
    int pending;
    int want = -1;    /* -1 means "no limit" */

    rb_scan_args(argc, argv, "01", &length);
    if (!NIL_P(length)) {
        want = NUM2INT(length);
        if (want < 0) {
            rb_raise(rb_eArgError, "negative length %d given", want);
        }
    }
    file = bz_get_bzf(obj);
    if (!file) {
        return Qnil;
    }
    out = rb_str_new(0, 0);
    if (OBJ_TAINTED(obj)) {
        OBJ_TAINT(out);
    }
    if (want == 0) {
        return out;
    }
    for (;;) {
        /* avail_out is used here as the count of bytes ready at next_out. */
        pending = file->bzs.avail_out;
        if (want != -1 && (RSTRING_LEN(out) + pending) >= want) {
            /* Enough data is buffered: take only what is still missing. */
            want -= RSTRING_LEN(out);
            out = rb_str_cat(out, file->bzs.next_out, want);
            file->bzs.next_out += want;
            file->bzs.avail_out -= want;
            return out;
        }
        if (pending) {
            out = rb_str_cat(out, file->bzs.next_out, pending);
        }
        /* Defined elsewhere; presumably refills the buffer - confirm. */
        if (bz_next_available(file, 0) == BZ_STREAM_END) {
            return out;
        }
    }
    /* not reached */
    return Qnil;
}
readchar() click to toggle source

Performs the same as Bzip2::Reader#getc except Bzip2::EOZError is raised if eoz has been reached

@raise [Bzip2::EOZError] if eoz has been reached

/*
 * readchar: like bz_reader_getc, but calls bz_eoz_error() when no byte was
 * available.
 *
 * Returns the byte obtained from bz_reader_getc.
 */
static VALUE bz_reader_readchar(VALUE obj) {
    VALUE byte = bz_reader_getc(obj);

    if (!NIL_P(byte)) {
        return byte;
    }
    bz_eoz_error();
    return byte;
}
readline(sep = "\n") click to toggle source

Reads one line from the stream and returns it (including the separator)

reader = Bzip2::Reader.new Bzip2.compress("a\nb")
reader.readline # => "a\n"
reader.readline # => "b"
reader.readline # => raises Bzip2::EOZError

@param [String] sep the newline separator character @return [String] the read line @see Bzip2::Reader.readlines @raise [Bzip2::EOZError] if the stream has reached its end

/*
 * readline: like bz_reader_gets_m, but calls bz_eoz_error() when no line
 * was available.
 *
 * Returns the line obtained from bz_reader_gets_m.
 */
static VALUE bz_reader_readline(int argc, VALUE *argv, VALUE obj) {
    VALUE line = bz_reader_gets_m(argc, argv, obj);

    if (!NIL_P(line)) {
        return line;
    }
    bz_eoz_error();
    return line;
}
readlines(sep = "\n") click to toggle source

Reads the lines of the files and returns the result as an array.

If the stream has reached eoz, then an empty array is returned

@param [String] sep the newline separator character @return [Array] an array of lines read @see Bzip2::Reader.readlines

/*
 * readlines: collects every line obtained from bz_reader_gets_internal
 * into a new Array until it returns nil.
 *
 * The last argument passed to bz_reader_gets_internal is Qtrue on the first
 * call only and Qfalse afterwards.
 *
 * Returns the Array, which is empty when no line was available.
 */
static VALUE bz_reader_readlines(int argc, VALUE *argv, VALUE obj) {
    int td[ASIZE];
    int first = Qtrue;
    VALUE lines = rb_ary_new();
    VALUE line;

    for (;;) {
        line = bz_reader_gets_internal(argc, argv, obj, td, first);
        if (NIL_P(line)) {
            break;
        }
        first = Qfalse;
        rb_ary_push(lines, line);
    }
    return lines;
}
to_io() click to toggle source

Returns the io stream underlying this stream. If the stream was constructed with a file, that is returned. Otherwise, an empty string is returned.

@return [File, String] similar to whatever the stream was constructed with @raise [IOError] if the stream has been closed

/*
 * to_io: returns the io object currently stored in the reader's struct.
 */
static VALUE bz_to_io(VALUE obj) {
    struct bz_file *file;

    Get_BZ2(obj, file);
    return file->io;
}
ungetc(byte) click to toggle source

"Ungets" a character/byte. This rewinds the stream by 1 character and inserts the given character into that position. The next read will return the given character as the first one read

reader = Bzip2::Reader.new Bzip2.compress('abc')
reader.getc         # => 97
reader.ungetc 97    # => nil
reader.getc         # => 97
reader.ungetc 42    # => nil
reader.getc         # => 42
reader.getc         # => 98
reader.getc         # => 99
reader.ungetc 100   # => nil
reader.getc         # => 100

@param [Integer] byte the byte to 'unget' @return [nil] always

/*
 * ungetc(byte): pushes one byte back into the reader's output buffer.
 *
 * Calls bz_raise(BZ_SEQUENCE_ERROR) when no output buffer exists. When
 * avail_out is below buflen, next_out is stepped back by one and the byte
 * is stored there. Otherwise the buffer is grown, the byte is appended at
 * index buflen, and next_out/avail_out are reset to cover the whole buffer.
 *
 * Always returns nil.
 */
static VALUE bz_reader_ungetc(VALUE obj, VALUE a) {
    struct bz_file *file;
    int byte = NUM2INT(a);

    Get_BZ2(obj, file);
    if (!file->buf) {
        bz_raise(BZ_SEQUENCE_ERROR);
    }
    if (file->bzs.avail_out >= file->buflen) {
        /* +2: one slot for the new byte, one for the trailing NUL. */
        file->buf = REALLOC_N(file->buf, char, file->buflen + 2);
        file->buf[file->buflen] = byte;
        file->buflen += 1;
        file->buf[file->buflen] = '\0';
        file->bzs.next_out = file->buf;
        file->bzs.avail_out = file->buflen;
    } else {
        file->bzs.next_out -= 1;
        *file->bzs.next_out = byte;
        file->bzs.avail_out += 1;
    }
    return Qnil;
}
ungets(str) click to toggle source

Equivalently "unget" a string. When called on a string that was just read from the stream, this inserts the string back into the stream to be read again.

When called with a string which hasn't been read from the stream, it does the same thing, and the next read line/data will start from the beginning of the given data and then continue on with the rest of the stream.

reader = Bzip2::Reader.new Bzip2.compress("a\nb")
reader.gets           # => "a\n"
reader.ungets "a\n"   # => nil
reader.gets           # => "a\n"
reader.ungets "foo"   # => nil
reader.gets           # => "foob"

@param [String] str the string to insert back into the stream @return [nil] always

/*
 * ungets(str): pushes a whole string back into the reader's output buffer.
 *
 * `a` must be a String (Check_Type). Calls bz_raise(BZ_SEQUENCE_ERROR) when
 * no output buffer exists. When avail_out plus the string length is below
 * buflen, next_out is stepped back by the string length and the bytes are
 * copied there. Otherwise the buffer is grown, the bytes are appended at
 * index buflen, and next_out/avail_out are reset to cover the whole buffer.
 *
 * Always returns nil.
 */
static VALUE bz_reader_ungets(VALUE obj, VALUE a) {
    struct bz_file *file;

    Check_Type(a, T_STRING);
    Get_BZ2(obj, file);
    if (!file->buf) {
        bz_raise(BZ_SEQUENCE_ERROR);
    }
    if ((file->bzs.avail_out + RSTRING_LEN(a)) >= file->buflen) {
        /* +1 leaves room for the trailing NUL written below. */
        file->buf = REALLOC_N(file->buf, char, file->buflen + RSTRING_LEN(a) + 1);
        MEMCPY(file->buf + file->buflen, RSTRING_PTR(a), char,RSTRING_LEN(a));
        file->buflen += RSTRING_LEN(a);
        file->buf[file->buflen] = '\0';
        file->bzs.next_out = file->buf;
        file->bzs.avail_out = file->buflen;
    } else {
        file->bzs.next_out -= RSTRING_LEN(a);
        MEMCPY(file->bzs.next_out, RSTRING_PTR(a), char, RSTRING_LEN(a));
        file->bzs.avail_out += RSTRING_LEN(a);
    }
    return Qnil;
}
unused() click to toggle source

Specs were missing for this method originally and playing around with it gave some very odd results, so unless you know what you're doing, I wouldn't mess around with this...

/*
 * unused: hands back the compressed input left over after the end of the
 * bz stream.
 *
 * Returns nil unless input is set and the state is BZ_STREAM_END. Otherwise
 * returns a tainted String holding the avail_in bytes at next_in (and
 * resets avail_in to 0), or an empty tainted String when avail_in is 0.
 */
static VALUE bz_reader_unused(VALUE obj) {
    struct bz_file *file;
    VALUE leftover;

    Get_BZ2(obj, file);
    if (file->in && file->state == BZ_STREAM_END) {
        if (!file->bzs.avail_in) {
            return rb_tainted_str_new(0, 0);
        }
        leftover = rb_tainted_str_new(file->bzs.next_in, file->bzs.avail_in);
        /* The leftover bytes have been handed out; mark them consumed. */
        file->bzs.avail_in = 0;
        return leftover;
    }
    return Qnil;
}
unused=(p1) click to toggle source

Specs were missing for this method originally and playing around with it gave some very odd results, so unless you know what you're doing, I wouldn't mess around with this...

/*
 * unused=: supplies additional input to the reader.
 *
 * `a` must be a String (Check_Type). If the reader has no input string yet,
 * a copy of `a` becomes the input; otherwise the bytes of `a` are appended
 * to the existing input. next_in/avail_in are then pointed at the whole
 * input string.
 *
 * Always returns nil.
 */
static VALUE bz_reader_set_unused(VALUE obj, VALUE a) {
    struct bz_file *file;

    Check_Type(a, T_STRING);
    Get_BZ2(obj, file);
    if (file->in) {
        file->in = rb_str_cat(file->in, RSTRING_PTR(a), RSTRING_LEN(a));
    } else {
        file->in = rb_str_new(RSTRING_PTR(a), RSTRING_LEN(a));
    }
    file->bzs.avail_in = RSTRING_LEN(file->in);
    file->bzs.next_in = RSTRING_PTR(file->in);
    return Qnil;
}

[Validate]

Generated with the Darkfish Rdoc Generator 2.