module URLEscape

Public Class Methods

escape(str)
/*
 * URL-escape the bytes of +str+ and return the result as a new string.
 *
 * Each input byte is handled independently:
 *   - a byte below 0x80 that valid_literal() accepts is copied unchanged;
 *   - a space is replaced by the first byte of `plus`;
 *   - every other byte (remaining ASCII and all bytes >= 0x80) is replaced
 *     by the 3-byte entry hex_table[byte].
 *
 * Multi-byte UTF-8 sequences need no grouping: every byte of such a
 * sequence is >= 0x80 and is therefore emitted through hex_table on its
 * own.  Treating bytes independently also means an ASCII byte that follows
 * a truncated multi-byte sequence still goes through the literal/space
 * rules instead of being swallowed as a presumed continuation byte.
 *
 * self - receiver of the Ruby method call (unused).
 * str  - object to escape; converted in place with StringValue().
 *
 * valid_literal, plus and hex_table are defined elsewhere in this file.
 */
static VALUE escape(VALUE self, VALUE str)
{
  const char *src;
  long len;   /* RSTRING_LEN yields a long; an int could truncate it */
  long i;
  VALUE outstr;

  StringValue(str);
  src = RSTRING_PTR(str);
  len = RSTRING_LEN(str);
  outstr = rb_str_buf_new(len);

  for(i = 0; i < len; i++) {
    /* Read as unsigned so bytes >= 0x80 index hex_table correctly. */
    const unsigned char byte = (unsigned char)src[i];

    if(byte < 0x80 && valid_literal(byte)) {
      rb_str_buf_cat(outstr, src + i, 1);
    } else if(byte == ' ') {
      /* NOTE(review): only one byte of `plus` is appended, so this is
       * correct only if `plus` is a single character such as "+" --
       * confirm against its definition. */
      rb_str_buf_cat(outstr, plus, 1);
    } else {
      /* ASCII that needs encoding, or any high (non-ASCII) byte. */
      rb_str_buf_cat(outstr, hex_table[byte], 3);
    }
  }

  return outstr;
}
unescape(str)
/*
 * Reverse URL escaping of +str+ and return the result as a new string.
 *
 *   - "%XY", where hex() accepts both X and Y, becomes the single byte
 *     (X << 4) + Y;
 *   - '+' becomes the first byte of `space`;
 *   - every other byte is copied unchanged, including a '%' that is not
 *     followed by two accepted hex digits.
 *
 * self - receiver of the Ruby method call (unused).
 * str  - object to unescape; converted in place with StringValue().
 *
 * hex and space are defined elsewhere in this file.
 */
static VALUE unescape(VALUE self, VALUE str) {
  const char* buf;
  const char* bufend;
  VALUE outstr;

  StringValue(str);
  buf = RSTRING_PTR(str);
  bufend = buf + RSTRING_LEN(str);
  outstr = rb_str_buf_new(RSTRING_LEN(str));

  while(buf < bufend) {
    /* A full escape needs three bytes ('%' plus two digits) inside the
     * string; requiring fewer would read buf[2] past the end of the data. */
    if(buf[0] == '%' && bufend - buf >= 3) {
      /* Held as int so the ">= 0" validity test below also works where
       * plain char is unsigned.  Assumes hex() reports a non-hex digit
       * with a negative value -- TODO confirm against its definition. */
      const int high = hex(buf[1]);
      const int low  = hex(buf[2]);

      if(high >= 0 && low >= 0) {
        const char byte = (char)(low + (high << 4));
        rb_str_buf_cat(outstr, &byte, 1);
        buf += 3;
        continue;
      }
    }

    if(buf[0] == '+') {
      rb_str_buf_cat(outstr, space, 1);
    } else {
      /* Ordinary byte, or a '%' that did not start a valid escape. */
      rb_str_buf_cat(outstr, buf, 1);
    }
    buf++;
  }

  return outstr;
}