Skip to content

Commit

Permalink
Moved RubyString#succCommon19 and supporting methods to StringSupport.
Browse files Browse the repository at this point in the history
nirvdrum committed Jan 2, 2015
1 parent 78481a6 commit 8d89690
Showing 2 changed files with 171 additions and 167 deletions.
169 changes: 2 additions & 167 deletions core/src/main/java/org/jruby/RubyString.java
Original file line number Diff line number Diff line change
@@ -41,7 +41,6 @@
import org.jcodings.Encoding;
import org.jcodings.EncodingDB;
import org.jcodings.ascii.AsciiTables;
import org.jcodings.constants.CharacterType;
import org.jcodings.exception.EncodingException;
import org.jcodings.specific.ASCIIEncoding;
import org.jcodings.specific.USASCIIEncoding;
@@ -3263,113 +3262,12 @@ public IRubyObject succ_bang() {
return succ_bang19();
}

private static enum NeighborChar {NOT_CHAR, FOUND, WRAPPED}

private static NeighborChar succChar(Encoding enc, byte[]bytes, int p, int len) {
while (true) {
int i = len - 1;
for (; i >= 0 && bytes[p + i] == (byte)0xff; i--) bytes[p + i] = 0;
if (i < 0) return NeighborChar.WRAPPED;
bytes[p + i] = (byte)((bytes[p + i] & 0xff) + 1);
int cl = StringSupport.preciseLength(enc, bytes, p, p + len);
if (cl > 0) {
if (cl == len) {
return NeighborChar.FOUND;
} else {
for (int j = p + cl; j < p + len - cl; j++) bytes[j] = (byte)0xff;
}
}
if (cl == -1 && i < len - 1) {
int len2 = len - 1;
for (; len2 > 0; len2--) {
if (StringSupport.preciseLength(enc, bytes, p, p + len2) != -1) break;
}
for (int j = p + len2 + 1; j < p + len - (len2 + 1); j++) bytes[j] = (byte)0xff;
}
}
}

private static NeighborChar predChar(Encoding enc, byte[]bytes, int p, int len) {
while (true) {
int i = len - 1;
for (; i >= 0 && bytes[p + i] == 0; i--) bytes[p + i] = (byte)0xff;
if (i < 0) return NeighborChar.WRAPPED;
bytes[p + i] = (byte)((bytes[p + i] & 0xff) - 1);
int cl = StringSupport.preciseLength(enc, bytes, p, p + len);
if (cl > 0) {
if (cl == len) {
return NeighborChar.FOUND;
} else {
for (int j = p + cl; j < p + len - cl; j++) bytes[j] = 0;
}
}
if (cl == -1 && i < len - 1) {
int len2 = len - 1;
for (; len2 > 0; len2--) {
if (StringSupport.preciseLength(enc, bytes, p, p + len2) != -1) break;
}
for (int j = p + len2 + 1; j < p + len - (len2 + 1); j++) bytes[j] = 0;
}
}
}

private static NeighborChar succAlnumChar(Encoding enc, byte[]bytes, int p, int len, byte[]carry, int carryP) {
byte save[] = new byte[org.jcodings.Config.ENC_CODE_TO_MBC_MAXLEN];
int c = enc.mbcToCode(bytes, p, p + len);

final int cType;
if (enc.isDigit(c)) {
cType = CharacterType.DIGIT;
} else if (enc.isAlpha(c)) {
cType = CharacterType.ALPHA;
} else {
return NeighborChar.NOT_CHAR;
}

System.arraycopy(bytes, p, save, 0, len);
NeighborChar ret = succChar(enc, bytes, p, len);
if (ret == NeighborChar.FOUND) {
c = enc.mbcToCode(bytes, p, p + len);
if (enc.isCodeCType(c, cType)) return NeighborChar.FOUND;
}

System.arraycopy(save, 0, bytes, p, len);
int range = 1;

while (true) {
System.arraycopy(bytes, p, save, 0, len);
ret = predChar(enc, bytes, p, len);
if (ret == NeighborChar.FOUND) {
c = enc.mbcToCode(bytes, p, p + len);
if (!enc.isCodeCType(c, cType)) {
System.arraycopy(save, 0, bytes, p, len);
break;
}
} else {
System.arraycopy(save, 0, bytes, p, len);
break;
}
range++;
}

if (range == 1) return NeighborChar.NOT_CHAR;

if (cType != CharacterType.DIGIT) {
System.arraycopy(bytes, p, carry, carryP, len);
return NeighborChar.WRAPPED;
}

System.arraycopy(bytes, p, carry, carryP, len);
succChar(enc, carry, carryP, len);
return NeighborChar.WRAPPED;
}

@JRubyMethod(name = {"succ", "next"})
public IRubyObject succ19(ThreadContext context) {
Ruby runtime = context.runtime;
final RubyString str;
if (value.getRealSize() > 0) {
str = new RubyString(runtime, getMetaClass(), succCommon19(value));
str = new RubyString(runtime, getMetaClass(), StringSupport.succCommon(value));
// TODO: rescan code range ?
} else {
str = newEmptyString(runtime, getType(), value.getEncoding());
@@ -3381,76 +3279,13 @@ public IRubyObject succ19(ThreadContext context) {
public IRubyObject succ_bang19() {
modifyCheck();
if (value.getRealSize() > 0) {
value = succCommon19(value);
value = StringSupport.succCommon(value);
shareLevel = SHARE_LEVEL_NONE;
// TODO: rescan code range ?
}
return this;
}

private ByteList succCommon19(ByteList original) {
byte carry[] = new byte[org.jcodings.Config.ENC_CODE_TO_MBC_MAXLEN];
int carryP = 0;
carry[0] = 1;
int carryLen = 1;

ByteList valueCopy = new ByteList(original);
valueCopy.setEncoding(original.getEncoding());
Encoding enc = original.getEncoding();
int p = valueCopy.getBegin();
int end = p + valueCopy.getRealSize();
int s = end;
byte[]bytes = valueCopy.getUnsafeBytes();

NeighborChar neighbor = NeighborChar.FOUND;
int lastAlnum = -1;
boolean alnumSeen = false;
while ((s = enc.prevCharHead(bytes, p, s, end)) != -1) {
if (neighbor == NeighborChar.NOT_CHAR && lastAlnum != -1) {
if (ASCII.isAlpha(bytes[lastAlnum] & 0xff) ?
ASCII.isDigit(bytes[s] & 0xff) :
ASCII.isDigit(bytes[lastAlnum] & 0xff) ?
ASCII.isAlpha(bytes[s] & 0xff) : false) {
s = lastAlnum;
break;
}
}

int cl = StringSupport.preciseLength(enc, bytes, s, end);
if (cl <= 0) continue;
switch (neighbor = succAlnumChar(enc, bytes, s, cl, carry, 0)) {
case NOT_CHAR: continue;
case FOUND: return valueCopy;
case WRAPPED: lastAlnum = s;
}
alnumSeen = true;
carryP = s - p;
carryLen = cl;
}

if (!alnumSeen) {
s = end;
while ((s = enc.prevCharHead(bytes, p, s, end)) != -1) {
int cl = StringSupport.preciseLength(enc, bytes, s, end);
if (cl <= 0) continue;
neighbor = succChar(enc, bytes, s, cl);
if (neighbor == NeighborChar.FOUND) return valueCopy;
if (StringSupport.preciseLength(enc, bytes, s, s + 1) != cl) succChar(enc, bytes, s, cl); /* wrapped to \0...\0. search next valid char. */
if (!enc.isAsciiCompatible()) {
System.arraycopy(bytes, s, carry, 0, cl);
carryLen = cl;
}
carryP = s - p;
}
}
valueCopy.ensure(valueCopy.getBegin() + valueCopy.getRealSize() + carryLen);
s = valueCopy.getBegin() + carryP;
System.arraycopy(valueCopy.getUnsafeBytes(), s, valueCopy.getUnsafeBytes(), s + carryLen, valueCopy.getRealSize() - carryP);
System.arraycopy(carry, 0, valueCopy.getUnsafeBytes(), s, carryLen);
valueCopy.setRealSize(valueCopy.getRealSize() + carryLen);
return valueCopy;
}

/** rb_str_upto_m
*
*/
169 changes: 169 additions & 0 deletions core/src/main/java/org/jruby/util/StringSupport.java
Original file line number Diff line number Diff line change
@@ -29,6 +29,7 @@

import org.jcodings.Encoding;
import org.jcodings.ascii.AsciiTables;
import org.jcodings.constants.CharacterType;
import org.jcodings.specific.ASCIIEncoding;
import org.jcodings.specific.UTF8Encoding;
import org.jcodings.util.IntHash;
@@ -990,4 +991,172 @@ public static int trNext(TR t, Ruby runtime, Encoding enc) {
}
}
}

/**
* succ
*/

public static enum NeighborChar {NOT_CHAR, FOUND, WRAPPED}

public static ByteList succCommon(ByteList original) {
byte carry[] = new byte[org.jcodings.Config.ENC_CODE_TO_MBC_MAXLEN];
int carryP = 0;
carry[0] = 1;
int carryLen = 1;

ByteList valueCopy = new ByteList(original);
valueCopy.setEncoding(original.getEncoding());
Encoding enc = original.getEncoding();
int p = valueCopy.getBegin();
int end = p + valueCopy.getRealSize();
int s = end;
byte[]bytes = valueCopy.getUnsafeBytes();

NeighborChar neighbor = NeighborChar.FOUND;
int lastAlnum = -1;
boolean alnumSeen = false;
while ((s = enc.prevCharHead(bytes, p, s, end)) != -1) {
if (neighbor == NeighborChar.NOT_CHAR && lastAlnum != -1) {
if (ASCIIEncoding.INSTANCE.isAlpha(bytes[lastAlnum] & 0xff) ?
ASCIIEncoding.INSTANCE.isDigit(bytes[s] & 0xff) :
ASCIIEncoding.INSTANCE.isDigit(bytes[lastAlnum] & 0xff) ?
ASCIIEncoding.INSTANCE.isAlpha(bytes[s] & 0xff) : false) {
s = lastAlnum;
break;
}
}

int cl = preciseLength(enc, bytes, s, end);
if (cl <= 0) continue;
switch (neighbor = succAlnumChar(enc, bytes, s, cl, carry, 0)) {
case NOT_CHAR: continue;
case FOUND: return valueCopy;
case WRAPPED: lastAlnum = s;
}
alnumSeen = true;
carryP = s - p;
carryLen = cl;
}

if (!alnumSeen) {
s = end;
while ((s = enc.prevCharHead(bytes, p, s, end)) != -1) {
int cl = preciseLength(enc, bytes, s, end);
if (cl <= 0) continue;
neighbor = succChar(enc, bytes, s, cl);
if (neighbor == NeighborChar.FOUND) return valueCopy;
if (preciseLength(enc, bytes, s, s + 1) != cl) succChar(enc, bytes, s, cl); /* wrapped to \0...\0. search next valid char. */
if (!enc.isAsciiCompatible()) {
System.arraycopy(bytes, s, carry, 0, cl);
carryLen = cl;
}
carryP = s - p;
}
}
valueCopy.ensure(valueCopy.getBegin() + valueCopy.getRealSize() + carryLen);
s = valueCopy.getBegin() + carryP;
System.arraycopy(valueCopy.getUnsafeBytes(), s, valueCopy.getUnsafeBytes(), s + carryLen, valueCopy.getRealSize() - carryP);
System.arraycopy(carry, 0, valueCopy.getUnsafeBytes(), s, carryLen);
valueCopy.setRealSize(valueCopy.getRealSize() + carryLen);
return valueCopy;
}

public static NeighborChar succChar(Encoding enc, byte[] bytes, int p, int len) {
while (true) {
int i = len - 1;
for (; i >= 0 && bytes[p + i] == (byte)0xff; i--) bytes[p + i] = 0;
if (i < 0) return NeighborChar.WRAPPED;
bytes[p + i] = (byte)((bytes[p + i] & 0xff) + 1);
int cl = preciseLength(enc, bytes, p, p + len);
if (cl > 0) {
if (cl == len) {
return NeighborChar.FOUND;
} else {
for (int j = p + cl; j < p + len - cl; j++) bytes[j] = (byte)0xff;
}
}
if (cl == -1 && i < len - 1) {
int len2 = len - 1;
for (; len2 > 0; len2--) {
if (preciseLength(enc, bytes, p, p + len2) != -1) break;
}
for (int j = p + len2 + 1; j < p + len - (len2 + 1); j++) bytes[j] = (byte)0xff;
}
}
}

private static NeighborChar succAlnumChar(Encoding enc, byte[]bytes, int p, int len, byte[]carry, int carryP) {
byte save[] = new byte[org.jcodings.Config.ENC_CODE_TO_MBC_MAXLEN];
int c = enc.mbcToCode(bytes, p, p + len);

final int cType;
if (enc.isDigit(c)) {
cType = CharacterType.DIGIT;
} else if (enc.isAlpha(c)) {
cType = CharacterType.ALPHA;
} else {
return NeighborChar.NOT_CHAR;
}

System.arraycopy(bytes, p, save, 0, len);
NeighborChar ret = succChar(enc, bytes, p, len);
if (ret == NeighborChar.FOUND) {
c = enc.mbcToCode(bytes, p, p + len);
if (enc.isCodeCType(c, cType)) return NeighborChar.FOUND;
}

System.arraycopy(save, 0, bytes, p, len);
int range = 1;

while (true) {
System.arraycopy(bytes, p, save, 0, len);
ret = predChar(enc, bytes, p, len);
if (ret == NeighborChar.FOUND) {
c = enc.mbcToCode(bytes, p, p + len);
if (!enc.isCodeCType(c, cType)) {
System.arraycopy(save, 0, bytes, p, len);
break;
}
} else {
System.arraycopy(save, 0, bytes, p, len);
break;
}
range++;
}

if (range == 1) return NeighborChar.NOT_CHAR;

if (cType != CharacterType.DIGIT) {
System.arraycopy(bytes, p, carry, carryP, len);
return NeighborChar.WRAPPED;
}

System.arraycopy(bytes, p, carry, carryP, len);
succChar(enc, carry, carryP, len);
return NeighborChar.WRAPPED;
}

private static NeighborChar predChar(Encoding enc, byte[]bytes, int p, int len) {
while (true) {
int i = len - 1;
for (; i >= 0 && bytes[p + i] == 0; i--) bytes[p + i] = (byte)0xff;
if (i < 0) return NeighborChar.WRAPPED;
bytes[p + i] = (byte)((bytes[p + i] & 0xff) - 1);
int cl = preciseLength(enc, bytes, p, p + len);
if (cl > 0) {
if (cl == len) {
return NeighborChar.FOUND;
} else {
for (int j = p + cl; j < p + len - cl; j++) bytes[j] = 0;
}
}
if (cl == -1 && i < len - 1) {
int len2 = len - 1;
for (; len2 > 0; len2--) {
if (preciseLength(enc, bytes, p, p + len2) != -1) break;
}
for (int j = p + len2 + 1; j < p + len - (len2 + 1); j++) bytes[j] = 0;
}
}
}
}

0 comments on commit 8d89690

Please sign in to comment.