须知
1.String不属于基本数据类型.
2.String是Immutable(不可变的).String的大部分方法都会返回一个新的String.
3.Java对于String提供了编译期的运算符重载, String对于(+)的实现通过StringBuilder的append()方法.
4.Java默认提供了对于所有类转化为String类的方法, 即都继承了Object的toString()方法.
接口
public final class String
implements java.io.Serializable, Comparable<String>, CharSequence {
- Serializable序列化接口
- Comparable<String> 比较接口
- CharSequence 字符串接口
看下CharSequence接口的方法, 是一些作为字符串的基础方法
int length();
char charAt(int index);
CharSequence subSequence(int start, int end);
public String toString();
字段
private final char value[];
private int hash; // Default to 0
private static final long serialVersionUID = -6849794470754667710L;
private static final ObjectStreamField[] serialPersistentFields =
new ObjectStreamField[0];
- 基本数据类型字段一旦被设置为final, 对象初始化完成后, 该字段的
引用! 引用!
就不可变了 - hash值, 既然String是不可变的, 那么这里hash值肯定也是不可变的, 那么这里为什么不用
final
修饰? 如果要用final修饰, 那么就必须保证在构造函数中计算出hash值, 这样对于性能是有影响的 - serialVersionUID 用于序列化的类型确认
- ObjectStreamField[] 用于序列化
构造方法
String可以用很多与之相关的参数值来进行构造.比如,String, 字符数组, 字节数组(可指定编码方式), StringBuffer,StringBuilder等我们挑重点, 有思考价值的来看.
public String(char value[]) {
this.value = Arrays.copyOf(value, value.length);
}
public String(char value[], int offset, int count) {
if (offset < 0) {
throw new StringIndexOutOfBoundsException(offset);
}
if (count <= 0) {
if (count < 0) {
throw new StringIndexOutOfBoundsException(count);
}
if (offset <= value.length) {
this.value = "".value;
return;
}
}
// Note: offset or count might be near -1>>>1.
if (offset > value.length - count) {
throw new StringIndexOutOfBoundsException(offset + count);
}
this.value = Arrays.copyOfRange(value, offset, offset+count);
}
public String(StringBuffer buffer) {
synchronized(buffer) {
this.value = Arrays.copyOf(buffer.getValue(), buffer.length());
}
}
public String(StringBuilder builder) {
this.value = Arrays.copyOf(builder.getValue(), builder.length());
}
- Arrays.copyOf 其实还是最后还是调用了System.arraycopy方法,这是一个JNI方法.
- 为什么要先复制在引用?
final char[] value
只能保证value引用的不可变性, value内部的元素还是可以改变的, 这里copy到该String实例内, 通过封装来保证不可变性. 如果直接this.value = value
那么, 一旦持有value的其他对象更改了value, 那么String就丧失了不可变性. - StringBuffer是一个线程安全类, 为什么还要加上sychronized来保证进一步线程安全? 关键在于getValue()方法, 这是一个AbStractStringBuilder抽象类的方法, 所以该方法是不加锁的(也没法加锁, 不能保证锁的一致性), 那么length()方法带锁.getValue()不带锁就会造成线程安全问题, 所以这里getValue()加上(buffer)的同步锁.保证与其内部方法的锁(this)一致.
- StringBuilder线程不安全为什么不加锁? 那肯定的啦, 都知道是线程不安全的了, 那么就需要聪明的你去保证他在多线程中的安全问题.
上面 public String(char value[], int offset, int count)
这个方法使用int count 来保存要存入的字符个数, Integer.MAX_VALUE = 2147483647, 但这并不是字符串在编译期能存入的最大值, 在编译期字符串最大是65534, 这里主要是因为常量池中CONSTANT_Utf8类型的length的最大是2^16即65536, 但是这里给了Null两个, 所以编译期最大容量就是65534个字节, 但是运行时可以存入的字节数就不会受到这个限制, 可以存入2147483647个字符, 大约4GB. 具体了解->我说我精通字符串,面试官竟然问我Java中的String有没有长度限制!?
该管的时候管, 不该管的时候不管, 性能和自由之道. - StringBuilder参数实例化String中学到
主要方法
public int length() {
return value.length;
}
public boolean isEmpty() {
return value.length == 0;
}
public char charAt(int index) {
if ((index < 0) || (index >= value.length)) {
//这里抛出的是一个非检查异常
throw new StringIndexOutOfBoundsException(index);
}
return value[index];
}
public byte[] getBytes(Charset charset) {
if (charset == null) throw new NullPointerException();
return StringCoding.encode(charset, value, 0, value.length);
}
//equals这里, 先==判断是否是相同实例, 下面判断类型, 在进行字符一一比较.
public boolean equals(Object anObject) {
if (this == anObject) {
return true;
}
if (anObject instanceof String) {
String anotherString = (String)anObject;
int n = value.length;
if (n == anotherString.value.length) {
char v1[] = value;
char v2[] = anotherString.value;
int i = 0;
while (n-- != 0) {
if (v1[i] != v2[i])
return false;
i++;
}
return true;
}
}
return false;
}
public boolean contentEquals(StringBuffer sb) {
return contentEquals((CharSequence)sb);
}
private boolean nonSyncContentEquals(AbstractStringBuilder sb) {
char v1[] = value;
char v2[] = sb.getValue();
int n = v1.length;
if (n != sb.length()) {
return false;
}
for (int i = 0; i < n; i++) {
if (v1[i] != v2[i]) {
return false;
}
}
return true;
}
//这里的String, StringBuffer StringBuilder都实现了CharSequence接口
public boolean contentEquals(CharSequence cs) {
// Argument is a StringBuffer, StringBuilder
if (cs instanceof AbstractStringBuilder) {
if (cs instanceof StringBuffer) {
//并没有额外写一个SyncContentEquals来StringBuffer的线程安全
//这里通过代码块的方式来保护
synchronized(cs) {
return nonSyncContentEquals((AbstractStringBuilder)cs);
}
} else {
return nonSyncContentEquals((AbstractStringBuilder)cs);
}
}
// Argument is a String
if (cs instanceof String) {
return equals(cs);
}
// Argument is a generic CharSequence
char v1[] = value;
int n = v1.length;
if (n != cs.length()) {
return false;
}
for (int i = 0; i < n; i++) {
if (v1[i] != cs.charAt(i)) {
return false;
}
}
return true;
}
public int compareTo(String anotherString) {
int len1 = value.length;
int len2 = anotherString.value.length;
int lim = Math.min(len1, len2);
char v1[] = value;
char v2[] = anotherString.value;
int k = 0;
while (k < lim) {
char c1 = v1[k];
char c2 = v2[k];
if (c1 != c2) {
return c1 - c2;
}
k++;
}
return len1 - len2;
}
//这个方法通过Character.toUpperCase和Character.toLowerCase来转化字符进行比较
public boolean equalsIgnoreCase(String anotherString) {
return (this == anotherString) ? true
: (anotherString != null)
&& (anotherString.value.length == value.length)
&& regionMatches(true, 0, anotherString, 0, value.length);
}
public boolean regionMatches(boolean ignoreCase, int toffset,
String other, int ooffset, int len) {
char ta[] = value;
int to = toffset;
char pa[] = other.value;
int po = ooffset;
// Note: toffset, ooffset, or len might be near -1>>>1.
if ((ooffset < 0) || (toffset < 0)
|| (toffset > (long)value.length - len)
|| (ooffset > (long)other.value.length - len)) {
return false;
}
while (len-- > 0) {
char c1 = ta[to++];
char c2 = pa[po++];
if (c1 == c2) {
continue;
}
if (ignoreCase) {
// If characters don't match but case may be ignored,
// try converting both characters to uppercase.
// If the results match, then the comparison scan should
// continue.
char u1 = Character.toUpperCase(c1);
char u2 = Character.toUpperCase(c2);
if (u1 == u2) {
continue;
}
// Unfortunately, conversion to uppercase does not work properly
// for the Georgian alphabet, which has strange rules about case
// conversion. So we need to make one last check before
// exiting.
if (Character.toLowerCase(u1) == Character.toLowerCase(u2)) {
continue;
}
}
return false;
}
return true;
}
//参考这篇文章: https://youngxinler.github.io/2019/08/05/Java-%E5%90%84%E7%A7%8Dhash%E7%AE%97%E6%B3%95%E5%88%86%E6%9E%90.html
public int hashCode() {
int h = hash;
if (h == 0 && value.length > 0) {
char val[] = value;
for (int i = 0; i < value.length; i++) {
h = 31 * h + val[i];
}
hash = h;
}
return h;
}
//indexOf, 将传入的char转型为int (unicode编码), 进行匹配
public int indexOf(int ch) {
return indexOf(ch, 0);
}
public int indexOf(int ch, int fromIndex) {
final int max = value.length;
if (fromIndex < 0) {
fromIndex = 0;
} else if (fromIndex >= max) {
// Note: fromIndex might be near -1>>>1.
return -1;
}
if (ch < Character.MIN_SUPPLEMENTARY_CODE_POINT) {
// handle most cases here (ch is a BMP code point or a
// negative value (invalid code point))
final char[] value = this.value;
//这里竟然直接就匹配了....
for (int i = fromIndex; i < max; i++) {
if (value[i] == ch) {
return i;
}
}
return -1;
} else {
return indexOfSupplementary(ch, fromIndex);
}
}
public int lastIndexOf(int ch, int fromIndex) {
if (ch < Character.MIN_SUPPLEMENTARY_CODE_POINT) {
// handle most cases here (ch is a BMP code point or a
// negative value (invalid code point))
final char[] value = this.value;
int i = Math.min(fromIndex, value.length - 1);
for (; i >= 0; i--) {
if (value[i] == ch) {
return i;
}
}
return -1;
} else {
return lastIndexOfSupplementary(ch, fromIndex);
}
}
//字符串匹配的主要方法, 适配器我就不贴代码了.
//这里使用的不是KMP, 而是先比较第一个, 然后在往后一一比较.
static int indexOf(char[] source, int sourceOffset, int sourceCount,
char[] target, int targetOffset, int targetCount,
int fromIndex) {
if (fromIndex >= sourceCount) {
return (targetCount == 0 ? sourceCount : -1);
}
if (fromIndex < 0) {
fromIndex = 0;
}
if (targetCount == 0) {
return fromIndex;
}
char first = target[targetOffset];
int max = sourceOffset + (sourceCount - targetCount);
for (int i = sourceOffset + fromIndex; i <= max; i++) {
/* Look for first character. */
if (source[i] != first) {
while (++i <= max && source[i] != first);
}
/* Found first character, now look at the rest of v2 */
if (i <= max) {
int j = i + 1;
int end = j + targetCount - 1;
for (int k = targetOffset + 1; j < end && source[j]
== target[k]; j++, k++);
if (j == end) {
/* Found whole string. */
return i - sourceOffset;
}
}
}
return -1;
}
//subString调用了public String(char[] value, int offset, int count); 里面使用了Arrays.copyOf.
public String substring(int beginIndex, int endIndex) {
if (beginIndex < 0) {
throw new StringIndexOutOfBoundsException(beginIndex);
}
if (endIndex > value.length) {
throw new StringIndexOutOfBoundsException(endIndex);
}
int subLen = endIndex - beginIndex;
if (subLen < 0) {
throw new StringIndexOutOfBoundsException(subLen);
}
return ((beginIndex == 0) && (endIndex == value.length)) ? this
: new String(value, beginIndex, subLen);
}
//先把原数组的值copy到buff中,再把要添加的数组中的值copy到buf中来.
public String concat(String str) {
int otherLen = str.length();
if (otherLen == 0) {
return this;
}
int len = value.length;
char buf[] = Arrays.copyOf(value, len + otherLen);
str.getChars(buf, len);
return new String(buf, true);
}
void getChars(char dst[], int dstBegin) {
System.arraycopy(value, 0, dst, dstBegin, value.length);
}
//这里的replace 和replaceAll区别, replace是单个字符,
//而replaceAll而是针对一个正则表达式来进行替换
public String replace(char oldChar, char newChar) {
if (oldChar != newChar) {
int len = value.length;
int i = -1;
char[] val = value; /* avoid getfield opcode */
while (++i < len) {
if (val[i] == oldChar) {
break;
}
}
if (i < len) {
char buf[] = new char[len];
for (int j = 0; j < i; j++) {
buf[j] = val[j];
}
while (i < len) {
char c = val[i];
buf[i] = (c == oldChar) ? newChar : c;
i++;
}
return new String(buf, true);
}
}
return this;
}
public boolean contains(CharSequence s) {
return indexOf(s.toString()) > -1;
}
public String[] split(String regex) {
return split(regex, 0);
}
//split方法也支持正则表达式写法. 只有单个字符且不是字母, 不被归入正则, 否则进入正则表达式
//limit 是返回限制返回字符串数组的长度, 而不是字符串的长度
public String[] split(String regex, int limit) {
/* fastpath if the regex is a
(1)one-char String and this character is not one of the
RegEx's meta characters ".$|()[{^?*+\\", or
(2)two-char String and the first char is the backslash and
the second is not the ascii digit or ascii letter.
*/
char ch = 0;
if (((regex.value.length == 1 &&
".$|()[{^?*+\\".indexOf(ch = regex.charAt(0)) == -1) ||
(regex.length() == 2 &&
regex.charAt(0) == '\\' &&
(((ch = regex.charAt(1))-'0')|('9'-ch)) < 0 &&
((ch-'a')|('z'-ch)) < 0 &&
((ch-'A')|('Z'-ch)) < 0)) &&
(ch < Character.MIN_HIGH_SURROGATE ||
ch > Character.MAX_LOW_SURROGATE))
{
int off = 0;
int next = 0;
boolean limited = limit > 0;
ArrayList<String> list = new ArrayList<>();
//使用indexOf匹配到一个, 就新添加一个String
while ((next = indexOf(ch, off)) != -1) {
if (!limited || list.size() < limit - 1) {
list.add(substring(off, next));
off = next + 1;
} else { // last one
//assert (list.size() == limit - 1);
list.add(substring(off, value.length));
off = value.length;
break;
}
}
// If no match was found, return this
if (off == 0)
return new String[]{this};
// Add remaining segment
//添加尾部
if (!limited || list.size() < limit)
list.add(substring(off, value.length));
// Construct result
int resultSize = list.size();
if (limit == 0) {
while (resultSize > 0 && list.get(resultSize - 1).length() == 0) {
resultSize--;
}
}
String[] result = new String[resultSize];
return list.subList(0, resultSize).toArray(result);
}
return Pattern.compile(regex).split(this, limit);
}
//' ' = '/u0020' <=' ' /r /n都包含在内
public String trim() {
int len = value.length;
int st = 0;
char[] val = value; /* avoid getfield opcode */
while ((st < len) && (val[st] <= ' ')) {
st++;
}
while ((st < len) && (val[len - 1] <= ' ')) {
len--;
}
return ((st > 0) || (len < value.length)) ? substring(st, len) : this;
}
//返回的copy副本
public char[] toCharArray() {
// Cannot use Arrays.copyOf because of class initialization order issues
char result[] = new char[value.length];
System.arraycopy(value, 0, result, 0, value.length);
return result;
}
public static String valueOf(Object obj) {
return (obj == null) ? "null" : obj.toString();
}
String(char[] value, boolean share) {
// assert share : "unshared not supported";
//这里的参数直接赋值给了value, 不怕外部的修改么?
//因为这是一个default方法, 所以包外无法访问.
//其次, 看下面的valueOf方法
this.value = value;
}
public static String valueOf(char c) {
//这里用c来生成一个data数组, 而data数组是一个成员变量,
//而且没有被"不安全发布", 所以这里也保持了String的不可变.
char data[] = {c};
return new String(data, true);
}
//包装类都调用了自己的静态toString方法.
public static String valueOf(int i) {
return Integer.toString(i);
}
public static String valueOf(long l) {
return Long.toString(l);
}
以上, 大部分方法解读都写在了代码注释当中.
PREVIOUSJava 基础博文Collections
NEXTJava Enum 分析