深入源码分析String类

String类基本情况

不可变类，使用了final修饰，不可被继承
一旦创建String对象，包含这个对象的字符序列不可被改变
底层使用字符数组 char value[]
实现了序列化接口（java.io.Serializable），是可序列化的
效率较低，其每次增删改操作都会重新创建一个新的String对象，会造成对象堆积，触发垃圾回收机制，若小范围对象生成，可能只会触发minor GC，大范围的话，就会触发full GC，影响效率

String类源码分析

private final char value[]; String类底层使用了char数组，并且该数组是final修饰的，不可改变，用于字符存储，所以String一旦创建之后，即不可被修改了。因此所有的String对象的修改都不是在原来的对象基础上修改的，而是新建一个String对象对其修改并返回该新对象，原对象被废弃，资源浪费且性能较差。若遇到字符串将被频繁修改的情况，建议不要使用String，改用StringBuffer或StringBuilder。
构造器：通过字符数组构造String对象时，不会直接使用传入的原始数组，而是先复制一份再保存，这样外部对原数组的修改不会影响到String，从而保证其不可变性

public final class String  //String类由final修饰，不能被继承，可序列化
    implements java.io.Serializable, Comparable<String>, CharSequence {//CharSequence 字符串协议接口
    private final char value[]; //底层使用了char[] 而且被final修饰
    
    private int hash; // hash值属性 默认为0
    //数了下有7个构造器，这里分析一些常用的和实现基本类似的。在JDK 6及之前，生成一个String对象时必须对offset、count、value三个属性进行赋值；从JDK 7u6开始去掉了offset和count，只需要为value赋值，就能得到一个完整的String对象。
    
    //默认的构造方法，value被初始化为一个长度为0的字符数组，即空字符串""，而不是null
    public String() { 
        this.value = new char[0]; 
    }
    
    public String(String original) {
		//初始化新创建的字符串对象,设置value数组和hash值
        this.value = original.value; 
        this.hash = original.hash;
    }
    
    //可以传入一个字符数组来构造，传入的数组会被复制一份，保证String内部的数据不会被外部修改
    //增删改字符串，返回值也是新创建的字符串对象
     public String(char value[]) {
        this.value = Arrays.copyOf(value, value.length);
    }
 	
    //根据具体的下标和长度来构造新字符串
    public String(char value[], int offset, int count) {
        //健壮性判断
        if (offset < 0) {
            throw new StringIndexOutOfBoundsException(offset); 
        }
        if (count < 0) {
            throw new StringIndexOutOfBoundsException(count);
        }
        // Note: offset or count might be near -1>>>1.
        if (offset > value.length - count) {
            throw new StringIndexOutOfBoundsException(offset + count);//起始值下标加长度大于数组长度，抛异常  
        }
        //数组范围性复制
        this.value = Arrays.copyOfRange(value, offset, offset+count);
    }
    
    //传入下标以及长度来构造
    // Builds a string from the Unicode code points in codePoints[offset, offset + count).
    // Throws StringIndexOutOfBoundsException when the range does not fit inside the array,
    // and IllegalArgumentException when an element is not a valid code point.
    public String(int[] codePoints, int offset, int count) {
        if (offset < 0) {
            throw new StringIndexOutOfBoundsException(offset);
        }
        if (count < 0) {
            throw new StringIndexOutOfBoundsException(count);
        }
        // Note: offset or count might be near -1>>>1.
        if (offset > codePoints.length - count) {
            throw new StringIndexOutOfBoundsException(offset + count);
        }

        // Exclusive upper bound of the range; both loops below iterate up to it.
        final int end = offset + count;

        // Pass 1: compute the required char count. Every code point needs one char;
        // a valid code point outside the BMP needs one extra.
        int n = count;
        for (int i = offset; i < end; i++) {
            int c = codePoints[i];
            if (Character.isBmpCodePoint(c)) {
                continue;
            } else if (Character.isValidCodePoint(c)) {
                n++;
            } else {
                throw new IllegalArgumentException(Integer.toString(c));
            }
        }

        // Pass 2: allocate the array and fill it in.
        final char[] v = new char[n];
        for (int i = offset, j = 0; i < end; i++, j++) {
            int c = codePoints[i];
            if (Character.isBmpCodePoint(c)) {
                v[j] = (char) c;
            } else {
                // A non-BMP code point takes two slots; the extra j++ steps over the second.
                Character.toSurrogates(c, v, j++);
            }
        }
        this.value = v;
    }
    
    //可以传递StringBuffer和StringBuilder，返回String对象
    public String(StringBuffer buffer) {
    	// StringBuffer线程安全，使用了synchronized同步锁，效率低
        synchronized(buffer) {
            this.value = Arrays.copyOf(buffer.getValue(), buffer.length());
        }
    }
    // 相比StringBuffer，StringBuilder没有同步控制，线程不安全，但效率更高
    public String(StringBuilder builder) {
        this.value = Arrays.copyOf(builder.getValue(), builder.length());
    }
 
    // Returns the number of chars in this string, i.e. the length of the backing array.
    public int length() {
        final char[] chars = value;
        return chars.length;
    }
     // Returns true exactly when the backing array has length 0.
     public boolean isEmpty() {
         final int size = value.length;
         return size == 0;
     }
    
    // 直接取得下标为index的字符
    // Returns the char at the given index.
    // Throws StringIndexOutOfBoundsException when index is outside [0, length).
    public char charAt(int index) {
        final char[] chars = value;
        if (index >= 0 && index < chars.length) {
            return chars[index];
        }
        throw new StringIndexOutOfBoundsException(index);
    }
    
    //返回指定索引处的字符（Unicode代码点）。该索引引用char值（Unicode代码单元），其范围从 0 到 length() - 1。就是返回一个Unicode值。
    // Returns the Unicode code point at the given char index.
    // Throws StringIndexOutOfBoundsException when index is outside [0, length).
    public int codePointAt(int index) {
        final char[] chars = value;
        final int limit = chars.length;
        if (index >= 0 && index < limit) {
            return Character.codePointAtImpl(chars, index, limit);
        }
        throw new StringIndexOutOfBoundsException(index);
    }

equals方法

// 重写equals方法，比较字符串的值是否相同
 // Value equality: true when anObject is a String holding exactly the same char sequence.
 public boolean equals(Object anObject) {
     // Same reference: necessarily equal, and the cheapest check.
     if (this == anObject) {
         return true;
     }
     // Only another String can be equal (this also rejects null).
     if (!(anObject instanceof String)) {
         return false;
     }
     final char[] mine = value;
     final char[] theirs = ((String) anObject).value;
     // Different lengths can never be equal; this avoids scanning the contents.
     if (mine.length != theirs.length) {
         return false;
     }
     // Same length: every position must match.
     for (int idx = 0; idx < mine.length; idx++) {
         if (mine[idx] != theirs[idx]) {
             return false;
         }
     }
     return true;
 }

equalsIgnoreCase

// 直接比较字符串，不区分大小写
// Compares this string with another, ignoring case differences.
public boolean equalsIgnoreCase(String anotherString) {
    // Same reference: equal without looking at the contents.
    if (this == anotherString) {
        return true;
    }
    if (anotherString == null) {
        return false;
    }
    final int length = value.length;
    if (anotherString.value.length != length) {
        return false;
    }
    // Case-insensitive comparison over the full length of both strings.
    return regionMatches(true, 0, anotherString, 0, length);
}

compareTo方法

  //按字典顺序比较两个字符串。比较基于字符串中每个字符的Unicode值
  // Lexicographic comparison based on the Unicode value of each char.
  // Returns a negative value when this string sorts first, a positive value when it sorts
  // after anotherString, and 0 when both hold the same char sequence.
  public int compareTo(String anotherString) {
      final char[] mine = value;
      final char[] theirs = anotherString.value;
      final int shared = Math.min(mine.length, theirs.length);
      // Walk the common prefix; the first differing position decides the result.
      for (int idx = 0; idx < shared; idx++) {
          final int diff = mine[idx] - theirs[idx];
          if (diff != 0) {
              return diff;
          }
      }
      // One string is a prefix of the other (or they are identical): the length
      // difference decides, and it is 0 only when the strings are equal.
      return mine.length - theirs.length;
  }
  
  
  //判断本字符串从下标toffset开始的部分是否以prefix字符串开头，toffset是本字符串中开始比较的起始下标
   // Tests whether the part of this string beginning at index toffset starts with prefix.
   // Returns false (rather than throwing) when toffset is negative or when fewer than
   // prefix.length() chars remain from toffset.
   public boolean startsWith(String prefix, int toffset) {
          char ta[] = value;
          // The backing array holds exactly this string's chars, so indices into it
          // need no extra base offset.
          int to = toffset;
          char pa[] = prefix.value;
          int po = 0;
          int pc = prefix.value.length;
          // Note: toffset might be near -1>>>1, so compare against "length - pc"
          // instead of computing "toffset + pc".
          if ((toffset < 0) || (toffset > value.length - pc)) {
              return false;
          }
          // Compare char by char until the whole prefix has been consumed.
          while (--pc >= 0) {
              if (ta[to++] != pa[po++]) {
                  return false;
              }
          }
          return true;
   }

concat方法

//连接两个字符串
// Returns the concatenation of this string followed by str.
// When str is empty, this same object is returned and nothing is allocated.
public String concat(String str) {
    int otherLen = str.length();
    // Nothing to append: return the receiver unchanged.
    if (otherLen == 0) {
        return this;
    }

    // The length of this string is the length of its backing array.
    int len = value.length;
    char buf[] = new char[len + otherLen];
    // Copy this string's chars first, then str's chars right after them.
    getChars(0, len, buf, 0);
    str.getChars(0, otherLen, buf, len);
    // buf is private to this method, so it is handed to the (char[], boolean)
    // constructor (JDK 7's package-private sharing constructor) without another copy.
    return new String(buf, true);
}

valueOf方法

   //String与基本类型的包装类转换
   // Returns the string form of a boolean; there are only two possible results.
   public static String valueOf(boolean b) {
       if (b) {
           return "true";
       }
       return "false";
   }
 
   // Returns a one-character string containing c.
   public static String valueOf(char c) {
       // Wrap the single char in a one-element char array (a char array, not a byte array).
       final char[] single = new char[1];
       single[0] = c;
       // Pass the array itself to the (char[], boolean) constructor.
       return new String(single, true);
   }
// 其他都是调用包装类的toString方法
   // Returns the decimal string form of an int by delegating to Integer.toString.
   public static String valueOf(int i) {
       final String text = Integer.toString(i);
       return text;
   }
    // Returns the decimal string form of a long by delegating to Long.toString.
    public static String valueOf(long l) {
        final String text = Long.toString(l);
        return text;
    }
   // Returns the string form of a float by delegating to Float.toString.
   public static String valueOf(float f) {
       final String text = Float.toString(f);
       return text;
   }

substring方法

public String substring(int beginIndex) { 
    if (beginIndex < 0) {  
        throw new StringIndexOutOfBoundsException(beginIndex);  
    }  
    int subLen = value.length - beginIndex;  
    if (subLen < 0) {  
        throw new StringIndexOutOfBoundsException(subLen);  
    }  
   	// 当传入的开始下标符合且不为0时，
    // 直接新建一个String对象，改变了偏移量  
    return (beginIndex == 0) ? this : new String(value, beginIndex, subLen);

// 传入了起始和结束角标值来截取
// Returns the part of this string in [beginIndex, endIndex).
// Throws StringIndexOutOfBoundsException when beginIndex is negative, endIndex exceeds
// the length, or beginIndex is greater than endIndex.
public String substring(int beginIndex, int endIndex) {
    if (beginIndex < 0) {
        throw new StringIndexOutOfBoundsException(beginIndex);
    }
    final int length = value.length;
    if (endIndex > length) {
        throw new StringIndexOutOfBoundsException(endIndex);
    }
    final int subLen = endIndex - beginIndex;
    if (subLen < 0) {
        throw new StringIndexOutOfBoundsException(subLen);
    }
    // The range covers the entire string: return this object itself.
    if (beginIndex == 0 && endIndex == length) {
        return this;
    }
    // Otherwise build a new String through the (char[], int, int) constructor,
    // which copies the selected range of the backing array.
    return new String(value, beginIndex, subLen);
}

replace方法

// Returns a string in which every occurrence of oldChar is replaced by newChar.
// When nothing would change, this same object is returned.
public String replace(char oldChar, char newChar) {
    // Replacing a char with itself is a no-op.
    if (oldChar == newChar) {
        return this;
    }
    final char[] src = value;
    final int len = src.length;

    // Locate the first occurrence; if there is none, no new array is needed.
    int first = 0;
    while (first < len && src[first] != oldChar) {
        first++;
    }
    if (first == len) {
        return this;
    }

    final char[] buf = new char[len];
    // Everything before the first occurrence is known not to match: copy it as-is.
    System.arraycopy(src, 0, buf, 0, first);
    // From the first occurrence on, substitute char by char.
    for (int k = first; k < len; k++) {
        final char c = src[k];
        buf[k] = (c == oldChar) ? newChar : c;
    }
    return new String(buf, true);
}

String对象的地址问题，异同问题

字符串创建异同

String a = "hxhaaj";   
String b = "hxhaaj";
String c = new String ("hxhaaj");
String d = new String ("hxhaaj");
System.out.println(a==b);// true: both literals resolve to the same object in the String pool, so a and b hold the same reference
System.out.println(a==c);// false: a refers to the pooled object, while c refers to a separate object created on the heap by new
System.out.println(d==c);// false: c and d both refer to heap objects, but each new created a distinct one, so the references differ

解析：

String a = "a" 这样的字符串创建方式，对象被创建在方法区的常量池中。JVM会先在常量池中查找有没有一个值为"a"的对象：如果有，就把它的引用直接赋给当前变量，即原来的引用和现在的引用指向同一个对象；如果没有，则在常量池中新创建一个"a"。因此，当再次以这种方式创建相同值的字符串时，多个变量指向的是常量池中的同一个字符串对象。

new String ("hxhaaj") 该方式创建的对象都会在堆中，并且即使值相同的对象，每new一次也会重新创建一个对象在堆中，所以肯定不相同。

编译优化

1
2
3

String a = "ab";
String b = "a" + "b";
System.out.println((a == b));// true: "a" + "b" is a constant expression, so the compiler folds it to "ab" at compile time

解析：对于以直接量创建的字符串，Java的优化过程是“编译优化 + 常量池”。对于 String b = "a" + "b"; 编译器把 "a" + "b" 当作常量表达式，在编译时就进行优化，直接取结果"ab"，因此b和a指向常量池中的同一个对象。

常量表达式问题

String a = "a1";
String b = "a" + 1;
System.out.println((a == b)); 	// true: "a" + 1 is a constant expression and is folded to "a1" at compile time
String c = "a" + true;      
String d = "atrue"  ;
System.out.println((c == d)); // true: "a" + true is folded to "atrue" at compile time
String e = "a" + 3.14;     
String f = "a3.14" ;
System.out.println((e == f)); // true: "a" + 3.14 is folded to "a3.14" at compile time

编译期常量表达式的结果会被放入常量池；值相同的常量在常量池中只保留一份，各个引用取到的都是常量池中的同一个地址，所以用==比较的结果为true。下面两类表达式都会在编译期被优化：

String + String（这的string指的是直接量）；

String + 基本类型；

变量表达式问题

String a = "ab";
String b = "b";
String c = "a" + b;
System.out.println(a==c); // false: b is a (non-final) variable, so "a" + b is evaluated at run time and produces a new object; using the literal "b" instead would print true