给定n个权值作为n个叶子节点,构造一棵二叉树,若该树的带权路径长度(wpl)达到最小,这样的二叉树便称为最优二叉树,也被称为赫夫曼树。
节点的权:为树中的节点赋予一个具有某种含义的数值,这个值便被称为节点的权。
节点的带权路径长度便为从根节点到该节点之间的路径长度与节点的权的乘积。
赫夫曼二叉树的构建:
- 从小到大对所有数据进行排序,
- 取出权值最小的两棵二叉树并组成一棵新二叉树,新二叉树根节点的权值为这两棵二叉树根节点的权值之和
- 对新二叉树以根节点的权值大小重新进行排序。
- 重复上述步骤直到所有节点都经过处理。
/**
 * Demonstrates building a Huffman (optimal) binary tree from a set of integer weights.
 */
public class HuffmanTree {

    public static void main(String[] args) {
        int[] arr = {13, 7, 8, 3, 29, 6, 1};
        Node root = createHuffmanTree(arr);
        root.preOrder();
    }

    /**
     * Builds a Huffman tree whose leaves carry the given weights.
     *
     * <p>The two lightest trees are repeatedly merged under a new parent whose value is the
     * sum of their root values, until a single tree remains. The list is re-sorted with the
     * stable {@link Collections#sort} on every round, so ties keep their insertion order.
     *
     * @param arr the leaf weights; must contain at least one element
     * @return the root of the resulting tree (the single leaf itself when {@code arr} has
     *         exactly one element)
     * @throws IllegalArgumentException if {@code arr} is empty
     */
    public static Node createHuffmanTree(int[] arr) {
        if (arr.length == 0) {
            throw new IllegalArgumentException("arr must contain at least one weight");
        }

        List<Node> nodes = new ArrayList<>(arr.length);
        for (int value : arr) {
            nodes.add(new Node(value));
        }

        while (nodes.size() > 1) {
            // Order the remaining trees by root weight, lightest first.
            Collections.sort(nodes);

            // Detach the two lightest trees (a single node counts as a minimal tree).
            Node left = nodes.remove(0);
            Node right = nodes.remove(0);

            // Join them under a new parent weighted with the sum of both roots.
            Node parent = new Node(left.value + right.value);
            parent.left = left;
            parent.right = right;

            // The merged tree takes part in the next round.
            nodes.add(parent);
        }
        return nodes.get(0);
    }
}

/**
 * Tree node holding a weight. Implements {@link Comparable} so that a list of nodes can be
 * sorted by weight.
 */
class Node implements Comparable<Node> {
    public Node left;
    public Node right;
    public int value;

    public Node(int value) {
        this.value = value;
    }

    @Override
    public String toString() {
        return "Node{" + "value=" + value + '}';
    }

    /**
     * Orders nodes by ascending weight.
     *
     * <p>Uses {@link Integer#compare} rather than subtraction: {@code this.value - o.value}
     * overflows for operands of opposite sign and large magnitude and would then report the
     * wrong order.
     */
    @Override
    public int compareTo(Node o) {
        return Integer.compare(this.value, o.value);
    }

    /** Prints this node, then its left subtree, then its right subtree. */
    public void preOrder() {
        System.out.println(this);
        if (this.left != null) {
            System.out.print("左子树:");
            this.left.preOrder();
        }
        if (this.right != null) {
            System.out.print("右子树:");
            this.right.preOrder();
        }
    }
}
赫夫曼编码:
赫夫曼编码被广泛用于数据文件的压缩,压缩率一般在20%-90%之间。赫夫曼编码是可变长编码VLC的一种。并且赫夫曼编码是一种无损压缩编码。
若赫夫曼树的排序方法不同,对应的赫夫曼编码也不同,但是wpl相同都是最小的。
赫夫曼压缩代码:
/**
 * Demonstrates Huffman compression of a byte sequence: count byte frequencies, build the
 * Huffman tree, derive the code table, and pack the encoded bits into bytes.
 */
public class Huffman {

    public static void main(String[] args) {
        String str = "i like like like java do you like a java";
        // Explicit charset so the bytes do not depend on the platform default.
        byte[] bytes = str.getBytes(java.nio.charset.StandardCharsets.UTF_8);
        List<Node> codes = getCodes(bytes);
        Node huffmanTree = createHuffmanTree(codes);
        // huffmanTree.preOrder();   // enable to dump the tree
        System.out.println("赫夫曼编码表:");
        getHuffmanCodes(huffmanTree, "", stringBuilder);
        System.out.println(huffmanCodes);
        byte[] zip = zip(bytes, huffmanCodes);
        System.out.println("压缩结果" + Arrays.toString(zip));
    }

    /** Code table filled by {@code getHuffmanCodes}: byte value to its bit string. */
    private static Map<Byte, String> huffmanCodes = new HashMap<>();

    /** Empty prefix handed to the first {@code getHuffmanCodes} call in {@code main}. */
    private static StringBuilder stringBuilder = new StringBuilder();

    /**
     * Rebuilds the code table for the tree rooted at {@code root} and returns a copy of it.
     *
     * <p>Entries left in the shared table by earlier calls are discarded first.
     *
     * @param root root of a Huffman tree; {@code null} yields an empty table
     * @return a new map from byte value to its code
     */
    public static Map<Byte, String> getHuffmanCodes(Node root) {
        huffmanCodes.clear();
        getHuffmanCodes(root, "", new StringBuilder());
        return new HashMap<>(huffmanCodes);
    }

    /**
     * Walks the tree and records the code of every leaf in the shared code table.
     * Going left contributes {@code 0}, going right contributes {@code 1}.
     *
     * <p>When the tree is a single leaf (the input held only one distinct byte) the path
     * would be empty; that leaf is given the code {@code "0"} so it still occupies a bit.
     *
     * @param node          current node; {@code null} is ignored
     * @param code          the bit that leads from the parent to {@code node}
     *                      ({@code ""} for the root)
     * @param stringBuilder the path accumulated from the root down to the parent
     */
    public static void getHuffmanCodes(Node node, String code, StringBuilder stringBuilder) {
        if (node == null) {
            return;
        }
        // Copy the prefix so sibling branches do not see each other's bits.
        StringBuilder path = new StringBuilder(stringBuilder);
        path.append(code);

        if (node.data == null) {
            // Internal node: descend into both children.
            getHuffmanCodes(node.left, "0", path);
            getHuffmanCodes(node.right, "1", path);
        } else {
            // Leaf: an empty path only happens when the root itself is the leaf.
            huffmanCodes.put(node.data, path.length() == 0 ? "0" : path.toString());
        }
    }

    /**
     * Encodes {@code bytes} with the given code table and packs the bits into bytes,
     * eight bits per byte.
     *
     * <p>NOTE: a final chunk shorter than eight bits is parsed as a plain binary number, so
     * its leading zeros are not represented in the output; a decoder needs the total bit
     * count to recover them.
     *
     * @param bytes        the data to compress
     * @param huffmanCodes code table mapping every byte in {@code bytes} to its bit string
     * @return the packed bits; empty when {@code bytes} is empty
     * @throws NumberFormatException if a byte has no entry in {@code huffmanCodes}
     *                               (the text {@code "null"} ends up in the bit string)
     */
    public static byte[] zip(byte[] bytes, Map<Byte, String> huffmanCodes) {
        StringBuilder bits = new StringBuilder();
        for (byte b : bytes) {
            bits.append(huffmanCodes.get(b));
        }

        // Number of output bytes: bit count divided by eight, rounded up.
        byte[] fileZip = new byte[(bits.length() + 7) / 8];
        int index = 0;
        for (int i = 0; i < bits.length(); i += 8) {
            int end = Math.min(i + 8, bits.length());
            // Interpret the chunk as a base-2 number and keep its low eight bits.
            fileZip[index++] = (byte) Integer.parseInt(bits.substring(i, end), 2);
        }
        return fileZip;
    }

    /**
     * Counts how often each byte value occurs and wraps every (byte, count) pair in a leaf
     * node.
     *
     * @param bytes the data to analyse
     * @return one leaf node per distinct byte value, weighted by its number of occurrences
     */
    public static List<Node> getCodes(byte[] bytes) {
        Map<Byte, Integer> counts = new HashMap<>();
        for (byte b : bytes) {
            counts.merge(b, 1, Integer::sum);
        }

        List<Node> nodes = new ArrayList<>(counts.size());
        for (Map.Entry<Byte, Integer> entry : counts.entrySet()) {
            nodes.add(new Node(entry.getKey(), entry.getValue()));
        }
        return nodes;
    }

    /**
     * Merges the given leaf nodes into a Huffman tree. The list is modified in place and
     * ends up holding only the root.
     *
     * @param nodes the leaf nodes; must contain at least one element
     * @return the root of the tree
     * @throws IllegalArgumentException if {@code nodes} is empty
     */
    public static Node createHuffmanTree(List<Node> nodes) {
        if (nodes.isEmpty()) {
            throw new IllegalArgumentException("nodes must contain at least one node");
        }
        while (nodes.size() > 1) {
            // Lightest trees first; the sort is stable, so ties keep their current order.
            Collections.sort(nodes);

            // Detach the two lightest trees.
            Node left = nodes.remove(0);
            Node right = nodes.remove(0);

            // Internal nodes carry no byte, only the combined weight.
            Node parent = new Node(null, left.weight + right.weight);
            parent.left = left;
            parent.right = right;

            nodes.add(parent);
        }
        return nodes.get(0);
    }
}

/**
 * Huffman tree node. Leaves hold a byte value in {@code data}; internal nodes have
 * {@code data == null}. Nodes are ordered by {@code weight}.
 */
class Node implements Comparable<Node> {
    public Byte data;
    public int weight;
    public Node left;
    public Node right;

    public Node(Byte data, int weight) {
        this.data = data;
        this.weight = weight;
    }

    /** Prints this node, then its left subtree, then its right subtree. */
    public void preOrder() {
        System.out.println(this);
        if (this.left != null) {
            System.out.println("左:");
            this.left.preOrder();
        }
        if (this.right != null) {
            System.out.println("右:");
            this.right.preOrder();
        }
    }

    /**
     * Orders nodes by ascending weight. {@link Integer#compare} is used instead of
     * subtraction, which can overflow and report the wrong order.
     */
    @Override
    public int compareTo(Node o) {
        return Integer.compare(this.weight, o.weight);
    }

    @Override
    public String toString() {
        return "Node{" + "data=" + data + ", weight=" + weight + '}';
    }
}
赫夫曼编码压缩上述实例字符串后结果如下图:
赫夫曼编码解压:
赫夫曼编码解压操作是压缩操作的逆向操作,即将上述压缩后的结果还原为字符串"i like like like java do you like a java"。
解压步骤:
- 将byte十进制数组还原为原二进制所对应的字符串
- 根据二进制字符串通过创建的赫夫曼编码表,进行还原。
//这里的byte值为压缩过的值 public static byte[] decode(byte[] bytes, Map<Byte,String> huffmanCodes){ // 添加空值检查 if (bytes == null || huffmanCodes == null || huffmanCodes.isEmpty()) { return new byte[0]; } // 构建二进制字符串 StringBuilder binaryStr = new StringBuilder(); for (int i = 0; i < bytes.length; i++) { boolean isLast = (i == bytes.length - 1); binaryStr.append(BinToString(!isLast, bytes[i])); } // 反转编码表 Map<String, Byte> reverseMap = new HashMap<>(); for (Map.Entry<Byte, String> entry : huffmanCodes.entrySet()) { reverseMap.put(entry.getValue(), entry.getKey()); } // 解码 List<Byte> result = new ArrayList<>(); StringBuilder currentCode = new StringBuilder(); for (int i = 0; i < binaryStr.length(); i++) { currentCode.append(binaryStr.charAt(i)); Byte decodedByte = reverseMap.get(currentCode.toString()); if (decodedByte != null) { result.add(decodedByte); currentCode.setLength(0); } } // 转换为字节数组 byte[] source = new byte[result.size()]; for (int i = 0; i < result.size(); i++) { source[i] = result.get(i); } return source; } //将压缩的byte转换为原字符串的byte //flag的作用是:首先执行 bytes |= 256,将第9位设为1(256的二进制是100000000) public static String BinToString(boolean flag,int bytes){ if(flag){ bytes |= 256; } String binaryString = Integer.toBinaryString(bytes); if(flag){ return binaryString.substring(binaryString.length() - 8); }else { return binaryString; } }
![]()