2022年2月9日 星期三

使用 Java 對檔案壓縮成 zip 及對 zip 檔解壓縮

這邊紀錄下使用 Java 壓縮/解壓縮 Zip 的方法,
以下先直接上程式碼:

ZipTest.java:

package main;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Arrays;
import java.util.Enumeration;
import java.util.LinkedList;
import java.util.Queue;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;
import java.util.zip.ZipInputStream;
import java.util.zip.ZipOutputStream;

/**
 * Demonstrates how to create zip archives and how to extract them with
 * {@code java.util.zip}.
 *
 * <p>
 * Entry names are always written with {@code "/"} as separator, which is what
 * the ZIP format requires, independent of the operating system the code runs
 * on. Extraction refuses entries that would be written outside of the target
 * directory.
 */
public class ZipTest {

	/** Separator used inside zip entry names; the ZIP format mandates a forward slash. */
	private static final String ZIP_SEPARATOR = "/";

	/**
	 * Runs every demo method once against hard-coded sample paths.
	 *
	 * @param args unused
	 * @throws IOException if any of the sample paths cannot be read or written
	 */
	public static void main(String[] args) throws IOException {
		String srcFilePath_isFile = "D:\\某檔案.jpg";
		String srcFilePath_isDirectory = "D:\\某資料夾";
		String toZipPath = "D:\\壓縮檔.zip";
		String toUnzipDirPath = "D:\\壓縮檔解開後要輸出到的資料夾";

		//----- zip file -----
		zipFile_onlyForSingleFile(srcFilePath_isFile, toZipPath); // zip a single file
		zipFile_onlyForSingleFile(srcFilePath_isDirectory, toZipPath); // zip a single directory, without its content
		zipFile_canAlsoHandleDirectory_stackVersion(srcFilePath_isDirectory, toZipPath); // zip a file or directory tree, queue based
		zipFile_canAlsoHandleDirectory_recursionVersion(srcFilePath_isDirectory, toZipPath); // zip a file or directory tree, recursive

		//----- unzip file -----
		unzipFile_byZipFile(toZipPath, toUnzipDirPath); // extract with ZipFile
		unzipFile_byZipInputStream(toZipPath, toUnzipDirPath); // extract with ZipInputStream

		System.out.println("Done");
	}

	/******************** Zip file *****************/

	/**
	 * Creates a zip archive that contains exactly one entry: the given file, or
	 * (for a directory) an empty directory entry without any of its children.
	 *
	 * @param srcPath path of the file or directory to add
	 * @param toPath  path of the zip archive to create (overwritten if present)
	 * @throws IOException if the source cannot be read or the archive cannot be
	 *                     written
	 */
	public static void zipFile_onlyForSingleFile(String srcPath, String toPath) throws IOException {
		File srcFile = new File(srcPath);

		try (FileOutputStream fileOutputStream = new FileOutputStream(new File(toPath));
				ZipOutputStream zipOutputStream = new ZipOutputStream(fileOutputStream)) {
			// a trailing "/" is what marks an entry as a directory
			String entryName = srcFile.getName();
			if (srcFile.isDirectory()) {
				entryName = entryName + ZIP_SEPARATOR;
			}

			zipOutputStream.putNextEntry(new ZipEntry(entryName));
			if (srcFile.isFile()) {
				// only a regular file has binary content to store
				try (FileInputStream fileInputStream = new FileInputStream(srcFile)) {
					fileInputStream.transferTo(zipOutputStream);
				}
			}
			zipOutputStream.closeEntry();
		}
	}

	/**
	 * Zips a file, or a directory with everything below it, walking the tree
	 * breadth-first with a queue. Every entry name starts with the name of the
	 * source itself; empty directories are kept as directory entries.
	 *
	 * @param srcPath path of the file or directory to zip
	 * @param toPath  path of the zip archive to create (overwritten if present)
	 * @throws IOException if a source file cannot be read or the archive cannot be
	 *                     written
	 */
	public static void zipFile_canAlsoHandleDirectory_stackVersion(String srcPath, String toPath) throws IOException {
		File srcFile = new File(srcPath);

		try (FileOutputStream fileOutputStream = new FileOutputStream(new File(toPath));
				ZipOutputStream zipOutputStream = new ZipOutputStream(fileOutputStream)) {
			// FIFO queue => breadth-first traversal of the directory tree
			Queue<File> fileQueue = new LinkedList<File>();
			fileQueue.add(srcFile);

			while (!fileQueue.isEmpty()) {
				File current = fileQueue.poll();

				if (current.isFile()) {
					writeFileEntry(zipOutputStream, current, buildEntryName(srcFile, current));
				} else if (current.isDirectory()) {
					File[] children = current.listFiles();
					if (children != null && children.length > 0) {
						// a non-empty directory shows up implicitly through the paths of its children
						fileQueue.addAll(Arrays.asList(children));
					} else {
						// an empty directory needs an explicit entry, otherwise it would be lost
						putDirectoryEntry(zipOutputStream, buildEntryName(srcFile, current));
					}
				}
			}
		}
	}

	/**
	 * Zips a file, or a directory with everything below it, walking the tree
	 * recursively. Produces the same entry names as
	 * {@link #zipFile_canAlsoHandleDirectory_stackVersion(String, String)}.
	 *
	 * @param srcPath path of the file or directory to zip
	 * @param toPath  path of the zip archive to create (overwritten if present)
	 * @throws IOException if a source file cannot be read or the archive cannot be
	 *                     written
	 */
	public static void zipFile_canAlsoHandleDirectory_recursionVersion(String srcPath, String toPath)
			throws IOException {
		File srcFile = new File(srcPath);

		try (FileOutputStream fileOutputStream = new FileOutputStream(new File(toPath));
				ZipOutputStream zipOutputStream = new ZipOutputStream(fileOutputStream)) {
			zipFile_canAlsoHandleDirectory_recursionVersion_helper(srcFile, srcFile, zipOutputStream);
		}
	}

	/**
	 * Adds {@code currentFile} to the archive and recurses into it when it is a
	 * non-empty directory.
	 *
	 * @param baseFile        the file or directory the whole zip operation started
	 *                        from; entry names are relative to it
	 * @param currentFile     the file or directory handled by this call
	 * @param zipOutputStream the open archive to append to
	 * @throws IOException if a file cannot be read or the archive cannot be written
	 */
	private static void zipFile_canAlsoHandleDirectory_recursionVersion_helper(File baseFile, File currentFile,
			ZipOutputStream zipOutputStream) throws IOException {
		if (currentFile.isFile()) {
			writeFileEntry(zipOutputStream, currentFile, buildEntryName(baseFile, currentFile));
		} else if (currentFile.isDirectory()) {
			File[] children = currentFile.listFiles();

			if (children != null && children.length > 0) {
				for (File child : children) {
					zipFile_canAlsoHandleDirectory_recursionVersion_helper(baseFile, child, zipOutputStream);
				}
			} else {
				// keep empty directories by giving them an entry of their own
				putDirectoryEntry(zipOutputStream, buildEntryName(baseFile, currentFile));
			}
		}
	}

	/**
	 * Builds the entry name of {@code file}: the name of {@code baseFile} followed
	 * by the path of {@code file} relative to {@code baseFile}, joined with "/".
	 * When both are the same file the result is just the base name, without a
	 * trailing separator.
	 */
	private static String buildEntryName(File baseFile, File file) {
		StringBuilder entryName = new StringBuilder(baseFile.getName());
		Path relativePath = baseFile.toPath().relativize(file.toPath());
		for (Path part : relativePath) {
			String segment = part.toString();
			// relativizing a path against itself yields a single empty segment
			if (!segment.isEmpty()) {
				entryName.append(ZIP_SEPARATOR).append(segment);
			}
		}
		return entryName.toString();
	}

	/** Stores the content of {@code file} in the archive under {@code entryName}. */
	private static void writeFileEntry(ZipOutputStream zipOutputStream, File file, String entryName)
			throws IOException {
		try (FileInputStream fileInputStream = new FileInputStream(file)) {
			zipOutputStream.putNextEntry(new ZipEntry(entryName));
			fileInputStream.transferTo(zipOutputStream);
			zipOutputStream.closeEntry();
		}
	}

	/** Adds a content-less directory entry; {@code entryName} is given without trailing "/". */
	private static void putDirectoryEntry(ZipOutputStream zipOutputStream, String entryName) throws IOException {
		zipOutputStream.putNextEntry(new ZipEntry(entryName + ZIP_SEPARATOR));
		zipOutputStream.closeEntry();
	}

	/******************** Unzip file *****************/

	/**
	 * Extracts a zip archive by streaming through it with {@link ZipInputStream}.
	 * Files that already exist in the target directory are left untouched.
	 *
	 * @param zipFilePath path of the archive to extract
	 * @param toPath      directory to extract into (created if missing)
	 * @throws IOException if the archive cannot be read, a target cannot be
	 *                     written, or an entry points outside of {@code toPath}
	 */
	public static void unzipFile_byZipInputStream(String zipFilePath, String toPath) throws IOException {
		File toPathFile = new File(toPath);
		ensureDirectory(toPathFile);

		try (FileInputStream fileInputStream = new FileInputStream(zipFilePath);
				ZipInputStream zipInputStream = new ZipInputStream(fileInputStream)) {
			ZipEntry zipEntry;
			while ((zipEntry = zipInputStream.getNextEntry()) != null) {
				File target = resolveInsideDirectory(toPathFile, zipEntry.getName());

				if (isDirectoryName(zipEntry.getName())) {
					ensureDirectory(target);
				} else if (!target.exists()) {
					ensureDirectory(target.getParentFile());
					try (FileOutputStream fileOutputStream = new FileOutputStream(target)) {
						// reads up to the end of the current entry only
						zipInputStream.transferTo(fileOutputStream);
					}
				}
			}
		}
	}

	/**
	 * Extracts a zip archive using the random-access {@link ZipFile} API. Files
	 * that already exist in the target directory are left untouched.
	 *
	 * @param zipFilePath path of the archive to extract
	 * @param toPath      directory to extract into (created if missing)
	 * @throws IOException if the archive cannot be read, a target cannot be
	 *                     written, or an entry points outside of {@code toPath}
	 */
	public static void unzipFile_byZipFile(String zipFilePath, String toPath) throws IOException {
		File toPathFile = new File(toPath);
		ensureDirectory(toPathFile);

		try (ZipFile zipFile = new ZipFile(zipFilePath)) {
			Enumeration<? extends ZipEntry> zipEntryEnumeration = zipFile.entries();
			while (zipEntryEnumeration.hasMoreElements()) {
				ZipEntry zipEntry = zipEntryEnumeration.nextElement();
				File target = resolveInsideDirectory(toPathFile, zipEntry.getName());

				if (isDirectoryName(zipEntry.getName())) {
					ensureDirectory(target);
				} else if (!target.exists()) {
					ensureDirectory(target.getParentFile());
					try (InputStream zipFileInputStream = zipFile.getInputStream(zipEntry);
							FileOutputStream fileOutputStream = new FileOutputStream(target)) {
						zipFileInputStream.transferTo(fileOutputStream);
					}
				}
			}
		}
	}

	/**
	 * Tells whether an entry name denotes a directory. ZipEntry.isDirectory() only
	 * checks for a trailing "/"; the platform separator is accepted as well so that
	 * archives written with File.separator on this platform are still understood.
	 */
	private static boolean isDirectoryName(String entryName) {
		return entryName.endsWith(ZIP_SEPARATOR) || entryName.endsWith(File.separator);
	}

	/**
	 * Maps an entry name to a file below {@code destinationDir} and rejects names
	 * (for example ones containing "..") that would end up outside of it.
	 */
	private static File resolveInsideDirectory(File destinationDir, String entryName) throws IOException {
		File target = new File(destinationDir, entryName);
		Path canonicalDestination = destinationDir.getCanonicalFile().toPath();
		Path canonicalTarget = target.getCanonicalFile().toPath();
		if (!canonicalTarget.startsWith(canonicalDestination)) {
			throw new IOException("Zip entry is outside of the target directory: " + entryName);
		}
		return target;
	}

	/** Creates {@code directory} (and missing parents) unless the path already exists. */
	private static void ensureDirectory(File directory) throws IOException {
		if (directory == null || directory.exists()) {
			return;
		}
		// re-check after a failed mkdirs() in case the directory appeared meanwhile
		if (!directory.mkdirs() && !directory.isDirectory()) {
			throw new IOException("Could not create directory: " + directory);
		}
	}
}

說明:

上述程式碼展示了壓縮及解壓縮的各種不同方法,
zipFile_onlyForSingleFile() 只是展示了基本用法,只處理單一檔案或單一資料夾,
可以注意到幾點:

  1. 當處理資料夾時,只需要放入代表檔案 (或資料夾) 的 ZipEntry
    zipOutputStream.putNextEntry(zipEntry);
    不需要再寫入檔案的二進位資料,
    zipOutputStream.write(fileInputStream.readAllBytes());
    而如果是處理檔案時就需要再寫入檔案的二進位資料。
  2. 設定 new ZipEntry(String name) 時,需要 name 的參數,
    其代表檔案或資料夾的路徑(連同名字),路徑是相對於壓縮檔 root 位置,
    例如:
    xxx/yyy/zzz/someFile.jpg
    xxx/yyy/zzz/someDirectory/
    要注意如果是資料夾的話,要在最後面加上檔案路徑的分隔符號,例如 "/"

zipFile_canAlsoHandleDirectory_stackVersion() 和
zipFile_canAlsoHandleDirectory_recursionVersion() 展示了
如何壓縮一個內含多檔案(或資料夾)的巢狀結構 (即可能有多層資料夾 ) 資料夾的方法,
原理跟 zipFile_onlyForSingleFile() 一樣,只是對資料夾內的各層資料夾及內部檔案一個個的
去做設定 ZipEntry 的動作,
zipOutputStream.putNextEntry(zipEntry);
zipOutputStream.write(fileInputStream.readAllBytes());
只是遍歷檔案的實現方式不同而已,
zipFile_canAlsoHandleDirectory_stackVersion() 使用了佇列 (queue) 來實現 (方法名稱雖然叫 stackVersion,但實際使用的是 Queue),
zipFile_canAlsoHandleDirectory_recursionVersion() 使用了遞迴 (recursion) 來實現。

在解壓縮的部份,展示了兩個方法:
unzipFile_byZipFile() 和
unzipFile_byZipInputStream(),
基本差異不大,只是使用的幫助 Class 不同而已,
unzipFile_byZipFile() 用了 ZipFile,而
unzipFile_byZipInputStream() 用了 ZipInputStream,
需要注意的是,
ZipEntry.isDirectory() 方法不是一個正確獲取 ZipEntry 是否為資料夾的好方法,
我們可以從源碼中看到如下程式碼:

public class ZipEntry implements ZipConstants, Cloneable {
..............
	public boolean isDirectory() {
        	return name.endsWith("/");
	}
..............
}

可以發現 isDirectory() 只是單純判斷了 ZipEntry 的 name 後面是否是 "/" 結尾,
但是如果如上述程式,我們在壓縮檔案時用 File.separator 來設定 ZipEntry 的檔案路徑分隔符的話,
判斷 ZipEntry 是否為資料夾就不應只是判斷結尾是否是 "/" ,而是看所在系統而有所不同 (例如 Unix 系統或 Windows 系統),例如有可能分隔符會是 "/" 或 "\" 。

參考資料:

沒有留言 :

張貼留言