Java大文件分割

Java大文件分割

代码如下

 

package com.oceansoft.dupcheck;

 

import java.io.BufferedReader;

import java.io.BufferedWriter;

import java.io.File;

import java.io.FileFilter;

import java.io.FileInputStream;

import java.io.FileNotFoundException;

import java.io.FileOutputStream;

import java.io.IOException;

import java.io.InputStreamReader;

import java.io.OutputStreamWriter;

import java.io.PushbackReader;

import java.io.Writer;

import java.util.concurrent.atomic.AtomicInteger;

import java.util.regex.Matcher;

import java.util.regex.Pattern;

 

import com.oceansoft.dupcheck.util.IOUtils;

 

/**

 * 大文件分割工具类(把大的文本文件分割为小的文本文件,按行分割)

 * 

 * @author 储玉庭

 */

class FileSpliter {

 

private static ThreadLocal<AtomicInteger> threadCounter = new ThreadLocal<AtomicInteger>();

private static ThreadLocal<FileProceedCallback> threadCallback = new                   ThreadLocal<FileProceedCallback>();

 

public static final void setThreadFileCallback(FileProceedCallback callback) {

 

if (null != threadCallback.get()) {

throw new IllegalStateException("callback already set");

}

 

threadCallback.set(callback);

}

 

/**

* 分割指定目录下的文件

* @param inputDir

*            输入文件目录

* @param extension

*            文件扩展名

* @param splitSize

*            分割后的小文件的大小

* @throws IOException

*/

public static final void splitFiles(File inputDir, final String extension,

String encoding, int splitSize) throws IOException {

 

File[] files = inputDir.listFiles(new FileFilter() {

 

@Override

public boolean accept(File pathname) {

 

return pathname.getPath().endsWith(extension);

}

});

 

if (null != files) {

 

for (File file : files) {

splitFile(file, encoding, splitSize);

}

}

threadCallback.get().proceedEnd();

}

 

private static final void splitFile(File largeFile, String encoding,

int splitSize) throws IOException {

 

threadCounter.set(new AtomicInteger(0));

if (largeFile.length() <= splitSize) {

 

File tmpFile = nextFile(largeFile);

IOUtils.copyFile(largeFile, tmpFile);

threadCallback.get().proceeded(tmpFile);

return;

}

 

File proceedFile = null;

final FileProceedCallback callback = threadCallback.get();

PushbackReader reader = null;

Writer output = null;

try {

int c;

int totalBytes = (int) largeFile.length();

int charsRead = 0, totalBytesRead = 0;

String CRLF = System.getProperty("line.separator");

proceedFile = nextFile(largeFile);

reader = new PushbackReader(new BufferedReader(

new InputStreamReader(new FileInputStream(largeFile),

encoding)));

output = new BufferedWriter(new OutputStreamWriter(

new FileOutputStream(proceedFile), encoding));

boolean hasLine = false;

while (true) {

 

c = reader.read();

 

if (-1 == c) {

break;

}

 

totalBytesRead++;

charsRead++;

if ('\r' == c) {

if (!hasLine) {

hasLine = true;

// 保存文件,生成下一个文件

if (charsRead >= splitSize) {

hasLine = false;

output.close();

charsRead = 0;

callback.proceeded(proceedFile);

proceedFile = nextFile(largeFile);

output = new BufferedWriter(

new OutputStreamWriter(

new FileOutputStream(proceedFile),

encoding));

}

}

 

// consume \n if possible

c = reader.read();

if ('\r' != c && '\n' != c) {

reader.unread(c);

} else {

totalBytesRead++;

charsRead++;

}

} else if ('\n' == c) {

if (!hasLine) {

hasLine = true;

 

// 保存文件,生成下一个文件

if (charsRead >= splitSize) {

hasLine = false;

output.close();

charsRead = 0;

callback.proceeded(proceedFile);

proceedFile = nextFile(largeFile);

output = new BufferedWriter(

new OutputStreamWriter(

new FileOutputStream(proceedFile),

encoding));

}

}

} else {

if (hasLine) {

hasLine = false;

output.write(CRLF);

}

output.write(c);

}

}

} finally {

try {

 

if (null != reader) {

reader.close();

}

 

if (null != output) {

output.close();

}

} catch (IOException e) {

e.printStackTrace();

}

 

if (null != proceedFile) {

callback.proceeded(proceedFile);

}

}

}

 

private static final File nextFile(File original) {

 

String filename = original.getName();

File tempDir = new File(System.getProperty("java.io.tmpdir")

+ File.separator + "split");

File file = new File(tempDir, filename.concat(".").concat(

String.valueOf(threadCounter.get().getAndIncrement())));

 

if (null != file.getParentFile() && !file.getParentFile().exists()) {

file.getParentFile().mkdirs();

}

 

return file;

}

 

public static interface FileProceedCallback {

 

/**

* 某个文件处理成功

* @param file

*/

void proceeded(File file);

 

/**

* 所有文件处理完毕

*/

void proceedEnd();

}

}