HDFS 中读取数据的步骤
方法一: 通过 URL
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.charset.StandardCharsets;

import org.apache.hadoop.fs.FsUrlStreamHandlerFactory;
import org.apache.hadoop.io.IOUtils;
/**
 * Reads an HDFS file through the generic {@link java.net.URL} API and prints
 * its contents line by line to stdout.
 */
public class Test {
    // Register Hadoop's stream handler so URL understands the "hdfs://" scheme.
    // Note: setURLStreamHandlerFactory may be invoked at most once per JVM,
    // so this pattern conflicts with any other code that also sets a factory.
    static {
        URL.setURLStreamHandlerFactory(new FsUrlStreamHandlerFactory());
    }

    /**
     * Entry point: opens the hard-coded HDFS path and echoes each line.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        InputStream in = null;
        try {
            in = new URL("hdfs://192.168.1.150:9000/tmp/output/part-r-00000").openStream();
            // Decode as UTF-8 explicitly; the original relied on the platform
            // default charset, which varies by locale/JVM.
            BufferedReader reader =
                    new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8));
            String line;
            while ((line = reader.readLine()) != null) {
                System.out.println(line);
            }
        } catch (IOException e) {
            // MalformedURLException is a subclass of IOException, so a single
            // catch covers both the bad-URL and the read-failure cases.
            e.printStackTrace();
        } finally {
            // Closing the raw stream also releases the reader's underlying resource.
            IOUtils.closeStream(in);
        }
    }
}
方法二: 通过 FileSystem 方式
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
/**
 * Reads an HDFS file through the Hadoop {@code FileSystem} API and copies its
 * raw bytes to stdout.
 *
 * NOTE(review): the class name has a typo ("Sytem" should be "System"); it is
 * kept as-is because renaming a public class would break existing callers and
 * the source file name.
 */
public class FileSytemCat {
    /**
     * Entry point: opens the hard-coded HDFS path and streams it to stdout.
     *
     * @param args unused
     * @throws IOException if the filesystem cannot be reached or the read fails
     */
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        String uri = "hdfs://192.168.1.150:9000/tmp/output/part-r-00000";
        FileSystem fs = FileSystem.get(URI.create(uri), conf);
        InputStream in = null;
        try {
            in = fs.open(new Path(uri));
            // 4096-byte buffer; 'false' = do not close the streams here,
            // closing is handled explicitly in the finally block below.
            IOUtils.copyBytes(in, System.out, 4096, false);
        } finally {
            // Original leaked the stream when copyBytes threw; always close.
            IOUtils.closeStream(in);
        }
    }
}