Hadoop word count example
Configuration files under etc/hadoop
1. vi core-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Put site-specific property overrides in this file. -->
<configuration>
    <property>
        <name>fs.default.name</name>
        <value>hdfs://localhost:9000</value>
    </property>
</configuration>
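fs.default.name sets the URI of the default filesystem, so the bare HDFS paths used below (e.g. /user/trunk) resolve against the NameNode at localhost:9000. In Hadoop 2.x the property was renamed fs.defaultFS; the old name is deprecated but still honored.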
2. vi mapred-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Put site-specific property overrides in this file. -->
<configuration>
    <property>
        <name>mapred.job.tracker</name>
        <value>localhost:9001</value>
    </property>
</configuration>
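mapred.job.tracker is the Hadoop 1.x JobTracker address. Note that the jps output in step 5.1 shows ResourceManager and NodeManager, i.e. a YARN (Hadoop 2.x) cluster; there this property is ignored, and MapReduce jobs are sent to YARN by setting mapreduce.framework.name to yarn.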
3. vi hdfs-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Put site-specific property overrides in this file. -->
<configuration>
    <property>
        <name>dfs.replication</name>
        <value>1</value>
    </property>
    <property>
        <name>dfs.name.dir</name>
        <value>/home/hdfs/name</value>
    </property>
    <property>
        <name>dfs.data.dir</name>
        <value>/home/hdfs/data</value>
    </property>
</configuration>
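dfs.replication is lowered to 1 because a single-node cluster has only one DataNode (the default is 3 copies of each block). dfs.name.dir and dfs.data.dir are the local directories holding the NameNode metadata and the DataNode block files; they must be writable by the user running Hadoop.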
4. vi hadoop-env.sh
export JAVA_HOME=/opt/jdk1.8.0_65    # point JAVA_HOME at the JDK installation
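Before the very first start, format the NameNode so that it can initialize /home/hdfs/name: bin/hadoop namenode -format (on newer releases: bin/hdfs namenode -format).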
5. Start
The start scripts are under sbin:
./start-all.sh
5.1 jps lists the running daemons (there should be five):
15509 ResourceManager
14808 NameNode
15241 SecondaryNameNode
14974 DataNode
15679 NodeManager
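NameNode, DataNode and SecondaryNameNode belong to HDFS (the SecondaryNameNode periodically checkpoints the namespace image); ResourceManager and NodeManager belong to YARN. If any of the five is missing, check the corresponding file under logs/.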
6. Shut down: ./stop-all.sh
HDFS commands
hadoop fs -mkdir /user/trunk
hadoop fs -ls /user
hadoop fs -lsr /user (recursive; newer releases use hadoop fs -ls -R instead)
hadoop fs -put test.txt /user/trunk
hadoop fs -put test.txt . (copies into the current HDFS directory, i.e. /user/<username>, which must be created first)
hadoop fs -get /user/trunk/test.txt . (copies to the current local directory)
hadoop fs -cat /user/trunk/test.txt
hadoop fs -tail /user/trunk/test.txt (shows the last 1 KB of the file)
hadoop fs -rm /user/trunk/test.txt
hadoop fs -help ls (shows the help text for the ls command)
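The same operations are also available from Java through the FileSystem API. A minimal sketch, assuming the core-site.xml above; the class name HdfsDemo is only for illustration:
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
public class HdfsDemo {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Same URI as core-site.xml; picked up automatically when the
        // config files are on the classpath.
        conf.set("fs.default.name", "hdfs://localhost:9000");
        FileSystem fs = FileSystem.get(conf);
        fs.mkdirs(new Path("/user/trunk"));                  // hadoop fs -mkdir
        fs.copyFromLocalFile(new Path("test.txt"),           // hadoop fs -put
                new Path("/user/trunk/test.txt"));
        fs.copyToLocalFile(new Path("/user/trunk/test.txt"), // hadoop fs -get
                new Path("test.copy.txt"));
        fs.delete(new Path("/user/trunk/test.txt"), false);  // hadoop fs -rm
        fs.close();
    }
}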
Java code
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class WordcountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
    private static final IntWritable ONE = new IntWritable(1);
    private final Text word = new Text();

    // key is the byte offset of the line in the input file; value is the line itself.
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Split the line on spaces and emit (word, 1) for every token.
        for (String s : value.toString().split(" ")) {
            word.set(s);
            context.write(word, ONE);
        }
    }
}
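For the input line "hello world hello", this mapper emits (hello, 1), (world, 1), (hello, 1).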
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class WordcountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
    // All counts for one word arrive together; add them up and write the total.
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        int sum = 0;
        for (IntWritable val : values) {
            sum += val.get();
        }
        context.write(key, new IntWritable(sum));
    }
}
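Continuing the example: the shuffle phase groups the map output by key, so the reducer receives hello -> [1, 1] and world -> [1], and writes hello 2 and world 1.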
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordCount {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // new Job(conf, ...) is deprecated in Hadoop 2.x (use Job.getInstance
        // there), but it matches the hadoop-core-0.20.2.jar listed below.
        Job job = new Job(conf, "word count");
        job.setJarByClass(WordCount.class);
        job.setMapperClass(WordcountMapper.class);
        // The reducer doubles as the combiner: summing counts is associative
        // and commutative, so pre-aggregating on the map side is safe.
        job.setCombinerClass(WordcountReducer.class);
        job.setReducerClass(WordcountReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));   // input directory
        FileOutputFormat.setOutputPath(job, new Path(args[1])); // output directory (must not exist yet)
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
Dependency: hadoop-core-0.20.2.jar
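To run the example, compile the three classes against that jar, package them, and submit the job. A sketch of the steps; wordcount.jar and the input/output paths are placeholder names:
mkdir classes
javac -classpath hadoop-core-0.20.2.jar -d classes WordcountMapper.java WordcountReducer.java WordCount.java
jar cvf wordcount.jar -C classes .
hadoop fs -mkdir /user/trunk/input
hadoop fs -put test.txt /user/trunk/input
hadoop jar wordcount.jar WordCount /user/trunk/input /user/trunk/output
hadoop fs -cat /user/trunk/output/part-r-00000 (each line is a word and its count)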