Hadoop word count example
Configuration files under etc/hadoop
1. vi core-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Put site-specific property overrides in this file. -->
<configuration>
    <property>
        <name>fs.default.name</name>
        <value>hdfs://localhost:9000</value>
    </property>
</configuration>
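fs.default.name sets the URI of the default filesystem, so the bare HDFS paths used below (e.g. /user/trunk) resolve against the NameNode at localhost:9000. In Hadoop 2.x the property was renamed fs.defaultFS; the old name is deprecated but still honored.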
2. vi mapred-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Put site-specific property overrides in this file. -->
<configuration>
    <property>
        <name>mapred.job.tracker</name>
        <value>localhost:9001</value>
    </property>
</configuration>
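mapred.job.tracker is the Hadoop 1.x JobTracker address. Note that the jps output in step 5.1 shows ResourceManager and NodeManager, i.e. a YARN (Hadoop 2.x) cluster; there this property is ignored, and MapReduce jobs are sent to YARN by setting mapreduce.framework.name to yarn.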
3. vi hdfs-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Put site-specific property overrides in this file. -->
<configuration>
    <property>
        <name>dfs.replication</name>
        <value>1</value>
    </property>
    <property>
        <name>dfs.name.dir</name>
        <value>/home/hdfs/name</value>
    </property>
    <property>
        <name>dfs.data.dir</name>
        <value>/home/hdfs/data</value>
    </property>
</configuration>
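dfs.replication is lowered to 1 because a single-node cluster has only one DataNode (the default is 3 copies of each block). dfs.name.dir and dfs.data.dir are the local directories holding the NameNode metadata and the DataNode block files; they must be writable by the user running Hadoop.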
4. vi hadoop-env.sh
export JAVA_HOME=/opt/jdk1.8.0_65    # point JAVA_HOME at the JDK installation
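Before the very first start, format the NameNode so that it can initialize /home/hdfs/name: bin/hadoop namenode -format (on newer releases: bin/hdfs namenode -format).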
5. Start
The start scripts are under sbin:
./start-all.sh
5.1 jps lists the running daemons (there should be five):
15509 ResourceManager
14808 NameNode
15241 SecondaryNameNode
14974 DataNode
15679 NodeManager
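NameNode, DataNode and SecondaryNameNode belong to HDFS (the SecondaryNameNode periodically checkpoints the namespace image); ResourceManager and NodeManager belong to YARN. If any of the five is missing, check the corresponding file under logs/.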
6. Shut down: ./stop-all.sh
HDFS commands
hadoop fs -mkdir /user/trunk
hadoop fs -ls /user
hadoop fs -lsr /user (recursive; newer releases use hadoop fs -ls -R instead)
hadoop fs -put test.txt /user/trunk
hadoop fs -put test.txt . (copies into the current HDFS directory, i.e. /user/<username>, which must be created first)
hadoop fs -get /user/trunk/test.txt . (copies to the current local directory)
hadoop fs -cat /user/trunk/test.txt
hadoop fs -tail /user/trunk/test.txt (shows the last 1 KB of the file)
hadoop fs -rm /user/trunk/test.txt
hadoop fs -help ls (shows the help text for the ls command)
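The same operations are also available from Java through the FileSystem API. A minimal sketch, assuming the core-site.xml above; the class name HdfsDemo is only for illustration:
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
public class HdfsDemo {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Same URI as core-site.xml; picked up automatically when the
        // config files are on the classpath.
        conf.set("fs.default.name", "hdfs://localhost:9000");
        FileSystem fs = FileSystem.get(conf);
        fs.mkdirs(new Path("/user/trunk"));                  // hadoop fs -mkdir
        fs.copyFromLocalFile(new Path("test.txt"),           // hadoop fs -put
                new Path("/user/trunk/test.txt"));
        fs.copyToLocalFile(new Path("/user/trunk/test.txt"), // hadoop fs -get
                new Path("test.copy.txt"));
        fs.delete(new Path("/user/trunk/test.txt"), false);  // hadoop fs -rm
        fs.close();
    }
}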
Java code
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class WordcountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
    private static final IntWritable ONE = new IntWritable(1);
    private final Text word = new Text();

    // key is the byte offset of the line in the input file; value is the line itself.
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Split the line on spaces and emit (word, 1) for every token.
        for (String s : value.toString().split(" ")) {
            word.set(s);
            context.write(word, ONE);
        }
    }
}
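For the input line "hello world hello", this mapper emits (hello, 1), (world, 1), (hello, 1).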
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class WordcountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
    // All counts for one word arrive together; add them up and write the total.
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        int sum = 0;
        for (IntWritable val : values) {
            sum += val.get();
        }
        context.write(key, new IntWritable(sum));
    }
}
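Continuing the example: the shuffle phase groups the map output by key, so the reducer receives hello -> [1, 1] and world -> [1], and writes hello 2 and world 1.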
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordCount {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // new Job(conf, ...) is deprecated in Hadoop 2.x (use Job.getInstance
        // there), but it matches the hadoop-core-0.20.2.jar listed below.
        Job job = new Job(conf, "word count");
        job.setJarByClass(WordCount.class);
        job.setMapperClass(WordcountMapper.class);
        // The reducer doubles as the combiner: summing counts is associative
        // and commutative, so pre-aggregating on the map side is safe.
        job.setCombinerClass(WordcountReducer.class);
        job.setReducerClass(WordcountReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));   // input directory
        FileOutputFormat.setOutputPath(job, new Path(args[1])); // output directory (must not exist yet)
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
Dependency: hadoop-core-0.20.2.jar
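To run the example, compile the three classes against that jar, package them, and submit the job. A sketch of the steps; wordcount.jar and the input/output paths are placeholder names:
mkdir classes
javac -classpath hadoop-core-0.20.2.jar -d classes WordcountMapper.java WordcountReducer.java WordCount.java
jar cvf wordcount.jar -C classes .
hadoop fs -mkdir /user/trunk/input
hadoop fs -put test.txt /user/trunk/input
hadoop jar wordcount.jar WordCount /user/trunk/input /user/trunk/output
hadoop fs -cat /user/trunk/output/part-r-00000 (each line is a word and its count)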