hadoop,hive启用lzo压缩和创建lzo索引
hadoop,hive启用lzo压缩和创建lzo索引
- <property> <!-- Comma-separated list of compression codec classes; it includes the two com.hadoop.compression.lzo codecs (LzoCodec and LzopCodec) next to the stock Default, Gzip and BZip2 codecs. NOTE(review): presumably belongs in core-site.xml; confirm. -->
- <name>io.compression.codecs</name>
- <value>org.apache.hadoop.io.compress.DefaultCodec,com.hadoop.compression.lzo.LzoCodec,com.hadoop.compression.lzo.LzopCodec,org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.BZip2Codec</value>
- </property>
- <property> <!-- Names com.hadoop.compression.lzo.LzoCodec as the class behind the LZO codec setting. -->
- <name>io.compression.codec.lzo.class</name>
- <value>com.hadoop.compression.lzo.LzoCodec</value>
- </property>
- <property> <!-- Switches on compression of intermediate map output. NOTE(review): presumably belongs in mapred-site.xml; confirm. -->
- <name>mapred.compress.map.output</name>
- <value>true</value>
- </property>
- <property> <!-- Codec class used for the compressed map output: the LZO codec. -->
- <name>mapred.map.output.compression.codec</name>
- <value>com.hadoop.compression.lzo.LzoCodec</value>
- </property>
- <property> <!-- JVM options handed to child task JVMs; only java.library.path is set here. NOTE(review): presumably this directory holds the native LZO libraries, and any heap flags configured elsewhere may need repeating in this value; confirm both. -->
- <name>mapred.child.java.opts</name>
- <value>-Djava.library.path=/opt/hadoopgpl/native/Linux-amd64-64</value>
- </property>
- hadoop jar /opt/hadoopgpl/lib/hadoop-lzo.jar com.hadoop.compression.lzo.LzoIndexer /data/rawlog/your_log_file.lzo  # Runs the LzoIndexer class from hadoop-lzo.jar against one .lzo file to create its LZO index; the path is a placeholder to replace with a real file.
- hadoop jar /usr/share/hadoop/contrib/streaming/hadoop-streaming-1.0.3.jar \
- -file map.py \
- -file red.py \
- -mapper map.py \
- -reducer red.py \
- -inputformat com.hadoop.mapred.DeprecatedLzoTextInputFormat \
- -input /data/rawlog/test/20130325 -output /tmp/test_20130325  # Streaming job: ships map.py and red.py as mapper and reducer, and reads the input directory through DeprecatedLzoTextInputFormat. NOTE(review): presumably the input files are .lzo files; confirm.
- CREATE EXTERNAL TABLE adpv_20130325( -- External table with 13 string columns over the files at the LOCATION below, stored as plain text.
- stat_date string,
- stat_hour string,
- ip string,
- logdate string,
- uid string,
- ver string,
- pid string,
- chid string,
- json string,
- country string,
- province string,
- city string,
- isp string)
- ROW FORMAT DELIMITED
- FIELDS TERMINATED BY '\t' -- columns are tab-separated
- STORED AS TEXTFILE -- NOTE(review): presumably the pre-LZO form of this table; the same table name is defined again further down with an LZO input format, so confirm which one is meant to be run.
- LOCATION
- 'hdfs://hadoopmaster:9000/data/dw/adpv/20130325' -- NOTE(review): no terminating semicolon follows; confirm how this statement is submitted.
- CREATE EXTERNAL TABLE adpv_20130325( -- Same table name, columns and LOCATION as the TEXTFILE definition earlier in this file, but read through the LZO text input format. NOTE(review): presumably the earlier table must be dropped before this runs; confirm.
- stat_date string,
- stat_hour string,
- ip string,
- logdate string,
- uid string,
- ver string,
- pid string,
- chid string,
- json string,
- country string,
- province string,
- city string,
- isp string)
- ROW FORMAT DELIMITED
- FIELDS TERMINATED BY '\t' -- columns are tab-separated
- STORED AS INPUTFORMAT -- explicit input/output format classes instead of a named storage format
- 'com.hadoop.mapred.DeprecatedLzoTextInputFormat'
- OUTPUTFORMAT
- 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
- LOCATION
- 'hdfs://hadoopmaster:9000/data/dw/adpv/20130325' -- NOTE(review): no terminating semicolon follows; confirm how this statement is submitted.
- -inputformat com.hadoop.mapred.DeprecatedLzoTextInputFormat -jobconf mapred.output.compress=true -jobconf mapred.output.compression.codec=com.hadoop.compression.lzo.LzopCodec  # Option fragment for a streaming job: LZO text input plus job output compressed with LzopCodec. NOTE(review): -jobconf is presumably deprecated in favour of -D, which must precede the streaming options; confirm against the streaming release in use.