-- 原始数据导入 ODS

-- Create the external ODS table that holds raw start logs.
-- Each row stores one raw log line in `line`; the table is partitioned by the
-- string column `dt`.
-- The table name is qualified with the ecdw database so that it is the same
-- table the load statement in this file targets (ecdw.ods_start_log),
-- regardless of which database the session is currently using.
-- NOTE(review): assumes the ecdw database already exists -- confirm.
-- The table is external, so per Hive semantics the drop removes only the
-- metadata; files under the location are left in place.
-- Files are read through the hadoop-lzo text input format named below, so the
-- hadoop-lzo jar must be available to Hive.
drop table if exists ecdw.ods_start_log;
create external table ecdw.ods_start_log (`line` string)
partitioned by (`dt` string)
stored as
inputformat 'com.hadoop.mapred.DeprecatedLzoTextInputFormat'
outputformat 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
location '/ecdw/ods/ods_start_log';

-- Create the external ODS table that holds raw event logs.
-- Each row stores one raw log line in `line`; the table is partitioned by the
-- string column `dt`.
-- The table name is qualified with the ecdw database so that it is the same
-- table the load statement in this file targets (ecdw.ods_event_log),
-- regardless of which database the session is currently using.
-- NOTE(review): assumes the ecdw database already exists -- confirm.
-- The table is external, so per Hive semantics the drop removes only the
-- metadata; files under the location are left in place.
-- Files are read through the hadoop-lzo text input format named below, so the
-- hadoop-lzo jar must be available to Hive.
drop table if exists ecdw.ods_event_log;
create external table ecdw.ods_event_log (`line` string)
partitioned by (`dt` string)
stored as
inputformat 'com.hadoop.mapred.DeprecatedLzoTextInputFormat'
outputformat 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
location '/ecdw/ods/ods_event_log';

-- Load the raw logs for 2020-08-08 into the dt='2020-08-08' partition of each
-- ODS table. `inpath` is used without `local`, so the source paths are
-- filesystem (HDFS) paths rather than paths on the client machine.
-- NOTE(review): the start-log source directory is spelled `topicstart` while
-- the event-log directory is `topic_event`; verify that `topicstart` is the
-- real directory name and not a typo for `topic_start`.
load data
    inpath '/origin_data/ecdw/log/topicstart/2020-08-08'
    into table ecdw.ods_start_log
    partition (dt = '2020-08-08');

load data
    inpath '/origin_data/ecdw/log/topic_event/2020-08-08'
    into table ecdw.ods_event_log
    partition (dt = '2020-08-08');

-- Build LZO indexes for the two dt=2020-08-08 partition directories.
-- These are shell commands: run them from a terminal, not inside Hive.
-- Each command runs the DistributedLzoIndexer class from the hadoop-lzo jar
-- against one partition directory. The trailing backslashes keep the jar,
-- the class name and the target path in a single command; without them the
-- path on its own line would be treated as a separate command.
hadoop jar /apps/hadoop/share/hadoop/common/hadoop-lzo-0.4.20.jar \
    com.hadoop.compression.lzo.DistributedLzoIndexer \
    /ecdw/ods/ods_start_log/dt=2020-08-08

hadoop jar /apps/hadoop/share/hadoop/common/hadoop-lzo-0.4.20.jar \
    com.hadoop.compression.lzo.DistributedLzoIndexer \
    /ecdw/ods/ods_event_log/dt=2020-08-08