编写 Java UDF 并在 Pig 中使用
1、Java UDF
package com.zqk.useragentutils;
import org.apache.pig.EvalFunc;
import org.apache.pig.data.BinSedesTupleFactory;
import org.apache.pig.data.DataBag;
import org.apache.pig.data.DefaultBagFactory;
import org.apache.pig.data.Tuple;
import java.io.IOException;
/**
 * Pig UDF that turns a numeric user-agent id into a three-field tuple holding
 * the operating system name, the browser name and the device name.
 *
 * <p>A component whose object or name is {@code null} is emitted as an empty
 * string, so the returned tuple always has exactly three fields.
 *
 * <p>Created 13-4-15.
 *
 * @author qeekey
 */
public class UserAgentParse extends EvalFunc<Tuple> {

    /**
     * Decodes the user-agent id held in the first field of {@code input}.
     *
     * @param input tuple whose first field is the user-agent id as a {@code Long}
     * @return a tuple {@code (os, browser, device)}, or {@code null} when the input
     *         is null or empty, its first field is null, or the id cannot be
     *         resolved to a {@code UserAgent}
     * @throws IOException if reading the field or decoding the id fails; the
     *         underlying exception is preserved as the cause
     */
    @Override
    public Tuple exec(Tuple input) throws IOException {
        if (input == null || input.size() == 0) {
            return null;
        }
        try {
            Object rawId = input.get(0);
            // A null field would otherwise blow up while unboxing the Long;
            // null in, null out lets Pig carry on with the remaining rows.
            if (rawId == null) {
                return null;
            }

            UserAgent userAgent = UserAgent.valueOf((Long) rawId);
            if (userAgent == null) {
                // The id could not be parsed into a user agent.
                return null;
            }

            Tuple tuple = BinSedesTupleFactory.getInstance().newTuple();
            tuple.append(userAgent.getOperatingSystem() != null
                    && userAgent.getOperatingSystem().getName() != null
                    ? userAgent.getOperatingSystem().getName() : "");
            tuple.append(userAgent.getBrowser() != null
                    && userAgent.getBrowser().getName() != null
                    ? userAgent.getBrowser().getName() : "");
            tuple.append(userAgent.getDevice() != null
                    && userAgent.getDevice().getName() != null
                    ? userAgent.getDevice().getName() : "");
            return tuple;
        } catch (Exception e) {
            throw new IOException(e);
        }
    }

    /**
     * Manual smoke test: decodes one sample id and prints each resulting field.
     *
     * @param args ignored
     * @throws Exception if decoding the sample id fails
     */
    public static void main(String[] args) throws Exception {
        Tuple input = BinSedesTupleFactory.getInstance().newTuple();
        input.append(72621750893412442L);
        UserAgentParse parser = new UserAgentParse();
        Tuple result = parser.exec(input);
        // exec may legitimately return null; avoid a NullPointerException here.
        if (result == null) {
            System.out.println("user agent id could not be parsed");
            return;
        }
        for (int i = 0; i < result.size(); i++) {
            System.out.println(result.get(i));
        }
    }
}
2、Pig 调用
-- Register the jar that contains the UserAgentParse UDF class.
REGISTER useragent.jar;
-- Bind the short alias UserAgentParse to the fully qualified Java class.
DEFINE UserAgentParse com.zqk.useragentutils.UserAgentParse();
-- (statements omitted in the original note)
......
-- The UDF returns a single three-field tuple; FLATTEN expands it into the
-- columns os, browser and device. The Java side casts its argument to Long,
-- so user_agent is expected to be a long id here.
data = FOREACH data GENERATE FLATTEN(UserAgentParse(user_agent)) AS (os,browser,device);