编写java的UDF用于pig中

编写java的UDF用于pig中

1、java UDF 

package com.zqk.useragentutils;

 

import org.apache.pig.EvalFunc;

import org.apache.pig.data.BinSedesTupleFactory;

import org.apache.pig.data.DataBag;

import org.apache.pig.data.DefaultBagFactory;

import org.apache.pig.data.Tuple;

 

import java.io.IOException;

 

 

/**

 * Created with IntelliJ IDEA.

 * User: qeekey

 * Date: 13-4-15

 * Time: 下午2:30

 * To change this template use File | Settings | File Templates.

 */

public class UserAgentParse extends EvalFunc<Tuple> {

 

@Override

public Tuple exec(Tuple input) throws IOException {

if (input == null || input.size() == 0) {

return null;

}

 

try {

DataBag result = DefaultBagFactory.getInstance().newDefaultBag();

UserAgent userAgent = UserAgent.valueOf((Long)input.get(0));

if (userAgent != null ){

Tuple tuple = BinSedesTupleFactory.getInstance().newTuple();

tuple.append(userAgent.getOperatingSystem() != null && userAgent.getOperatingSystem().getName() !=null ? userAgent.getOperatingSystem().getName() : "" );

tuple.append(userAgent.getBrowser()!= null && userAgent.getBrowser().getName()!=null ? userAgent.getBrowser().getName() : "" );

tuple.append(userAgent.getDevice()!= null && userAgent.getDevice().getName()!=null ? userAgent.getDevice().getName() : "" );

return tuple;

}else {

//can not parse uaid

}

return null;

} catch (Exception e) {

throw new IOException(e);

}

 

}

 

 

public static void main(String[] args) throws Exception {

Tuple input = BinSedesTupleFactory.getInstance().newTuple();

input.append(72621750893412442l);

 

UserAgentParse t = new UserAgentParse();

Tuple result = t.exec(input);

for (int i = 0; i < result.size(); i++) {

System.out.println(result.get(i));

}

}

 

}

 

 

2、pig调用

-- Register the jar that contains the UDF class.
REGISTER useragent.jar;

-- Bind the short alias UserAgentParse to the fully qualified UDF class.
DEFINE UserAgentParse  com.zqk.useragentutils.UserAgentParse();

......

-- Call the UDF on user_agent and flatten the returned tuple into three named fields.
-- NOTE: the UDF casts its first argument to Long, so user_agent must be a long id here.
data = FOREACH data GENERATE FLATTEN(UserAgentParse(user_agent)) AS (os,browser,device);