使用Eclipse编写UDF函数,可以通过maven下载需要的jar包,pom.xml文件如下。
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <!-- Build descriptor for the hiveFun jar, which packages the custom Hive functions. -->
  <modelVersion>4.0.0</modelVersion>
  <groupId>cn.hadron</groupId>
  <artifactId>hiveFun</artifactId>
  <version>0.0.1-SNAPSHOT</version>
  <packaging>jar</packaging>
  <name>hiveFun</name>
  <url>http://maven.apache.org</url>
  <properties>
    <!-- Compile sources as UTF-8 so non-ASCII string literals are read consistently on every platform. -->
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
  </properties>
  <dependencies>
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>3.8.1</version>
      <scope>test</scope>
    </dependency>
    <!-- https://mvnrepository.com/artifact/org.apache.hive/hive-exec -->
    <dependency>
      <groupId>org.apache.hive</groupId>
      <artifactId>hive-exec</artifactId>
      <version>2.1.1</version>
    </dependency>
    <!--
      System-scoped reference to the local JDK's tools.jar.
      NOTE(review): presumably present to satisfy a jdk.tools dependency pulled in
      transitively by hive-exec; confirm. tools.jar only exists in JDK 8 and earlier.
    -->
    <dependency>
      <groupId>jdk.tools</groupId>
      <artifactId>jdk.tools</artifactId>
      <version>1.8</version>
      <scope>system</scope>
      <systemPath>${JAVA_HOME}/lib/tools.jar</systemPath>
    </dependency>
  </dependencies>
  <repositories>
    <!--
      Aliyun mirror declared under the id "central".
      Served over HTTPS: Maven 3.8.1 and later block plain-http repositories by default,
      which would make every dependency download fail.
    -->
    <repository>
      <id>central</id>
      <name>Central Repository</name>
      <url>https://maven.aliyun.com/nexus/content/repositories/central</url>
      <layout>default</layout>
      <snapshots>
        <enabled>false</enabled>
      </snapshots>
    </repository>
  </repositories>
</project>
package cn.hadron.hiveFun;
import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.DoubleWritable;
/**
 * Hive user-defined function that turns a salary figure into an income-level label.
 *
 * <p>Bands, checked from lowest to highest:
 * <ul>
 *   <li>below 1500: "穷人"</li>
 *   <li>1500 up to (not including) 2000: "温饱"</li>
 *   <li>2000 up to (not including) 3000: "低产"</li>
 *   <li>3000 up to (not including) 6000: "中产"</li>
 *   <li>6000 up to (not including) 8000: "高产"</li>
 *   <li>anything else: "富人"</li>
 * </ul>
 */
public class LevelUDF extends UDF {

    /**
     * Classifies one salary value.
     *
     * <p>NOTE(review): the parameter is a primitive {@code double}, so it cannot
     * represent a SQL NULL; confirm how NULL salaries should be handled.
     *
     * @param sal the salary to classify
     * @return a newly allocated {@link Text} holding the label of the matching band
     */
    public Text evaluate(double sal) {
        // Each comparison is only reached when all lower bands have been ruled out,
        // so a single upper bound per band is enough. A NaN fails every comparison
        // and therefore lands in the final band.
        final String label =
                sal < 1500 ? "穷人"
                : sal < 2000 ? "温饱"
                : sal < 3000 ? "低产"
                : sal < 6000 ? "中产"
                : sal < 8000 ? "高产"
                : "富人";
        return new Text(label);
    }
}
导出jar包
hive> add jar /root/hiveFun.jar;
Added [/root/hiveFun.jar] to class path
Added resources: [/root/hiveFun.jar]
hive> create temporary function level_sal as 'cn.hadron.hiveFun.LevelUDF';
OK
Time taken: 1.06 seconds
hive>
hive> select ename,level_sal(sal) from emp;
OK
CLARK 低产
KING 中产
MILLER 穷人
SMITH 穷人
JONES 低产
FORD 中产
ALLEN 温饱
WARD 穷人
MARTIN 穷人
BLAKE 低产
TURNER 温饱
JAMES 穷人
HADRON 高产
Time taken: 0.181 seconds, Fetched: 13 row(s)
hive>
UDAF是用户自定义聚合函数。要实现UDAF,我们需要实现下面的类:org.apache.hadoop.hive.ql.udf.generic.AbstractGenericUDAFResolver 和 org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator。AbstractGenericUDAFResolver检查输入参数,并且指定使用哪个evaluator。
GenericUDAFResolver接口已经过时弃用了,现在应实现GenericUDAFResolver2接口。