Developer’s template: Hive using JDBC

The Developer’s Template series is intended to ease the life of Big Data developers by giving them ready-made starting points for application development, leaving behind the headache of starting from scratch. Here is a Java program, along with its POM file, that lets you connect to Hive over JDBC and perform some basic operations…

The POM file builds two JAR files: the regular artifact and an uber JAR that bundles all of the dependencies (via the Maven Shade plugin).
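Assuming a standard Maven layout, a single command builds both; the uber JAR’s name comes from the finalName setting in the Shade plugin configuration further below:

mvn clean package
# target/hive-jdbc-0.0.1-SNAPSHOT.jar        (regular artifact)
# target/uber-hive-jdbc-0.0.1-SNAPSHOT.jar   (uber JAR with all dependencies bundled)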

HiveJdbcClient.java

package com.abc.hive.hive_jdbc;

import java.sql.SQLException;
import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.Statement;
import java.sql.DriverManager;

public class HiveJdbcClient {
    private static String driverName = "org.apache.hive.jdbc.HiveDriver";

    /**
     * @param args
     * @throws SQLException
     */
    public static void main(String[] args) throws SQLException {
        try {
            Class.forName(driverName);
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
            System.exit(1);
        }
        // The second and third arguments are the user the queries should run as
        // and the password (both left empty here for an unsecured cluster).
        Connection con = DriverManager.getConnection("jdbc:hive2://machine.abc.com:10000/default", "", "");
        Statement stmt = con.createStatement();
        String tableName = "testHiveDriverTable";
        stmt.execute("drop table if exists " + tableName);
        stmt.execute("create table " + tableName + " (key int, value string)");

        // show tables
        String sql = "show tables '" + tableName + "'";
        System.out.println("Running: " + sql);
        ResultSet res = stmt.executeQuery(sql);
        if (res.next()) {
            System.out.println(res.getString(1));
        }

        // describe table
        sql = "describe " + tableName;
        System.out.println("Running: " + sql);
        res = stmt.executeQuery(sql);
        while (res.next()) {
            System.out.println(res.getString(1) + "\t" + res.getString(2));
        }

        // load data into table
        // NOTE: filepath has to be local to the hive server
        // NOTE: /tmp/a.txt is a ctrl-A separated file with two fields per line
        // (for example, created with: printf '1\x01foo\n2\x01bar\n' > /tmp/a.txt)
        String filepath = "/tmp/a.txt";
        sql = "load data local inpath '" + filepath + "' into table " + tableName;
        System.out.println("Running: " + sql);
        stmt.execute(sql);

        // select * query
        sql = "select * from " + tableName;
        System.out.println("Running: " + sql);
        res = stmt.executeQuery(sql);
        while (res.next()) {
            System.out.println(res.getInt(1) + "\t" + res.getString(2));
        }

        // regular hive query
        sql = "select count(1) from " + tableName;
        System.out.println("Running: " + sql);
        res = stmt.executeQuery(sql);
        while (res.next()) {
            System.out.println(res.getString(1));
        }
    }
}
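One caveat about the template above: the Connection, Statement and ResultSet objects are never closed. Here is a minimal sketch of the same connect-and-query flow with automatic cleanup, using try-with-resources (Java 7+) and the same hypothetical host machine.abc.com:

// Sketch only: mirrors the JDBC URL and table name from the template above.
// Assumes the driver has already been registered, as in main() above.
try (Connection con = DriverManager.getConnection(
         "jdbc:hive2://machine.abc.com:10000/default", "", "");
     Statement stmt = con.createStatement();
     ResultSet res = stmt.executeQuery("select count(1) from testHiveDriverTable")) {
    while (res.next()) {
        System.out.println(res.getString(1));
    }
} // con, stmt and res are closed here, even if a query throws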


pom.xml

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>

<groupId>com.abc.hive</groupId>
<artifactId>hive-jdbc</artifactId>
<version>0.0.1-SNAPSHOT</version>
<packaging>jar</packaging>

<name>hive-jdbc</name>
<url>http://maven.apache.org</url>

<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>

<dependencies>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>3.8.1</version>
<scope>test</scope>
</dependency>

<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-core</artifactId>
<version>1.2.1</version>
<scope>provided</scope>
<exclusions>
<exclusion>
<groupId>jdk.tools</groupId>
<artifactId>jdk.tools</artifactId>

</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-jdbc</artifactId>
<version>0.14.0.2.2.0.0-2041</version>
<exclusions>
<exclusion>
<groupId>jdk.tools</groupId>
<artifactId>jdk.tools</artifactId>

</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>commons-logging</groupId>
<artifactId>commons-logging</artifactId>
<version>1.1.1</version>
</dependency>
<dependency>
<groupId>commons-codec</groupId>
<artifactId>commons-codec</artifactId>
<version>1.4</version>
</dependency>
<dependency>
<groupId>org.apache.thrift</groupId>
<artifactId>libthrift</artifactId>
<version>0.9.0</version>
</dependency>
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpcore</artifactId>
<version>4.2.5</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
<version>1.7.5</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>2.7.0</version>
<exclusions>
<exclusion>
<groupId>jdk.tools</groupId>
<artifactId>jdk.tools</artifactId>

</exclusion>
</exclusions>
</dependency>

<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-service</artifactId>
<version>0.14.0.2.2.0.0-2041</version>
<exclusions>
<exclusion>
<groupId>jdk.tools</groupId>
<artifactId>jdk.tools</artifactId>

</exclusion>
</exclusions>
</dependency>

<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-common</artifactId>
<version>0.14.0.2.2.0.0-2041</version>
<exclusions>
<exclusion>
<groupId>jdk.tools</groupId>
<artifactId>jdk.tools</artifactId>

</exclusion>
</exclusions>
</dependency>

</dependencies>

<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
<version>2.3</version>
<configuration>
<filters>
<filter>
<artifact>*:*</artifact>
<excludes>
<exclude>META-INF/*.SF</exclude>
<exclude>META-INF/*.DSA</exclude>
<exclude>META-INF/*.RSA</exclude>
</excludes>
</filter>
</filters>
<!-- Additional configuration. -->
<artifactSet>
<excludes>
<exclude>org.apache.hadoop:*</exclude>
</excludes>
</artifactSet>
</configuration>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>shade</goal>
</goals>
<configuration>
<transformers>
<transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
<manifestEntries>
<Main-Class>com.abc.hive.hive_jdbc.HiveJdbcClient</Main-Class>
<Build-Number>123</Build-Number>
</manifestEntries>
</transformer>
</transformers>
<finalName>uber-${project.artifactId}-${project.version}</finalName>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>

</project>


As newer versions of Hadoop and Hive are released, update the pom.xml to pull in the latest JARs. For example:

<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>2.7.1.2.3.2.0-2950</version>
<scope>compile</scope>
<exclusions>
<exclusion>
<artifactId>jdk.tools</artifactId>
<groupId>jdk.tools</groupId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-jdbc</artifactId>
<version>1.2.1.2.3.2.0-2950</version>
<classifier>standalone</classifier>
<scope>compile</scope>
</dependency>
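The standalone classifier resolves to hive-jdbc-&lt;version&gt;-standalone.jar, a driver JAR that already bundles the Hive client-side dependencies, so most of the individual Hive, Thrift and logging dependencies listed in the original POM should no longer be needed.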

 

Running the jar

hadoop jar uber-hive-jdbc-0.0.1-SNAPSHOT.jar
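Running through hadoop jar puts the cluster’s Hadoop classes and configuration on the classpath, which is why the Hadoop artifacts could be excluded from the uber JAR in the Shade configuration above. Since the Main-Class is set in the manifest, plain java -jar uber-hive-jdbc-0.0.1-SNAPSHOT.jar should also work, provided every runtime dependency (including Hadoop’s) actually made it into the uber JAR.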

Stay tuned.. 🙂


