[Java] Connecting to and Running Hive via JDBC

Hadoop has many versions, and if the version is wrong, connecting to and operating Hive may throw all sorts of errors. This is where Maven, mentioned in the previous blog post, comes in handy for managing these libraries.

Checking Hadoop and Hive versions:

1
2
hadoop version
hive --version

POM configuration:

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <groupId>com.coinidea</groupId>
  <artifactId>hive</artifactId>
  <packaging>war</packaging>
  <version>1.0-SNAPSHOT</version>
  <name>hive.web Maven Webapp</name>
  <url>http://maven.apache.org</url>
  <properties>
    <hadoop.version>2.5.0-cdh5.3.2</hadoop.version>
    <hive.version>0.13.1-cdh5.3.2</hive.version>
  </properties>
  <dependencies>
    <!--hadoop and hive-->
    <dependency>
      <groupId>org.apache.hive</groupId>
      <artifactId>hive-jdbc</artifactId>
      <version>${hive.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hive</groupId>
      <artifactId>hive-common</artifactId>
      <version>${hive.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hive</groupId>
      <artifactId>hive-exec</artifactId>
      <version>${hive.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hive</groupId>
      <artifactId>hive-metastore</artifactId>
      <version>${hive.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hive</groupId>
      <artifactId>hive-service</artifactId>
      <version>${hive.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-common</artifactId>
      <version>${hadoop.version}</version>
      <exclusions>
        <exclusion>
          <groupId>org.mortbay.jetty</groupId>
          <artifactId>jetty</artifactId>
        </exclusion>
        <exclusion>
          <groupId>org.mortbay.jetty</groupId>
          <artifactId>jetty-util</artifactId>
        </exclusion>
        <exclusion>
          <groupId>org.mortbay.jetty</groupId>
          <artifactId>jsp-2.1</artifactId>
        </exclusion>
        <exclusion>
          <groupId>org.mortbay.jetty</groupId>
          <artifactId>jsp-api-2.1</artifactId>
        </exclusion>
        <exclusion>
          <groupId>org.mortbay.jetty</groupId>
          <artifactId>servlet-api-2.1</artifactId>
        </exclusion>
        <exclusion>
          <groupId>javax.servlet</groupId>
          <artifactId>servlet-api</artifactId>
        </exclusion>
        <exclusion>
          <groupId>javax.servlet.jsp</groupId>
          <artifactId>jsp-api</artifactId>
        </exclusion>
        <!--tomcat7 includes jasper.jar-->
        <exclusion>
          <groupId>tomcat</groupId>
          <artifactId>jasper-compiler</artifactId>
        </exclusion>
        <exclusion>
          <groupId>tomcat</groupId>
          <artifactId>jasper-runtime</artifactId>
        </exclusion>
      </exclusions>
    </dependency>
    <dependency>
      <groupId>cglib</groupId>
      <artifactId>cglib-nodep</artifactId>
      <version>2.2.2</version>
    </dependency>
  </dependencies>
  <build>
    <finalName>hive</finalName>
  </build>
  <repositories>
    <repository>
      <id>cloudera</id>
      <url>https://repository.cloudera.com/artifactory/cloudera-repos</url>
      <releases>
        <enabled>true</enabled>
      </releases>
      <snapshots>
        <enabled>false</enabled>
      </snapshots>
    </repository>
    <repository>
      <id>people.apache.snapshots</id>
      <url>http://repository.apache.org/content/groups/snapshots-group/</url>
      <releases>
        <enabled>false</enabled>
      </releases>
      <snapshots>
        <enabled>true</enabled>
      </snapshots>
    </repository>
  </repositories>
</project>

Create HiveTest.java

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
package com.coinidea;

import java.sql.*;

public class HiveTest {
    /* Start server:  hive --service hiveserver2 --hiveconf hive.server2.thrift.port=10000 */
    private static String driverName = "org.apache.hive.jdbc.HiveDriver";

    /**
     * @param args
     * @throws SQLException
     */
    public static void main(String[] args) throws SQLException {
        try {
            Class.forName(driverName);
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
            System.exit(1);
        }
        //replace "hive" here with the name of the user the queries should run as
        Connection con = DriverManager.getConnection("jdbc:hive2://192.168.*.*:10000/default", "hive", "hive");
        Statement stmt = con.createStatement();
        String table_user = "test.user";
        String user_sql = "select uri from " + table_user + " limit 10";
        System.out.println("Running: " + user_sql);
        ResultSet users = stmt.executeQuery(user_sql);
        while (users.next()) {
            System.out.println("USER:" + users.getString(1));
        }
        stmt.close();
    }
}

Before running HiveTest, you need to start HiveServer2

1
hive --service hiveserver2 --hiveconf hive.server2.thrift.port=10000