配置环境

本次配置的系统环境为:

OS macOS High Sierra 10.13.1

JDK Oracle Java SE JDK 1.8.0_45

首先从官网上的清华镜像源下载最新的稳定版本HBase,并解压到配置目录下

# Download the HBase 1.3.1 binary release archive from the Tsinghua (TUNA) Apache mirror
wget https://mirrors.tuna.tsinghua.edu.cn/apache/hbase/1.3.1/hbase-1.3.1-bin.tar.gz
# Unpack the downloaded archive into the current directory
tar -xf hbase-1.3.1-bin.tar.gz

进入HBase的目录

# The binary archive unpacks to "hbase-1.3.1"; the "-bin" suffix is only part of the archive name
cd hbase-1.3.1
# "ll" is a shell alias that is not defined by default on every system; "ls -l" always works
ls -l

可以看到如下的文件目录结构

首先,要配置一下HBase的环境参数:在conf/hbase-env.sh中设置JAVA_HOME。下面是适用于macOS的配置方法。

# Resolve the JDK location with the macOS java_home helper and export it for HBase
export JAVA_HOME=$(/usr/libexec/java_home)

接下来需要配置conf/hbase-site.xml,做一些基本的HBase的设置操作。

<!-- Basic HBase settings used by this walkthrough (conf/hbase-site.xml). -->
<configuration>
  <!-- Directory where ZooKeeper keeps its data; a user directory is used instead of /tmp. -->
  <property>
    <name>hbase.zookeeper.property.dataDir</name>
    <value>/Users/chenrz925/dat/zookeeper</value>
  </property>
  <!-- Run HBase in distributed mode rather than standalone mode. -->
  <property>
    <name>hbase.cluster.distributed</name>
    <value>true</value>
  </property>
  <!-- HBase root data directory, stored on the HDFS instance at localhost:9000. -->
  <property>
    <name>hbase.rootdir</name>
    <value>hdfs://localhost:9000/hbase</value>
  </property>
</configuration>

可见,ZooKeeper的数据目录被设置在了本地的用户目录下,这样可以避免数据存放在/tmp目录中、在系统重启后被清除的问题,保证了稳定性;同时HBase的数据根目录(hbase.rootdir)设置在了HDFS上,保证了HBase的分布式特性。

现在我们启动HBase

# Start HBase with the launcher script; the relative path assumes the HBase directory is the current directory
bin/start-hbase.sh

这个过程的输出如下

现在我们看一下本地的Web UI

至此,伪分布式的HBase配置就完成了,下面我们实际运行一下HBase进行测试。

按照图中的命令我进行了测试。

可以看到HBase已经能够正确运行了。

接下来我们基于HBase开发一些简单的功能。

使用Java API开发

之后基于Java API建立了TableUtility工具类,使用了如下的代码。

package cn.waterch.hadoop;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;

import java.io.IOException;

/**
 * Static helpers for basic HBase table operations: creating a table, writing one cell and
 * reading one cell.
 *
 * <p>Every public method opens a connection through {@code initialize()} and releases it
 * through {@code close()} before returning, including when the operation fails. The shared
 * static fields are not synchronized.
 *
 * <p>Created by chenrz925 on 13/11/2017, 4:52 PM. Project: HBaseLab.
 *
 * @author chenrz925
 */
public class TableUtility {
    private static boolean initialized = false;
    private static Configuration configuration;
    private static Connection connection;
    private static Admin admin;

    /**
     * Opens the HBase connection and its {@link Admin} handle if they are not currently open.
     *
     * <p>Failures are propagated instead of being printed and ignored, so callers never continue
     * with a missing or stale connection.
     *
     * @throws IOException if the connection or the admin handle cannot be created
     */
    private static void initialize() throws IOException {
        if (initialized) {
            return;
        }
        configuration = HBaseConfiguration.create();
        configuration.set("hbase.rootdir", "hdfs://localhost:9000/hbase");
        Connection opened = ConnectionFactory.createConnection(configuration);
        try {
            admin = opened.getAdmin();
        } catch (IOException | RuntimeException e) {
            // Do not leak the half-initialized connection when the admin handle cannot be obtained.
            try {
                opened.close();
            } catch (IOException closeFailure) {
                e.addSuppressed(closeFailure);
            }
            throw e;
        }
        connection = opened;
        initialized = true;
    }

    /**
     * Closes the admin handle and the connection and resets the shared state.
     *
     * <p>Closing is best-effort: a failure is reported on stderr and does not prevent the other
     * resource from being closed, nor does it mask an exception thrown by the calling operation.
     */
    private static void close() {
        try {
            if (admin != null) {
                admin.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            admin = null;
        }
        try {
            if (connection != null) {
                connection.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            connection = null;
        }
        initialized = false;
    }

    /**
     * Creates a new table with the given column families. If the table already exists, an error
     * message is printed to stderr and nothing is created.
     *
     * @param tableName    name of the table to create
     * @param columnFamily names of the column families of the new table
     * @throws IOException if connecting to HBase or creating the table fails
     */
    public static void createTable(String tableName, String[] columnFamily) throws IOException {
        initialize();
        try {
            TableName newTableName = TableName.valueOf(tableName);
            if (admin.tableExists(newTableName)) {
                System.err.println("Error: Table exists.");
                return;
            }
            HTableDescriptor tableDescriptor = new HTableDescriptor(newTableName);
            for (String column : columnFamily) {
                tableDescriptor.addFamily(new HColumnDescriptor(column));
            }
            admin.createTable(tableDescriptor);
        } finally {
            close();
        }
    }

    /**
     * Writes a single cell value.
     *
     * @param tableName    table name
     * @param rowKey       row key
     * @param columnFamily column family
     * @param column       column qualifier
     * @param value        value to store
     * @throws IOException if connecting to HBase or writing the cell fails
     */
    public static void insertData(String tableName, String rowKey, String columnFamily, String column, String value) throws IOException {
        initialize();
        try (Table table = connection.getTable(TableName.valueOf(tableName))) {
            Put put = new Put(Bytes.toBytes(rowKey));
            put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes(column), Bytes.toBytes(value));
            table.put(put);
        } finally {
            close();
        }
    }

    /**
     * Reads a single cell and prints its value to stdout. Prints {@code null} when the cell does
     * not exist.
     *
     * @param tableName    table name
     * @param rowKey       row key
     * @param columnFamily column family
     * @param column       column qualifier; {@code null} is passed to HBase as a null qualifier
     * @throws IOException if connecting to HBase or reading the cell fails
     */
    public static void getData(String tableName, String rowKey, String columnFamily, String column) throws IOException {
        initialize();
        try (Table table = connection.getTable(TableName.valueOf(tableName))) {
            // Use the same UTF-8 encoding helper as insertData so reads match what was written.
            byte[] family = Bytes.toBytes(columnFamily);
            byte[] qualifier = column == null ? null : Bytes.toBytes(column);
            Get get = new Get(Bytes.toBytes(rowKey));
            get.addColumn(family, qualifier);
            Result result = table.get(get);
            // Bytes.toString returns null for a missing value instead of throwing.
            System.out.println(Bytes.toString(result.getValue(family, qualifier)));
        } finally {
            close();
        }
    }
}

TableUtility类实现了基本的对表中数据进行增查的功能。根据实验需求,我还需要实现如下的方法。

createTable(String tableName, String[] fields)

addRecord(String tableName, String row, String[] fields, String[] values)

scanColumn(String tableName, String column)

modifyData(String tableName, String row, String column)

deleteRow(String tableName, String row)

deleteRow()实现如下

/**
 * Deletes an entire row.
 *
 * <p>The table handle and the connection are released even when the delete fails.
 *
 * @param tableName table name
 * @param rowKey    row key of the row to delete
 * @throws IOException if the table cannot be opened or the delete fails
 */
public static void deleteRow(String tableName, String rowKey) throws IOException {
  initialize();
  try (Table table = connection.getTable(TableName.valueOf(tableName))) {
    table.delete(new Delete(Bytes.toBytes(rowKey)));
  } finally {
    close();
  }
}

modifyData()实现如下

/**
 * Overwrites the value of one cell.
 *
 * <p>If {@code column} has no {@code ':'} separator, an error is printed to stderr and nothing
 * is written. The table handle and the connection are always released.
 *
 * @param tableName table name
 * @param row       row key
 * @param column    column in {@code family:qualifier} form, e.g. 'cf:cl'
 * @param value     new value
 * @throws IOException if the table cannot be opened or the write fails
 */
public static void modifyData(String tableName, String row, String column, String value) throws IOException {
  initialize();
  try {
    // Split only on the first ':' so the qualifier itself may contain colons.
    String[] cut = column.split(":", 2);
    if (cut.length < 2) {
      System.err.println("Error: Column does not exists.");
      return;
    }
    try (Table table = connection.getTable(TableName.valueOf(tableName))) {
      Put put = new Put(Bytes.toBytes(row));
      put.addColumn(Bytes.toBytes(cut[0]), Bytes.toBytes(cut[1]), Bytes.toBytes(value));
      table.put(put);
    }
  } finally {
    close();
  }
}

scanColumn()实现如下

/**
 * Scans a column, or a whole column family, and prints every matching row to stdout.
 *
 * <p>The scanner, the table handle and the connection are released even when the scan fails.
 *
 * @param tableName table name
 * @param column    either {@code family:qualifier} (e.g. 'cf:cl') to scan one column, or just
 *                  {@code family} to scan every column of that family
 * @throws IOException if the table cannot be opened or the scan fails
 */
public static void scanColumn(String tableName, String column) throws IOException {
  initialize();
  try {
    String[] cut = column.split(":", 2);
    Scan scan = new Scan();
    if (cut.length > 1) {
      scan.addColumn(Bytes.toBytes(cut[0]), Bytes.toBytes(cut[1]));
    } else {
      scan.addFamily(Bytes.toBytes(cut[0]));
    }
    try (Table table = connection.getTable(TableName.valueOf(tableName));
         ResultScanner scanner = table.getScanner(scan)) {
      // Explicit next() loop keeps scan failures surfacing as IOException.
      for (Result result = scanner.next(); result != null; result = scanner.next()) {
        System.out.println(result);
      }
    }
  } finally {
    close();
  }
}

然后我们建立如下测试用例来建立实验要求的表

import cn.waterch.hadoop.TableUtility;
import org.junit.Test;

import java.io.IOException;

/**
 * Drives {@link TableUtility} end to end: creates the Student, Course and SC tables, fills them
 * with the lab's sample rows, then scans SC, deletes row "4" and scans SC again.
 *
 * <p>The test makes no assertions; it only fails if one of the calls throws.
 *
 * <p>Created by chenrz925 on 13/11/2017, 11:48 PM. Project: HBaseLab.
 *
 * @author chenrz925
 */
public class TableUtilityTest {
    /**
     * Inserts the given rows cell by cell, row after row and qualifier after qualifier.
     *
     * @param table      target table name
     * @param family     column family that receives every cell
     * @param qualifiers column qualifiers, in the order the values appear in each row
     * @param rows       one array per row: the row key followed by one value per qualifier
     * @throws IOException if any insert fails
     */
    private static void insertRows(String table, String family, String[] qualifiers, String[][] rows)
            throws IOException {
        for (String[] row : rows) {
            String rowKey = row[0];
            for (int i = 0; i < qualifiers.length; i++) {
                TableUtility.insertData(table, rowKey, family, qualifiers[i], row[i + 1]);
            }
        }
    }

    @Test
    public void BaseUnitTest() throws IOException {
        String studentTable = "Student";
        String studentFamily = "S_Info";
        String courseTable = "Course";
        String courseFamily = "C_Info";
        String scTable = "SC";
        String scFamily = "SC_Info";

        TableUtility.createTable(studentTable, new String[] {studentFamily});
        TableUtility.createTable(courseTable, new String[] {courseFamily});
        TableUtility.createTable(scTable, new String[] {scFamily});

        insertRows(studentTable, studentFamily,
                new String[] {"S_Name", "S_Sex", "S_Age"},
                new String[][] {
                        {"2015001", "Zhangsan", "male", "23"},
                        {"2015002", "Mary", "female", "22"},
                        {"2015003", "Lisi", "male", "24"},
                });

        insertRows(courseTable, courseFamily,
                new String[] {"C_Name", "C_Credit"},
                new String[][] {
                        {"123001", "Math", "2.0"},
                        {"123002", "Computer Science", "5.0"},
                        {"123003", "English", "3.0"},
                });

        insertRows(scTable, scFamily,
                new String[] {"SC_Sno", "SC_Cno", "SC_Score"},
                new String[][] {
                        {"1", "2015001", "123001", "86"},
                        {"2", "2015001", "123003", "69"},
                        {"3", "2015002", "123002", "77"},
                        {"4", "2015002", "123003", "99"},
                        {"5", "2015003", "123001", "98"},
                        {"6", "2015003", "123002", "95"},
                });

        // Show SC before and after removing row "4".
        TableUtility.scanColumn(scTable, scFamily);
        TableUtility.deleteRow(scTable, "4");
        TableUtility.scanColumn(scTable, scFamily);
    }
}

由此可见,基于Java API构建的工具方法已经完成,能够稳定地访问HBase,并完成建立相应的数据库表以及数据的增、删、改、查等操作。