HBase Pre-Partition (Region Pre-Split) Design

Generating partition seeds for HBase

package com.spdbccc.edm.storm.common;

import java.util.List;

import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.hbase.util.Bytes;

import com.google.common.collect.Lists;

/**
 * HBase table manager: rowkey generation rules and table-name conventions
 * for HBase tables.
 *
 * @author WJ
 */
public class HTableManager {

    public static final byte[] DEFAULT_FAMILY_NAME = Bytes.toBytes("f1");

    private static final String[] PARTITIONS = generatPartitionSeed();

    /**
     * Generates all 3844 (62 * 62) partition seeds: every two-character
     * combination of [0-9A-Za-z].
     *
     * @return String[]
     */
    public static String[] generatPartitionSeed() {
        List<Character> seeds = Lists.newArrayList();
        for (int i = '0'; i <= '9'; i++) {
            seeds.add((char) i);
        }
        for (int i = 'A'; i <= 'Z'; i++) {
            seeds.add((char) i);
        }
        for (int i = 'a'; i <= 'z'; i++) {
            seeds.add((char) i);
        }
        int k = 0;
        String[] partitions = new String[seeds.size() * seeds.size()];
        for (int i = 0; i < seeds.size(); i++) {
            for (int j = 0; j < seeds.size(); j++) {
                partitions[k] = StringUtils.join(seeds.get(i), seeds.get(j));
                k++;
            }
        }
        return partitions;
    }

    /**
     * Picks a given number of partition seeds, spaced evenly across the full
     * 3844-seed space.
     *
     * @param limit number of partitions
     * @return String[] of limit + 1 seeds (the last seed "zz" is always appended)
     */
    public static String[] generatPartitionSeed(int limit) {
        int size = PARTITIONS.length;
        // Distribute the 3844 seeds as evenly as possible over `limit` buckets.
        int[] space = new int[limit];
        for (int pt = 0; pt < size;) {
            for (int j = 0; j < space.length; j++) {
                ++space[j];
                pt++;
                if (pt == size) {
                    break;
                }
            }
        }
        String[] seed = new String[limit + 1];
        int position = 0;
        for (int i = 0; i < space.length; i++) {
            seed[i] = PARTITIONS[position];
            position += space[i];
        }
        seed[seed.length - 1] = PARTITIONS[PARTITIONS.length - 1];
        return seed;
    }

    /** Prefixes the key with its hash bucket so rows spread across regions. */
    public static String generatRowkey(String str) {
        int i = Math.abs(str.hashCode() % PARTITIONS.length);
        return StringUtils.join(PARTITIONS[i], "-", str);
    }

    public static byte[] generatByteRowkey(String str) {
        int i = Math.abs(str.hashCode() % PARTITIONS.length);
        return Bytes.toBytes(StringUtils.join(PARTITIONS[i], "-", str));
    }

    public static String getEventLogTableName(String event) {
        return StringUtils.join("EVENT_LOG_", event);
    }

    public static String getGroupVarTableName() {
        return "CUSTOM_VARIABLE_GROUP";
    }

    public static String getActivityVarTableName() {
        return "CUSTOM_VARIABLE_ACTIVITY";
    }

    public static String getUserVarTableName() {
        return "CUSTOM_VARIABLE_USER";
    }

    public static String getUserInfoTableName() {
        return "USER_WIDE_PUB";
    }

    public static String getCardInfoTableName() {
        return "CARD_WIDE_PUB";
    }

    public static String getAcctInfoTableName() {
        return "ACCT_WIDE_PUB";
    }

    public static String getMetricTableName() {
        return "METRICS";
    }

    public static String getCustDefinitionTableName() {
        return "CUST_USER_DEFINITION";
    }

    public static String getCardDefinitionTableName() {
        return "CARD_USER_DEFINITION";
    }

    public static String getEventLogLbsHisName() {
        return "EVENT_LOG_LBS_HIS";
    }

    public static String getRankLogName() {
        return "RANK_LOG";
    }

    public static void main(String[] args) {
        String[] arr = generatPartitionSeed(101);
        for (int i = 0; i < arr.length; i++) {
            System.out.println(arr[i]);
        }
    }
}
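A quick check of the even spacing (a minimal sketch; HTableManager above is assumed to be on the classpath, in the same package): with limit = 4 the 3844 seeds divide exactly into four buckets of 961, so the returned boundaries are "00", "FV", "V0", "kV" plus the trailing "zz".

// Minimal sketch: inspect the split points HTableManager picks.
// Assumes HTableManager (above) is on the classpath, same package.
public class SeedDemo {

    public static void main(String[] args) {
        // limit = 4 -> five boundaries: 00, FV, V0, kV, zz
        for (String seed : HTableManager.generatPartitionSeed(4)) {
            System.out.println(seed);
        }

        // The rowkey prefix is the seed at abs(hashCode % 3844), so the same
        // business key always lands in the same bucket (prefix varies by key).
        System.out.println(HTableManager.generatRowkey("NE123456789"));
    }
}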
Creating a pre-split table with a specified number of partitions

package com.spdbccc.edm.storm.common;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.util.Bytes;

public class HTableUtil {

    private static final int MAX_FILE_SIZE = 1024 * 1024 * 256;

    public static void main(String[] args) throws Exception {
        int limit = 100;

        createHBaseTable(HTableManager.getEventLogTableName("TEST"), limit);

        // createHBaseTable(HTableManager.getEventLogTableName("DH"), limit);
        // createHBaseTable(HTableManager.getActivityVarTableName(), limit);
        // createHBaseTable(HTableManager.getUserVarTableName(), limit);
    }

    private static HTableDescriptor getHTableDescriptor(String tableName) {
        HColumnDescriptor columnDescriptor = new HColumnDescriptor(HTableManager.DEFAULT_FAMILY_NAME);
        columnDescriptor.setCompressionType(Compression.Algorithm.SNAPPY);
        columnDescriptor.setCompactionCompressionType(Compression.Algorithm.SNAPPY);
        // columnDescriptor.setTimeToLive(60 * 60 * 24 * 365 * 1);
        columnDescriptor.setBlockCacheEnabled(true);
        columnDescriptor.setDataBlockEncoding(DataBlockEncoding.NONE);

        HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(tableName));
        desc.setCompactionEnabled(true);
        desc.setMaxFileSize(MAX_FILE_SIZE);
        desc.addFamily(columnDescriptor);
        return desc;
    }

    /**
     * Creates a pre-split table. An existing table with the same name is
     * dropped first.
     *
     * @param tableName          table name
     * @param partitionSeedLimit number of pre-split partitions, at most 3844
     * @throws Exception
     */
    public static void createHBaseTable(String tableName, int partitionSeedLimit) throws Exception {
        if (partitionSeedLimit > 3844 || partitionSeedLimit < 1) {
            throw new IllegalArgumentException("partitionSeedLimit must be between 1 and 3844.");
        }
        System.out.println("init HBase admin...");
        Configuration conf = HBaseConfiguration.create();
        HBaseAdmin admin = new HBaseAdmin(conf);

        if (admin.tableExists(tableName)) {
            if (!admin.isTableDisabled(tableName)) {
                admin.disableTable(tableName);
            }
            admin.deleteTable(tableName);
            System.out.println(String.format("Table exists, dropped table %s.", tableName));
        }

        System.out.println(String.format("Creating HBase table %s, partition seed limit %d.", tableName, partitionSeedLimit));
        admin.createTable(getHTableDescriptor(tableName), Bytes.toByteArrays(HTableManager.generatPartitionSeed(partitionSeedLimit)));
        System.out.println(String.format("HBase table %s is created.", tableName));
        admin.close();
        System.out.println("==============================================");
    }

    // public static void main(String[] args) throws Exception {
    //     String tableName = HTableManager.getEventLogTableName(args[0]);
    //     int limit = Integer.parseInt(args[1]);
    //     System.out.println("Creating table: " + tableName);
    //     createHBaseTable(tableName, limit);
    //     System.out.println("Created: " + tableName);
    // }
}
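Note that generatPartitionSeed(100) returns 101 split keys, so the table above ends up with 102 regions (n split keys always yield n + 1 regions). Also, new HBaseAdmin(conf) is the old client API; on HBase 1.x and later it is deprecated in favor of Connection/Admin. A minimal sketch of the same create-if-absent logic on that API (the bare-bones descriptor here is an assumption for brevity; in practice reuse the SNAPPY/encoding settings from getHTableDescriptor above):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.util.Bytes;

public class HTableUtilV2 {

    public static void createHBaseTable(String tableName, int partitionSeedLimit) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        // Connection and Admin are Closeable; try-with-resources releases them.
        try (Connection connection = ConnectionFactory.createConnection(conf);
                Admin admin = connection.getAdmin()) {
            TableName name = TableName.valueOf(tableName);
            if (admin.tableExists(name)) {
                if (!admin.isTableDisabled(name)) {
                    admin.disableTable(name);
                }
                admin.deleteTable(name);
            }
            // Bare-bones descriptor for the sketch; add compression/encoding as above.
            HTableDescriptor desc = new HTableDescriptor(name);
            desc.addFamily(new HColumnDescriptor(HTableManager.DEFAULT_FAMILY_NAME));
            admin.createTable(desc, Bytes.toByteArrays(HTableManager.generatPartitionSeed(partitionSeedLimit)));
        }
    }
}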
Generating a rule-compliant rowkey
String key = "NE123456789";
String messageKey = HTableManager.generatRowkey(key) + "-" + YZYT_msg_type;
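Because the prefix is derived deterministically from the business key's hash, a reader can rebuild the exact rowkey and issue a point Get instead of a scan. A minimal sketch (the table EVENT_LOG_TEST comes from the example above; the qualifier "col" is a hypothetical column name):

import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;

public class RowkeyReadDemo {

    public static void main(String[] args) throws Exception {
        try (Connection connection = ConnectionFactory.createConnection(HBaseConfiguration.create());
                Table table = connection.getTable(
                        TableName.valueOf(HTableManager.getEventLogTableName("TEST")))) {
            // Same hash -> same prefix: the full rowkey is reproducible from the key alone.
            Get get = new Get(HTableManager.generatByteRowkey("NE123456789"));
            Result result = table.get(get);
            // "col" is a hypothetical qualifier; use whatever the writer stored.
            byte[] value = result.getValue(HTableManager.DEFAULT_FAMILY_NAME, Bytes.toBytes("col"));
            System.out.println(value == null ? "not found" : Bytes.toString(value));
        }
    }
}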
