[root@nb0 data]# vi gen.sh
[root@nb0 data]# cat gen.sh
#!/bin/bash
# Generate 100000 tab-separated rows: a sequence number plus three $RANDOM values.
# Brace expansion and `echo -e` are bash features, so use bash rather than plain sh.
for i in {1..100000}; do
    echo -e "$i\t$RANDOM\t$RANDOM\t$RANDOM"
done
[root@nb0 data]# bash gen.sh > mydata.txt
[root@nb0 data]# vi mydata.txt
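Before uploading, it is worth sanity-checking the generated file. $RANDOM yields an integer in the range 0-32767, which is why all three data columns stay below 32768. For example (output omitted):
wc -l mydata.txt
head -3 mydata.txt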
[root@nb0 data]# hdfs dfs -put mydata.txt input
17/07/19 20:38:55 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
[root@nb0 data]# hdfs dfs -ls input
17/07/19 20:39:04 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Found 1 items
-rw-r--r-- 3 root hbase 1698432 2017-07-19 20:38 input/mydata.txt
[root@nb0 data]#
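The upload can also be spot-checked directly on HDFS, for example (output omitted):
hdfs dfs -cat input/mydata.txt | head -5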
Create the HBase table 'abc'
hbase(main):007:0> create 'abc','info'
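The table has a single column family, info, which will hold the three data columns. The layout can be verified with, for example (output omitted):
describe 'abc'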
Create the Hive external table
Create a Hive table that points to the existing HBase table.
Since HBase carries no data type information, every value is stored as a string.
hive> create external table hbase_t1(rowkey string,data1 string,data2 string,data3 string)
> stored by 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
> with serdeproperties ("hbase.columns.mapping" = ":key,info:data1,info:data2,info:data3")
    > tblproperties ("hbase.table.name"="abc","hbase.mapred.output.outputtable"="abc");
OK
Time taken: 0.213 seconds
hive>
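In hbase.columns.mapping, the special :key token binds the first Hive column (rowkey) to the HBase row key, and info:data1 through info:data3 bind the remaining columns to qualifiers in the info column family; the mapping is positional, matching the column order of the Hive table. The resulting definition can be inspected with, for example (output omitted):
describe formatted hbase_t1;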
Create a Hive staging table for the text data
hive> create external table hive_t1(rowkey string,data1 string,data2 string,data3 string)
> row format delimited
> fields terminated by '\t'
> stored as textfile;
OK
Time taken: 0.105 seconds
hive>
Load the data
hive> load data inpath 'input/mydata.txt' into table hive_t1;
Loading data to table default.hive_t1
Table default.hive_t1 stats: [numFiles=1, totalSize=2287063]
OK
Time taken: 0.795 seconds
hive>
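Note that LOAD DATA INPATH moves the file within HDFS rather than copying it, so input/mydata.txt disappears from the input directory afterwards. A quick count confirms the load, for example (output omitted):
select count(*) from hive_t1;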
hive> insert overwrite table hbase_t1
> select rowkey,data1,data2,data3 from hive_t1;
Query ID = root_20170720022542_8b5292a8-7903-4e22-8258-223555fab220
Total jobs = 1
Launching Job 1 out of 1
Number of reduce tasks is set to 0 since there's no reduce operator
Starting Job = job_1500448404940_0002, Tracking URL = http://nb0:8088/proxy/application_1500448404940_0002/
Kill Command = /usr/lib/hadoop/bin/hadoop job -kill job_1500448404940_0002
Hadoop job information for Stage-0: number of mappers: 2; number of reducers: 0
2017-07-20 02:25:52,647 Stage-0 map = 0%, reduce = 0%
2017-07-20 02:26:03,331 Stage-0 map = 100%, reduce = 0%, Cumulative CPU 19.73 sec
MapReduce Total cumulative CPU time: 19 seconds 730 msec
Ended Job = job_1500448404940_0002
MapReduce Jobs Launched:
Stage-Stage-0: Map: 2 Cumulative CPU: 19.73 sec HDFS Read: 2345042 HDFS Write: 0 SUCCESS
Total MapReduce CPU Time Spent: 19 seconds 730 msec
OK
Time taken: 22.873 seconds
hive>
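The job is map-only (zero reducers) because HBaseStorageHandler turns each selected row into an HBase Put issued directly from the mappers; that is also why the job reports HDFS Write: 0, since the output goes to HBase rather than HDFS. A single row can be spot-checked from the HBase shell, for example (output omitted):
get 'abc', '1'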
hive> select count(*) from hbase_t1;
Query ID = root_20170720022640_c705b5a2-7db5-4bfa-8b43-f218ad588226
Total jobs = 1
Launching Job 1 out of 1
Number of reduce tasks determined at compile time: 1
In order to change the average load for a reducer (in bytes):
set hive.exec.reducers.bytes.per.reducer=<number>
In order to limit the maximum number of reducers:
set hive.exec.reducers.max=<number>
In order to set a constant number of reducers:
set mapreduce.job.reduces=<number>
Starting Job = job_1500448404940_0003, Tracking URL = http://nb0:8088/proxy/application_1500448404940_0003/
Kill Command = /usr/lib/hadoop/bin/hadoop job -kill job_1500448404940_0003
Hadoop job information for Stage-1: number of mappers: 1; number of reducers: 1
2017-07-20 02:26:51,257 Stage-1 map = 0%, reduce = 0%
2017-07-20 02:27:01,201 Stage-1 map = 100%, reduce = 0%, Cumulative CPU 6.91 sec
2017-07-20 02:27:07,482 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 9.56 sec
MapReduce Total cumulative CPU time: 9 seconds 560 msec
Ended Job = job_1500448404940_0003
MapReduce Jobs Launched:
Stage-Stage-1: Map: 1 Reduce: 1 Cumulative CPU: 9.56 sec HDFS Read: 14530 HDFS Write: 7 SUCCESS
Total MapReduce CPU Time Spent: 9 seconds 560 msec
OK
100000
Time taken: 29.51 seconds, Fetched: 1 row(s)
hive>
hive> select * from hbase_t1 limit 10;
OK
1 199 4567 25943
10 10448 19496 31645
100 26984 29011 13177
1000 4008 1275 8236
10000 10121 14333 24945
100000 14619 17100 556
10001 28304 22506 5836
10002 29960 4367 19187
10003 25065 21803 21932
10004 19965 31442 18762
Time taken: 0.188 seconds, Fetched: 10 row(s)
hive>
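Note the order of the returned rows: 1, 10, 100, 1000, and so on. HBase stores row keys as byte strings sorted lexicographically, so numeric keys written as strings do not come back in numeric order. The same ordering is visible from the HBase shell with a bounded scan, for example (output omitted):
scan 'abc', {LIMIT => 10}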
hbase(main):007:0> count 'abc'
Current count: 1000, row: 10897
Current count: 2000, row: 11797
Current count: 3000, row: 12697
...
Current count: 99000, row: 99098
Current count: 100000, row: 99999
100000 row(s) in 9.7420 seconds
=> 100000
hbase(main):008:0>
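The shell's count performs a full client-side scan, which took about 10 seconds here for 100,000 rows; for large tables the bundled RowCounter MapReduce job is the usual alternative, for example:
hbase org.apache.hadoop.hbase.mapreduce.RowCounter 'abc'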
hbase(main):013:0> get 'abc','100000'
COLUMN CELL
info:data1 timestamp=1500531979008, value=14619
info:data2 timestamp=1500531979008, value=17100
info:data3 timestamp=1500531979008, value=556
3 row(s) in 0.0150 seconds
hbase(main):014:0>
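Note that the final get reports 3 row(s) because the shell counts the cells returned (info:data1, info:data2, and info:data3 of row 100000), not distinct HBase rows; the values match row 100000 in the Hive query above.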