租户端大数据大数据处理套件（TBDS）开发者指南常用操作

常用操作

最近更新时间: 2026-03-13 09:03:00

连接方式

Kerberos环境

kerberos认证

# List the entries of the keytab to find the principal to authenticate as.
klist -kt /var/krb5kdc/emr.keytab 
# Obtain a Kerberos ticket from the keytab; replace the {IP} and {域名} placeholders with the cluster's values.
kinit -kt /var/krb5kdc/emr.keytab hadoop/{IP}@TBDS-{域名}

直连模式

# Direct connection to one HiveServer2 instance ({IP}, port 7001), passing the Kerberos service principal in the JDBC URL.
/usr/local/service/hive/bin/beeline -u 'jdbc:hive2://{IP}:7001/;principal=hadoop/{IP}@TBDS-{域名}'

ZK模式

# Connection through ZooKeeper service discovery: the URL lists the ZooKeeper nodes and the hiveserver2 namespace instead of a single server address.
/usr/local/service/hive/bin/beeline -u 'jdbc:hive2://{IP1}:2181,{IP2}:2181,{IP3}:2181/;serviceDiscoveryMode=zooKeeper;zooKeeperNamespace=hiveserver2;principal=hadoop/_HOST@TBDS-{域名}'

#参数说明
-d   ---使用一个驱动类：beeline -d driver_class    
-e   ---使用一个查询语句：beeline -e "query_string"  多个查询语句用-e "query1" -e "query2"
-f   ---加载一个脚本文件：beeline -f filepath
-n   ---加载一个用户名：beeline -n valid_user    
-p   ---加载一个密码：beeline -p valid_password    
-u  ---加载一个JDBC连接字符串：beeline -u db_URL
--autoCommit=[true/false] ---进入一个自动提交模式
--autosave=[true/false]   ---进入一个自动保存模式    
--color=[true/false]    ---显示用到的颜色  
--delimiterForDSV=DELIMITER ---分隔值（DSV）输出格式的分隔符
--fastConnect=[true/false]  ---在连接时，跳过构建用于自动补全的表和列清单
--force=[true/false]    ---是否强制运行脚本    
--headerInterval=ROWS   ---输出格式为table时，重新显示列标题的间隔行数，默认是100: beeline --headerInterval=50
--help ---帮助

simple模式

直连模式

# Direct connection to one HiveServer2 instance ({IP}, port 7001); no principal is passed in simple mode.
/usr/local/service/hive/bin/beeline -u 'jdbc:hive2://{IP}:7001/;'

ZK模式

# Connection through ZooKeeper service discovery in simple mode (no principal in the JDBC URL).
/usr/local/service/hive/bin/beeline -u 'jdbc:hive2://{IP1}:2181,{IP2}:2181,{IP3}:2181/;serviceDiscoveryMode=zooKeeper;zooKeeperNamespace=hiveserver2;'

常用命令

# List all databases
SHOW DATABASES;

# Create a database named test
CREATE DATABASE test;

# Create table hive_test as delimited text, using ',' as the column separator
CREATE TABLE hive_test (
  a INT,
  b STRING
)
ROW FORMAT DELIMITED
  FIELDS TERMINATED BY ',';

# List tables
SHOW TABLES;

# Insert one row
INSERT INTO hive_test VALUES (1, 'gtc');

# Return up to 10 rows of the table
SELECT * FROM hive_test LIMIT 10;

# Drop the Hive table
DROP TABLE hive_test;

经典案例

创建不同类型表

# Create a partitioned Hive table stored as ORC
CREATE TABLE IF NOT EXISTS tbtest_orc (
  `id_card`   INT,
  `tran_time` STRING,
  `name`      STRING,
  `cash`      INT
)
PARTITIONED BY (ds STRING)
STORED AS ORC;

# Create a partitioned Hive table stored as ORC with Snappy compression.
# The ORC table property that selects the codec is "orc.compress"; a key such as
# "orc.compression" is not recognized by ORC, so the default codec would stay in effect.
CREATE TABLE IF NOT EXISTS tbtest_orc_snappy (
  `id_card` int,
  `tran_time` string,
  `name` string,
  `cash` int
  )
partitioned by(ds string)
stored as orc
TBLPROPERTIES ("orc.compress"="SNAPPY");

# Create a Hive table stored as Parquet with Snappy compression.
# The statement is terminated with ';' so it does not run into the next statement when pasted into beeline.
CREATE TABLE `tbtest_parquet_snappy` (
  `interfaceName` string,
  `uri` string,
  `reqMethod` string,
  `status` int,
  `isGzip` boolean,
  `response` string,
  `serverIp` string,
  `reqCost` float,
  `time` bigint,
  `schema` string)
STORED AS PARQUET
TBLPROPERTIES ('parquet.compression'='SNAPPY');

# Create an external Hive table that uses JsonSerDe, so JSON-formatted data can be parsed
CREATE EXTERNAL TABLE IF NOT EXISTS tbtest_json (
  id   INT,
  name STRING
)
ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe'
STORED AS TEXTFILE;
##数据样例如下##
{"id":1,"name":"zhangsan1"}
{"id":2,"name":"zhangsan2"}
{"id":3,"name":"zhangsan3"}
{"id":4,"name":"zhangsan4"}

# Create an external table with the EXTERNAL keyword; dropping an external table does not delete its data
CREATE EXTERNAL TABLE hive_test1 (
  a INT,
  b STRING
)
ROW FORMAT DELIMITED
  FIELDS TERMINATED BY ',';

Hive事务表

# Session settings used by this transactional-table example
SET hive.support.concurrency = true;
SET hive.exec.dynamic.partition.mode = nonstrict;
SET hive.txn.manager = org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
SET hive.compactor.initiator.on = true;
SET hive.compactor.worker.threads = 1;
# Work in the hivetest database
CREATE DATABASE IF NOT EXISTS hivetest;
USE hivetest;
# Recreate the bucketed ORC table with the 'transactional' property enabled
DROP TABLE IF EXISTS hive_trans_tbl;
CREATE TABLE hive_trans_tbl (
  id   INT,
  name STRING,
  age  INT
)
CLUSTERED BY (id) INTO 2 BUCKETS
STORED AS ORC
TBLPROPERTIES ('transactional'='true');
# Write one row and read it back
INSERT INTO hive_trans_tbl VALUES (1, '张三', 18);
SELECT * FROM hivetest.hive_trans_tbl;

导入导出Hive表/分区数据

数据准备

# Connect to Hive with beeline, then create the test data
/usr/local/service/hive/bin/beeline -u 'jdbc:hive2://{IP}:7001/;principal=hadoop/{IP}@TBDS-{域名}'

# Create the source table and insert one row
CREATE TABLE export_test (
  id   INT,
  name STRING
);
INSERT INTO export_test VALUES (1, "HIVE");

# Create the data directory on HDFS
hdfs dfs -mkdir /tmp/export

简单导出导入

# On the source cluster, export the metadata and data of table "export_test" to the created directory.
EXPORT TABLE export_test TO 'hdfs:///tmp/export';

# On the target cluster, import the exported table data into table "export_test".
# The statement must end with ';' to be executed by beeline.
IMPORT FROM '/tmp/export';

在导入时重命名表

在源端集群执行以下命令将表“export_test”的元数据和业务数据导出到创建的目录下。
EXPORT TABLE export_test TO 'hdfs:///tmp/export';

在目标集群执行以下命令将导出的表数据导入到表“import_test”中。
IMPORT TABLE import_test FROM '/tmp/export';

分区数据表准备

# Partitioned Parquet table used by the partition export/import example
CREATE TABLE export_test_part (
  id   INT,
  name STRING
)
PARTITIONED BY (
  pt1 STRING,
  pt2 STRING
)
STORED AS PARQUET;

# Insert two rows into partition (pt1='in', pt2='ka')
INSERT INTO export_test_part
  PARTITION (pt1='in', pt2='ka')
  VALUES (1, 'HIVE');
INSERT INTO export_test_part
  PARTITION (pt1='in', pt2='ka')
  VALUES (2, 'SPARK');

导出分区数据并导入

在源端集群执行以下命令将表“export_test_part”的分区（pt1='in', pt2='ka'）导出到创建的目录下。
EXPORT TABLE export_test_part
  PARTITION (pt1='in', pt2='ka')
  TO 'hdfs:///tmp/export/partition';

在目标集群执行以下命令将导出的分区数据导入到表“export_test_part”中。
IMPORT FROM '/tmp/export/partition';