copy支持多字节分隔符
最近更新时间: 2025-02-18 16:02:00
TDSQL PG支持多字节分隔符入库,限制10字节以内,支持中文分隔符,支持多字符集utf8以及gbk。
示例:
1、csv格式入库
--创建表
postgres=#
create table test_copy_base_1(c0 int,c1 macaddr,c2 point,c3 text,c4 numeric,c5 char,c6 oidvector,c7 float4,c8 abstime,c9 reltime,c10 polygon,c11 bool,c12 tinterval,c13 money,c14 tid,c15 circle,c16 macaddr8,c17 inet,c18 varbit(5),c19 varchar,c20 timestamp,c21 oid,c22 line,c23 int4,c24 int2,c25 pg_oracle.date,c26 path,c27 bit varying(5),c28 int8,c29 box,c30 lseg,c31 name,c32 interval,c33 timestamptz,c34 bpchar,c35 time,c36 float8,c37 timetz,c38 bytea,c39 varchar2,c40 number,c41 serial,c42 bigserial) distribute by shard(c0);
---插入数据
create or REPLACE procedure insert_data_start(starts int, table_num int, table_name varchar2)
as
v_sql varchar2;
region varchar[];
BEGIN
region := array['深圳','北京','上海','广州','重启','杭州','合肥','成都'];
v_sql := 'insert into '||table_name||' select i, concat_ws(''-'',to_hex((i%192)+1),to_hex((i%168)+1),to_hex((i%200)+1),to_hex((i%100)+1),to_hex((i%98)+1),to_hex((i%255)+1))::macaddr,point(ceil((i%68)+1),ceil((i%77)+1)),''南山''||(i%100)||''号'',round((0.356*500*i)%876,3)::numeric,(i%10)::char,(i*987)::varchar::oidvector,round(i*5.301,3)::float4,abstime(''2022-06-07 12:12:12''::timestamp+(i%256)*interval ''1 day''),reltime(i*542),polygon (box(point(i%550,i%360),point(i%50,i%22))),bool(i%2),tinterval(abstime(''2022-06-07 12:12:12''::timestamp+(i%300)*interval ''1 day''),abstime(''2022-06-07 12:12:12''::timestamp+i*interval ''1 hour'')),((i*3.156)%9999)::money,(i%798,i%666)::varchar::tid,circle(point(i%250,i%160),i%76),concat_ws(''-'',to_hex((i%192)+16),to_hex((i%168)+16),to_hex((i%200)+16),to_hex((i%100)+16),to_hex((i%98)+16),to_hex((i%239)+16))::macaddr8,concat(concat_ws(''.'',(i%100)+100,(i%100)+1,(i%100)+1,(i%100)+100),''/25'')::inet,((i+1)%98799)::int::bit(5)::varbit(5),(''开发''||(i%20)+1||''组'')::varchar,(date ''2022-06-07 12:12:12''+i * interval ''1 day''+(i%24) * interval ''1 hour''+(i%60) * interval ''1 minute''+(i%60) * interval ''1 second'')::timestamp,(i%999)::bigint::oid,line(point(i%93+1,i%120),point(i%450+2,i%36)),int4(i%1000),int2(i%150),date ''2022-06-07 12:12:12''+(i%378)*interval ''1 day'',path (polygon(box(point(i%192,i%120),point(i%913,i%120)))),int4(i%10000)::bit(5),int8(i*999),box(point(i%292+1,i%20),point(i%92+2,i%24)),lseg(point(i%92+1,i%220),point(i%238+2,i%120)),name(''腾讯''||(i%12)),i * interval ''1 day'',timestamptz(date(date ''2022-06-07 12:12:12'' + (i%777) * interval ''1 day''),(timestamp ''2022-06-07 12:12:12''+(i%100) * interval ''1 hour'')::time without time zone),bpchar((''你好''||(i%98))::text),(i%300)*interval ''1 hour 1 minute 1 second'',float8((i*3.122333)%998),timetz(timestamptz ''2022-06-07 10:56:00+08'' + (i%200) * interval ''1 hour''),repeat(''深圳''||(i%1000),3)::bytea,(''tbase in 深圳''||(i%900))::varchar2,((i*3.1123)%998)::number(10,2) from generate_series('||starts||','||table_num||') i';
-- RAISE notice '%', v_sql;
execute v_sql;
end;
/
postgres=#
call insert_data_start(1, 5000, 'test_copy_base_1');
postgres=#
analyze test_copy_base_1;
--出库
COPY (select * from test_copy_base_1 order by c0) to '/tmp/test_copy_base_1.data' with CSV delimiter '$^$';
--入库
COPY test_copy_base_1 from '/tmp/test_copy_base_1.data' with CSV delimiter '$^$';
--查询入库结果
postgres=# select count(1) from test_copy_base_1;
count
-------
5000
(1 row)
postgres=# select c0,c22,c25 from test_copy_base_1 order by c0,c25 desc nulls last limit 5;
c0 | c22 | c25
----+----------+---------------------
1 | {0,-1,1} | 2022-06-08 12:12:12
2 | {0,-1,2} | 2022-06-09 12:12:12
3 | {0,-1,3} | 2022-06-10 12:12:12
4 | {0,-1,4} | 2022-06-11 12:12:12
5 | {0,-1,5} | 2022-06-12 12:12:12
(5 rows)
2、支持中文分隔符
--出库
postgres=#
COPY (select * from test_copy_base_1 order by c0) to '/tmp/test_copy_base_2.data' (format TEXT, delimiter '你好');
postgres=#
delete from test_copy_base_1 where 1=1;
--入库
postgres=#
-----使用错误分隔符会报错
COPY test_copy_base_1 from '/tmp/test_copy_base_2.data' with (format TEXT,delimiter '//');
ERROR: invalid input syntax for type numeric: "1你好02:02:02:02:02:02你好(2,2)你好南山1号你好178你好1你好987你好5.301你好2022-06-08 12:12:12 +08:00你好00:09:02你好((1,1),(1,1),(1,1),(1,1))你好t你好["2022-06-07 13:12:12 +08:00" "2022-06-08 12:12:12 +08:00"]你好¥3.16你好(1,1)你好
CONTEXT: COPY test_copy_base_1, line 1, column c0: "1你好02:02:02:02:02:02你好(2,2)你好南山1号你好178你好1你好987你好5.301你好2022-06...", nodetype:1(1:cn,0:dn)
postgres=#
COPY test_copy_base_1 from '/tmp/test_copy_base_2.data' with (format TEXT,delimiter '你好');
--查询入库结果
postgres=# select count(1) from test_copy_base_1;
count
-------
5000
(1 row)