Table of Contents
- 1. Memory Table Engine
- 2. Set Table Engine
- 3. Join Table Engine
- 4. Buffer Table Engine
The Memory and Buffer table engines keep their data in memory only, while the Set and Join table engines also sync their data to disk to guard against data loss; when the service restarts, all of the data is loaded back into memory.
1. Memory Table Engine
- The Memory table engine is generally used for testing, or internally by ClickHouse as a temporary carrier when distributing data between cluster nodes
- Data held in memory by the Memory engine is neither compressed nor converted to another storage format (a quick size check is sketched at the end of this section)
- Parallel queries are supported
- Creating the tables
clickhouse1 :)
clickhouse1 :) create table memory_table_local on cluster sharding_cluster(
:-] id UInt32
:-] ) engine = Memory();
CREATE TABLE memory_table_local ON CLUSTER sharding_cluster
(
`id` UInt32
)
ENGINE = Memory
Query id: 5fe4fc73-2f73-48c2-b16e-0b42fb724f15
┌─host────────┬─port─┬─status─┬─error─┬─num_hosts_remaining─┬─num_hosts_active─┐
│ clickhouse1 │ 9000 │ 0 │ │ 3 │ 1 │
│ clickhouse3 │ 9000 │ 0 │ │ 2 │ 1 │
│ clickhouse4 │ 9000 │ 0 │ │ 1 │ 1 │
└─────────────┴──────┴────────┴───────┴─────────────────────┴──────────────────┘
┌─host────────┬─port─┬─status─┬─error─┬─num_hosts_remaining─┬─num_hosts_active─┐
│ clickhouse2 │ 9000 │ 0 │ │ 0 │ 0 │
└─────────────┴──────┴────────┴───────┴─────────────────────┴──────────────────┘
4 rows in set. Elapsed: 0.366 sec.
clickhouse1 :)
clickhouse1 :) create table memory_table_all on cluster sharding_cluster(
:-] id UInt32
:-] ) engine = Distributed(sharding_cluster, default, memory_table_local, id);
CREATE TABLE memory_table_all ON CLUSTER sharding_cluster
(
`id` UInt32
)
ENGINE = Distributed(sharding_cluster, default, memory_table_local, id)
Query id: 3a158669-bdf3-4113-958a-ba2b74e5d4f5
┌─host────────┬─port─┬─status─┬─error─┬─num_hosts_remaining─┬─num_hosts_active─┐
│ clickhouse2 │ 9000 │ 0 │ │ 3 │ 0 │
│ clickhouse3 │ 9000 │ 0 │ │ 2 │ 0 │
│ clickhouse1 │ 9000 │ 0 │ │ 1 │ 0 │
│ clickhouse4 │ 9000 │ 0 │ │ 0 │ 0 │
└─────────────┴──────┴────────┴───────┴─────────────────────┴──────────────────┘
4 rows in set. Elapsed: 0.136 sec.
clickhouse1 :)
- Inserting data
clickhouse1 :)
clickhouse1 :) insert into memory_table_all(id) values(1), (2), (3), (4), (5), (6);
INSERT INTO memory_table_all (id) VALUES
Query id: 6343a173-dcf4-435a-b341-abe6745f402b
Ok.
6 rows in set. Elapsed: 0.015 sec.
clickhouse1 :)
- Querying data
clickhouse1 :)
clickhouse1 :) select * from memory_table_all;
SELECT *
FROM memory_table_all
Query id: d264e0ea-998b-48d1-a7ba-e30023456c78
┌─id─┐
│ 4 │
└────┘
┌─id─┐
│ 1 │
│ 5 │
└────┘
┌─id─┐
│ 3 │
└────┘
┌─id─┐
│ 2 │
│ 6 │
└────┘
6 rows in set. Elapsed: 0.027 sec.
clickhouse1 :)
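Since the data sits uncompressed in RAM, it can be useful to check how much memory the local Memory table occupies. A minimal sketch, assuming the default database and a ClickHouse version whose system.tables exposes the total_rows/total_bytes columns:
-- approximate in-memory footprint of the local Memory table
select name, engine, total_rows, total_bytes
from system.tables
where database = 'default' and name = 'memory_table_local';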
2. Set Table Engine
- Deduplicates data: every element stored in the set is unique (a small re-insert check is sketched at the end of this section)
- In a SELECT, a Set table can only appear as the right-hand side of an IN condition, which means the table effectively has just a single key column; the Set table referenced on the right-hand side must also be a local table
- Creating the table
clickhouse1 :)
clickhouse1 :) create table memory_set_local(
:-] id UInt32
:-] ) engine = Set();
CREATE TABLE memory_set_local
(
`id` UInt32
)
ENGINE = Set
Query id: dec311f4-ab8a-4247-b481-142c9ac2aad2
Ok.
0 rows in set. Elapsed: 0.004 sec.
clickhouse1 :)
- Inserting data
clickhouse1 :)
clickhouse1 :) insert into memory_set_local select number from numbers(10);
INSERT INTO memory_set_local SELECT number
FROM numbers(10)
Query id: b263d2bf-e823-4126-9a75-73a3083dc98d
Ok.
0 rows in set. Elapsed: 0.002 sec.
clickhouse1 :)
- Querying data
clickhouse1 :)
clickhouse1 :) select arrayJoin([0, 9, 10, 99]) as a where a in memory_set_local;
SELECT arrayJoin([0, 9, 10, 99]) AS a
WHERE a IN (memory_set_local)
Query id: 43f99779-4b6b-43fb-b3b2-5c79e2d2da46
┌─a─┐
│ 0 │
│ 9 │
└───┘
2 rows in set. Elapsed: 0.037 sec.
clickhouse1 :)
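To see the deduplication behaviour described above, the same keys can be inserted a second time; the engine keeps one copy per key, and an IN filter over a wider range still matches only the original ten values. A minimal sketch reusing the table created above:
-- re-insert the same 10 keys; duplicates are ignored by the Set engine
insert into memory_set_local select number from numbers(10);
-- only the original 10 distinct values match
select count() from numbers(100) where number in memory_set_local;
-- expected: 10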
3. Join Table Engine
Ordinary joins read their data from disk; for tables with relatively little data, we can use the Join table engine instead, so the join-side data is read from memory.
- Creating the tables
Left table (not using the Join engine)
clickhouse1 :)
clickhouse1 :) create table left_table_local on cluster sharding_cluster(
:-] id UInt32,
:-] name String,
:-] city String
:-] ) engine = MergeTree()
:-] order by id;
CREATE TABLE left_table_local ON CLUSTER sharding_cluster
(
`id` UInt32,
`name` String,
`city` String
)
ENGINE = MergeTree
ORDER BY id
Query id: cdf99035-125f-4717-abdb-322b40617b77
┌─host────────┬─port─┬─status─┬─error─┬─num_hosts_remaining─┬─num_hosts_active─┐
│ clickhouse2 │ 9000 │ 0 │ │ 3 │ 0 │
│ clickhouse1 │ 9000 │ 0 │ │ 2 │ 0 │
│ clickhouse3 │ 9000 │ 0 │ │ 1 │ 0 │
│ clickhouse4 │ 9000 │ 0 │ │ 0 │ 0 │
└─────────────┴──────┴────────┴───────┴─────────────────────┴──────────────────┘
4 rows in set. Elapsed: 0.133 sec.
clickhouse1 :)
clickhouse1 :) create table left_table_all on cluster sharding_cluster(
:-] id UInt32,
:-] name String,
:-] city String
:-] ) engine = Distributed(sharding_cluster, default, left_table_local, id);
CREATE TABLE left_table_all ON CLUSTER sharding_cluster
(
`id` UInt32,
`name` String,
`city` String
)
ENGINE = Distributed(sharding_cluster, default, left_table_local, id)
Query id: b40fa73c-e3fd-492f-a40c-33d2bcbb00df
┌─host────────┬─port─┬─status─┬─error─┬─num_hosts_remaining─┬─num_hosts_active─┐
│ clickhouse2 │ 9000 │ 0 │ │ 3 │ 0 │
│ clickhouse1 │ 9000 │ 0 │ │ 2 │ 0 │
│ clickhouse3 │ 9000 │ 0 │ │ 1 │ 0 │
│ clickhouse4 │ 9000 │ 0 │ │ 0 │ 0 │
└─────────────┴──────┴────────┴───────┴─────────────────────┴──────────────────┘
4 rows in set. Elapsed: 0.125 sec.
Right table (using the Join engine)
clickhouse1 :)
clickhouse1 :) create table join_table_local on cluster sharding_cluster(
:-] id UInt8,
:-] subject String,
:-] score Float64
:-] ) engine = Join(any, left, id);
CREATE TABLE join_table_local ON CLUSTER sharding_cluster
(
`id` UInt8,
`subject` String,
`score` Float64
)
ENGINE = Join(any, left, id)
Query id: 48074123-f645-4420-8a8f-e263c281ef1d
┌─host────────┬─port─┬─status─┬─error─┬─num_hosts_remaining─┬─num_hosts_active─┐
│ clickhouse4 │ 9000 │ 0 │ │ 3 │ 3 │
└─────────────┴──────┴────────┴───────┴─────────────────────┴──────────────────┘
┌─host────────┬─port─┬─status─┬─error─┬─num_hosts_remaining─┬─num_hosts_active─┐
│ clickhouse2 │ 9000 │ 0 │ │ 2 │ 0 │
│ clickhouse1 │ 9000 │ 0 │ │ 1 │ 0 │
│ clickhouse3 │ 9000 │ 0 │ │ 0 │ 0 │
└─────────────┴──────┴────────┴───────┴─────────────────────┴──────────────────┘
4 rows in set. Elapsed: 0.115 sec.
clickhouse1 :)
clickhouse1 :) create table join_table_all on cluster sharding_cluster(
:-] id UInt8,
:-] subject String,
:-] score Float64
:-] ) engine = Distributed(sharding_cluster, default, join_table_local, id);
CREATE TABLE join_table_all ON CLUSTER sharding_cluster
(
`id` UInt8,
`subject` String,
`score` Float64
)
ENGINE = Distributed(sharding_cluster, default, join_table_local, id)
Query id: 0196456e-2d20-4256-8470-5e64cba1b203
┌─host────────┬─port─┬─status─┬─error─┬─num_hosts_remaining─┬─num_hosts_active─┐
│ clickhouse2 │ 9000 │ 0 │ │ 3 │ 0 │
│ clickhouse1 │ 9000 │ 0 │ │ 2 │ 0 │
│ clickhouse3 │ 9000 │ 0 │ │ 1 │ 0 │
│ clickhouse4 │ 9000 │ 0 │ │ 0 │ 0 │
└─────────────┴──────┴────────┴───────┴─────────────────────┴──────────────────┘
4 rows in set. Elapsed: 0.125 sec.
clickhouse1 :)
- Join engine parameters:
  - Join strictness (first parameter): all, any, asof; with any, inserted rows whose join key already exists are silently ignored
  - Join type (second parameter): inner, (left/right/full) outer, cross
  - Join key(s): one or more columns, comma-separated (a two-key variant is sketched after this list)
  - When joining against the left table, if the join used in the query does not match the engine parameters, the returned data may be incorrect
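For reference, the engine accepts more than one join key. A minimal sketch with a hypothetical table name, keeping the any/left settings used in this walkthrough:
create table join_multi_key_local
(
    id UInt8,
    subject String,
    score Float64
) engine = Join(any, left, id, subject);  -- two join keys: id and subject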
- Inserting data
clickhouse1 :)
clickhouse1 :) insert into left_table_all(id, name, city) values(1, 'name1', 'Beijing'), (2, 'name2', 'Shanghai'), (3, 'name3', 'Guangzhou');
INSERT INTO left_table_all (id, name, city) VALUES
Query id: a49ca50e-57b5-49f0-a653-d520fbbe68a5
Ok.
3 rows in set. Elapsed: 0.004 sec.
clickhouse1 :)
clickhouse1 :) insert into join_table_all(id, subject, score) values(1, 'math', 90), (1, 'math', 95), (2, 'english', 80);
INSERT INTO join_table_all (id, subject, score) VALUES
Query id: 3e002373-06c7-46e7-bce7-118aad0a3df2
Ok.
3 rows in set. Elapsed: 0.006 sec.
clickhouse1 :)
- Querying the data with a join
clickhouse1 :)
clickhouse1 :) select id, name, city, subject, score from left_table_all global left join join_table_all using(id);
SELECT
id,
name,
city,
subject,
score
FROM left_table_all
GLOBAL LEFT JOIN join_table_all USING (id)
Query id: c58ea233-0738-4bd1-9479-c05b5e4499c9
┌─id─┬─name──┬─city─────┬─subject─┬─score─┐
│ 2 │ name2 │ Shanghai │ english │ 80 │
└────┴───────┴──────────┴─────────┴───────┘
┌─id─┬─name──┬─city──────┬─subject─┬─score─┐
│ 3 │ name3 │ Guangzhou │ │ 0 │
└────┴───────┴───────────┴─────────┴───────┘
┌─id─┬─name──┬─city────┬─subject─┬─score─┐
│ 1 │ name1 │ Beijing │ math │ 90 │
└────┴───────┴─────────┴─────────┴───────┘
3 rows in set. Elapsed: 0.051 sec.
clickhouse1 :)
- Querying the Join engine table directly
clickhouse1 :)
clickhouse1 :) select * from join_table_all;
SELECT *
FROM join_table_all
Query id: 76349c9b-0ad3-4a84-99ad-c1a15a4e351a
┌─id─┬─subject─┬─score─┐
│ 1 │ math │ 90 │
└────┴─────────┴───────┘
┌─id─┬─subject─┬─score─┐
│ 2 │ english │ 80 │
└────┴─────────┴───────┘
2 rows in set. Elapsed: 0.018 sec.
clickhouse1 :)
clickhouse2 :) select joinGet('join_table_local', 'subject', toUInt8(1));
SELECT joinGet('join_table_local', 'subject', toUInt8(1))
Query id: 27b60b86-4e21-4476-82cc-ad809de4eadd
┌─joinGet('join_table_local', 'subject', toUInt8(1))─┐
│ math │
└────────────────────────────────────────────────────┘
1 rows in set. Elapsed: 0.003 sec.
clickhouse2 :)
- Two rows with id = 1 were inserted, but only one comes back when queried
- joinGet function: it matches the value of its third argument against the table's join key and returns the value of the column named by its second argument (a second lookup is sketched below)
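A second lookup against the data inserted above, this time fetching the score column for id = 2 (a sketch; since join_table_local is a local table, the row is only visible on the node whose shard holds id = 2):
select joinGet('join_table_local', 'score', toUInt8(2));
-- expected: 80, when run on the node that stores the id = 2 row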
4. Buffer Table Engine
When data is inserted into a target table so quickly that it approaches the speed at which the table can merge its data parts, consider the Buffer table engine: data is first buffered in a Buffer table and flushed to the target table once certain conditions are met. This buys the target table time, and fewer inserts also mean fewer part merges.
- Creating the tables
The table that ultimately stores the data
clickhouse2 :)
clickhouse2 :) create table data_table_local on cluster sharding_ha(
:-] id UInt32,
:-] name String,
:-] city String
:-] ) engine = ReplicatedMergeTree('/clickhouse/tables/data_table/{shard}', '{replica}')
:-] order by id;
CREATE TABLE data_table_local ON CLUSTER sharding_ha
(
`id` UInt32,
`name` String,
`city` String
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/data_table/{shard}', '{replica}')
ORDER BY id
Query id: 1e519493-7007-40cc-b9ce-b9820d242deb
┌─host────────┬─port─┬─status─┬─error─┬─num_hosts_remaining─┬─num_hosts_active─┐
│ clickhouse1 │ 9000 │ 0 │ │ 3 │ 2 │
│ clickhouse3 │ 9000 │ 0 │ │ 2 │ 2 │
└─────────────┴──────┴────────┴───────┴─────────────────────┴──────────────────┘
┌─host────────┬─port─┬─status─┬─error─┬─num_hosts_remaining─┬─num_hosts_active─┐
│ clickhouse2 │ 9000 │ 0 │ │ 1 │ 0 │
│ clickhouse4 │ 9000 │ 0 │ │ 0 │ 0 │
└─────────────┴──────┴────────┴───────┴─────────────────────┴──────────────────┘
4 rows in set. Elapsed: 0.200 sec.
clickhouse2 :)
clickhouse2 :) create table data_table_all on cluster sharding_ha(
:-] id UInt32,
:-] name String,
:-] city String
:-] ) engine = Distributed(sharding_ha, default, data_table_local, id);
CREATE TABLE data_table_all ON CLUSTER sharding_ha
(
`id` UInt32,
`name` String,
`city` String
)
ENGINE = Distributed(sharding_ha, default, data_table_local, id)
Query id: 48e7683e-d7f9-425d-be4e-8a5c421c6045
┌─host────────┬─port─┬─status─┬─error─┬─num_hosts_remaining─┬─num_hosts_active─┐
│ clickhouse1 │ 9000 │ 0 │ │ 3 │ 2 │
└─────────────┴──────┴────────┴───────┴─────────────────────┴──────────────────┘
┌─host────────┬─port─┬─status─┬─error─┬─num_hosts_remaining─┬─num_hosts_active─┐
│ clickhouse2 │ 9000 │ 0 │ │ 2 │ 0 │
│ clickhouse3 │ 9000 │ 0 │ │ 1 │ 0 │
│ clickhouse4 │ 9000 │ 0 │ │ 0 │ 0 │
└─────────────┴──────┴────────┴───────┴─────────────────────┴──────────────────┘
4 rows in set. Elapsed: 0.086 sec.
clickhouse2 :)
The Buffer table
clickhouse2 :)
clickhouse2 :) create table buffer_table_local on cluster sharding_cluster as data_table_local
:-] engine = Buffer(default, data_table_all, 4, 60, 600, 100, 1000, 102400, 204800);
CREATE TABLE buffer_table_local ON CLUSTER sharding_cluster AS data_table_local
ENGINE = Buffer(default, data_table_all, 4, 60, 600, 100, 1000, 102400, 204800)
Query id: 1fbff483-bf38-41ad-adef-48f137290a83
┌─host────────┬─port─┬─status─┬─error─┬─num_hosts_remaining─┬─num_hosts_active─┐
│ clickhouse2 │ 9000 │ 0 │ │ 3 │ 0 │
│ clickhouse1 │ 9000 │ 0 │ │ 2 │ 0 │
│ clickhouse3 │ 9000 │ 0 │ │ 1 │ 0 │
│ clickhouse4 │ 9000 │ 0 │ │ 0 │ 0 │
└─────────────┴──────┴────────┴───────┴─────────────────────┴──────────────────┘
4 rows in set. Elapsed: 0.134 sec.
clickhouse2 :)
clickhouse2 :) create table buffer_table_all on cluster sharding_cluster as data_table_all
:-] engine = Distributed(sharding_cluster, default, buffer_table_local, id);
CREATE TABLE buffer_table_all ON CLUSTER sharding_cluster AS data_table_all
ENGINE = Distributed(sharding_cluster, default, buffer_table_local, id)
Query id: c842a2eb-a139-4c7b-baa1-02ddc667914c
┌─host────────┬─port─┬─status─┬─error─┬─num_hosts_remaining─┬─num_hosts_active─┐
│ clickhouse1 │ 9000 │ 0 │ │ 3 │ 1 │
│ clickhouse3 │ 9000 │ 0 │ │ 2 │ 1 │
│ clickhouse4 │ 9000 │ 0 │ │ 1 │ 1 │
└─────────────┴──────┴────────┴───────┴─────────────────────┴──────────────────┘
┌─host────────┬─port─┬─status─┬─error─┬─num_hosts_remaining─┬─num_hosts_active─┐
│ clickhouse2 │ 9000 │ 0 │ │ 0 │ 0 │
└─────────────┴──────┴────────┴───────┴─────────────────────┴──────────────────┘
4 rows in set. Elapsed: 0.103 sec.
clickhouse2 :)
Notes on the Buffer table engine:
- Parameters, in declaration order (an annotated copy of the DDL above follows this list)
  - database the data is ultimately written to
  - table the data is ultimately written to
  - number of independent buffers (layers) per server; inserts are spread across them round-robin
  - minimum time before flushing to the target table, counted from the first insert, in seconds
  - maximum time before flushing to the target table
  - minimum number of rows before flushing to the target table
  - maximum number of rows before flushing to the target table
  - minimum number of bytes before flushing to the target table
  - maximum number of bytes before flushing to the target table
- Flush rules (meeting either condition is enough; each buffer is evaluated independently)
  - all three minimums are satisfied at the same time, or
  - any single maximum is exceeded
- If a single insert exceeds the maximum row count or the maximum byte size, that insert is written directly to the target table
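Mapping those parameters back onto the DDL used above (a sketch; the names follow ClickHouse's documented parameter order: database, table, num_layers, min_time, max_time, min_rows, max_rows, min_bytes, max_bytes):
create table buffer_table_local on cluster sharding_cluster as data_table_local
engine = Buffer(
    default,          -- destination database
    data_table_all,   -- destination table
    4,                -- num_layers: independent buffers per server
    60, 600,          -- min_time, max_time (seconds)
    100, 1000,        -- min_rows, max_rows
    102400, 204800    -- min_bytes, max_bytes
);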
- Inserting data into the Buffer table
clickhouse2 :)
clickhouse2 :) insert into buffer_table_all(id, name, city) select number, concat('name', toString(number)), concat('city', toString(number)) from numbers(1000);
clickhouse2 :)
Only after the command above had been executed 20-plus times did any data become visible in data_table_all. This is roughly what the flush rules predict: each insert is split across the shards and the four buffer layers, so many inserts (and at least 60 seconds) are needed before a buffer satisfies all three minimum thresholds, and no single insert is large enough to trip a maximum.
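Note that reading from a Buffer table also scans its destination table, which is why the destination table data_table_all is queried directly here to see what has actually been flushed. A sketch of the two views, run on a single node:
-- only rows that have already been flushed out of the buffers
select count() from data_table_all;
-- this node's buffered rows plus everything in the destination it points to
select count() from buffer_table_local;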