家资是何物,积帙列梁梠。这篇文章主要讲述clickhouse 20.x 三分片两副本部署与本地表的压力测试相关的知识,希望能为你提供帮助。
标签(空测试用例格分隔):clickhouse 系列
一:系统环境准备
1.1 系统初始化
系统:
CentOS7.9x64
主机名:
cat /etc/hosts
----
192.168.100.141 node01
192.168.100.142 node02
192.168.100.143 node03
192.168.100.144 node04
192.168.100.145 node05
192.168.100.146 node06
-----
本次部署6台服务器推荐配置每台: 内存:64G+ CPU:32core+硬盘500G+
1.2 系统部署jdk
在node01,node02,node03,node04,node05 上面部署tar -zxvf jdk-8u301-linux-x64.tar.gz
mv jdk1.8.0_301/ /usr/local/jdk
vim /etc/profile
----### jdk
export java_HOME=/usr/local/jdk
export CLASSPATH=.:$JAVA_HOME/jre/lib:$JAVA_HOME/lib:$JAVA_HOME/lib/tools.jar
PATH=$PATH:$HOME/bin:$JAVA_HOME/bin
----source /etc/profilejava -version
文章图片
文章图片
1.3 配置zookeeper 集群
在node01,node02,node03,node04,node05上面部署
tar -zxvf apache-zookeeper-3.7.0-bin.tar.gz
mv apache-zookeeper-3.7.0-bin /usr/local/zookeepermkdir -p /usr/local/zookeeper/data
mkdir -p /usr/local/zookeeper/logcd /usr/local/zookeeper/data/echo 1 >
myid
----
cd /usr/local/zookeeper/conf cp zoo_sample.cfg zoo.cfg vim zoo.cfg ----
# 心跳时间
tickTime=2000
# follow连接leader的初始化连接时间,表示tickTime的倍数
initLimit=10
# syncLimit配置表示leader与follower之间发送消息,请求和应答时间长度。如果followe在设置的时间内不能与leader进行通信,那么此follower将被丢弃,tickTime的倍数
syncLimit=5
# 客户端连接端口
clientPort=2181
# 节点数据存储目录,需要提前创建,注意myid添加,用于标识服务器节点
dataDir=/usr/local/zookeeper/data
dataLogDir=/usr/local/zookeeper/log
server.1=192.168.100.141:2888:3888
server.2=192.168.100.142:2888:3888
server.3=192.168.100.143:2888:3888
server.4=192.168.100.144:2888:3888
server.5=192.168.100.145:2888:3888
---
-----scp -r zookeeper root@node02:/usr/local/
scp -r zookeeper root@node03:/usr/local/
scp -r zookeeper root@node04:/usr/local/
scp -r zookeeper root@node05:/usr/local/
修改node02 节点 myid
cd /usr/local/zookeeper/data/
echo 2 >
myid
修改node03 节点 myid
cd /usr/local/zookeeper/data/
echo 3 >
myid修改node04 节点 myid
cd /usr/local/zookeeper/data/
echo 4 >
myid修改node05 节点 myid
cd /usr/local/zookeeper/data/
echo 5 >
myid
文章图片
启动zookeeper cd /usr/local/zookeeper/bin/./zkServer.sh start
文章图片
文章图片
文章图片
二:安装clickhouse20.x [所有节点都安装] 2.1 安装clickhouse
unzip clickhouse20-9-7-11.zip
cd clickhouse20-9-7-11
rpm -ivh clickhouse-*
文章图片
2.2 修改配置文件
2.2.2.1 修改config.xml
cd /etc/clickhouse-server/vim config.xml ------
<
database_atomic_delay_before_drop_table_sec>
0<
/database_atomic_delay_before_drop_table_sec>
<
include_from>
/etc/clickhouse-server/config.d/metrika.xml<
/include_from>
--------------<
listen_host>
::<
/listen_host>
----拿掉所有localhost的本地存储shared 分片不做显示:
<
remote_servers incl="clickhouse_remote_servers" >
......#### 全部注释掉
<
/remote_servers>
文章图片
文章图片
文章图片
文章图片
文章图片
vim users.xml-----
<
password>
tzck123.com<
/password>
<
ip>
::/0<
/ip>
-----
文章图片
2.2.2.2 修改metrika.xml
cd /etc/clickhouse-server/config.dvim metrika.xml----------------------
<
yandex>
<
clickhouse_remote_servers>
<
!--定义分片节点,这里我指定 3 个分片,每个分片有 2 个副本,01+02,03+04,05+06-->
<
tzcluster3s2r02>
<
shard>
<
weight>
1<
/weight>
<
internal_replication>
true<
/internal_replication>
<
replica>
<
host>
node01<
/host>
<
port>
9000<
/port>
<
user>
default<
/user>
<
password>
tzck123.com<
/password>
<
/replica>
<
replica>
<
host>
node02<
/host>
<
port>
9000<
/port>
<
user>
default<
/user>
<
password>
tzck123.com<
/password>
<
/replica>
<
/shard>
<
shard>
<
weight>
1<
/weight>
<
internal_replication>
true<
/internal_replication>
<
replica>
<
host>
node03<
/host>
<
port>
9000<
/port>
<
user>
default<
/user>
<
password>
tzck123.com<
/password>
<
/replica>
<
replica>
<
host>
node04<
/host>
<
port>
9000<
/port>
<
user>
default<
/user>
<
password>
tzck123.com<
/password>
<
/replica>
<
/shard>
<
shard>
<
weight>
1<
/weight>
<
internal_replication>
true<
/internal_replication>
<
replica>
<
host>
node05<
/host>
<
port>
9000<
/port>
<
user>
default<
/user>
<
password>
tzck123.com<
/password>
<
/replica>
<
replica>
<
host>
node06<
/host>
<
port>
9000<
/port>
<
user>
default<
/user>
<
password>
tzck123.com<
/password>
<
/replica>
<
/shard>
<
/tzcluster3s2r02>
<
/clickhouse_remote_servers>
<
zookeeper-servers>
<
node index="1">
<
host>
node02<
/host>
<
port>
2181<
/port>
<
/node>
<
node index="2">
<
host>
node03<
/host>
<
port>
2181<
/port>
<
/node>
<
node index="3">
<
host>
node04<
/host>
<
port>
2181<
/port>
<
/node>
<
node index="4">
<
host>
node05<
/host>
<
port>
2181<
/port>
<
/node>
<
node index="5">
<
host>
node06<
/host>
<
port>
2181<
/port>
<
/node>
<
/zookeeper-servers>
<
!--定义宏变量,后面需要用-->
<
!-- 以下的配置根据节点的 IP/域名具体配置,配置文件中 macros 若省略,则建复制表时每个分
片需指定 zookeeper 路径及副本名称,
同 一 分 片 上 路 径 相 同 , 副 本 名 称 不 同 ; 若 不 省 略 需 每 个 分 片 不 同 配
置,layer-shard-replica-->
<
macros>
<
!-- <
replica>
node01.02<
/replica>
-->
<
layer>
02<
/layer>
<
shard>
01<
/shard>
<
replica>
tzcluster3s2r02_01_01<
/replica>
<
/macros>
<
!--不限制访问来源 ip 地址-->
<
networks>
<
ip>
::/0<
/ip>
<
/networks>
<
!--数据压缩方式,默认为 lz4-->
<
clickhouse_compression>
<
case>
<
min_part_size>
10000000000<
/min_part_size>
<
min_part_size_ratio>
0.01<
/min_part_size_ratio>
<
method>
lz4<
/method>
<
/case>
<
/clickhouse_compression>
<
/yandex>
-----------------------
同步: 所有主机
scp metrika.xml root@node02:/etc/clickhouse-server/config.d/scp metrika.xml root@node03:/etc/clickhouse-server/config.d/scp metrika.xml root@node04:/etc/clickhouse-server/config.d/scp metrika.xml root@node05:/etc/clickhouse-server/config.d/scp metrika.xml root@node06:/etc/clickhouse-server/config.d/
文章图片
文章图片
文章图片
其它主机依次修改macos定义宏 node02:
-----
<
macros>
<
!-- <
replica>
node01.02<
/replica>
-->
<
layer>
02<
/layer>
<
shard>
01<
/shard>
<
replica>
tzcluster3s2r02_01_02<
/replica>
<
/macros>
----node03:----
<
macros>
<
!-- <
replica>
node03.04<
/replica>
-->
<
layer>
02<
/layer>
<
shard>
02<
/shard>
<
replica>
tzcluster3s2r02_02_01<
/replica>
<
/macros>
----node04:----
<
macros>
<
!-- <
replica>
node03.04<
/replica>
-->
<
layer>
02<
/layer>
<
shard>
02<
/shard>
<
replica>
tzcluster3s2r02_02_02<
/replica>
<
/macros>
----node05:-----
<
macros>
<
!-- <
replica>
node05.06<
/replica>
-->
<
layer>
02<
/layer>
<
shard>
03<
/shard>
<
replica>
tzcluster3s2r02_03_01<
/replica>
<
/macros>
-----node06:-----
<
macros>
<
!-- <
replica>
node05.06<
/replica>
-->
<
layer>
02<
/layer>
<
shard>
03<
/shard>
<
replica>
tzcluster3s2r02_03_02<
/replica>
<
/macros>
----
2.2.2.3 启动clickhouse
systemctl stop clickhouse-server.service停机所有集群
systemctl start clickhouse-server.service所有节点全部启动
systemctl status clickhouse-server.service所有节点查看clickhouse 节点状态
文章图片
文章图片
三:验证集群 3.1 验证集群状态
验证:
clickhouse-client -h node01
查看集群:
select * from system.clusters;
select cluster,host_name from system.clusters;
文章图片
文章图片
文章图片
3.2 压测数据
参考官网压测数据:
https://clickhouse.com/docs/en/getting-started/example-datasets/brown-benchmarkhttps://datasets.clickhouse.com/mgbench1..3.csv.xz
文章图片
解压:
xz -v -d mgbench1..3.csv.xz
文章图片
建库:
create database mgbench on cluster tzcluster3s2r02 ;
文章图片
文章图片
USE mgbench;
CREATE TABLE mgbench.logs1 (
log_timeDateTime,
machine_nameLowCardinality(String),
machine_group LowCardinality(String),
cpu_idleNullable(Float32),
cpu_niceNullable(Float32),
cpu_systemNullable(Float32),
cpu_userNullable(Float32),
cpu_wioNullable(Float32),
disk_freeNullable(Float32),
disk_totalNullable(Float32),
part_max_used Nullable(Float32),
load_fifteenNullable(Float32),
load_fiveNullable(Float32),
load_oneNullable(Float32),
mem_buffersNullable(Float32),
mem_cachedNullable(Float32),
mem_freeNullable(Float32),
mem_sharedNullable(Float32),
swap_freeNullable(Float32),
bytes_inNullable(Float32),
bytes_outNullable(Float32)
)
ENGINE = MergeTree()
ORDER BY (machine_group, machine_name, log_time);
文章图片
CREATE TABLE mgbench.logs2 (
log_timeDateTime,
client_ipIPv4,
requestString,
status_code UInt16,
object_size UInt64
)
ENGINE = MergeTree()
ORDER BY log_time;
文章图片
CREATE TABLE mgbench.logs3 (
log_timeDateTime64,
device_idFixedString(15),
device_nameLowCardinality(String),
device_typeLowCardinality(String),
device_floor UInt8,
event_typeLowCardinality(String),
event_unitFixedString(1),
event_valueNullable(Float32)
)
ENGINE = MergeTree()
ORDER BY (event_type, log_time);
文章图片
数据导入
clickhouse-client --query "INSERT INTO mgbench.logs1 FORMAT CSVWithNames" <
mgbench1.csv
clickhouse-client --query "INSERT INTO mgbench.logs2 FORMAT CSVWithNames" <
mgbench2.csv
clickhouse-client --query "INSERT INTO mgbench.logs3 FORMAT CSVWithNames" <
mgbench3.csv带用户密码导入
clickhouse-client -h node01 -u default -d mgbench --password tzck123.com --query "INSERT INTO mgbench.logs1 FORMAT CSVWithNames" <
mgbench1.csvclickhouse-client -h node01 -u default -d mgbench --password tzck123.com --query "INSERT INTO mgbench.logs2 FORMAT CSVWithNames" <
mgbench2.csvclickhouse-client -h node01 -u default -d mgbench --password tzck123.com --query "INSERT INTO mgbench.logs3 FORMAT CSVWithNames" <
mgbench3.csv
文章图片
文章图片
文章图片
-- Q1.1: What is the CPU/network utilization for each web server since midnight?SELECT machine_name,
MIN(cpu) AS cpu_min,
MAX(cpu) AS cpu_max,
AVG(cpu) AS cpu_avg,
MIN(net_in) AS net_in_min,
MAX(net_in) AS net_in_max,
AVG(net_in) AS net_in_avg,
MIN(net_out) AS net_out_min,
MAX(net_out) AS net_out_max,
AVG(net_out) AS net_out_avg
FROM (
SELECT machine_name,
COALESCE(cpu_user, 0.0) AS cpu,
COALESCE(bytes_in, 0.0) AS net_in,
COALESCE(bytes_out, 0.0) AS net_out
FROM logs1
WHERE machine_name IN (anansi,aragog,urd)
AND log_time >
= TIMESTAMP 2017-01-11 00:00:00
) AS r
GROUP BY machine_name;
文章图片
-- Q1.2: Which computer lab machines have been offline in the past day?SELECT machine_name,
log_time
FROM logs1
WHERE (machine_name LIKE cslab% OR
machine_name LIKE mslab%)
AND load_one IS NULL
AND log_time >
= TIMESTAMP 2017-01-10 00:00:00
ORDER BY machine_name,
log_time;
文章图片
-- Q1.3: What are the hourly average metrics during the past 10 days for a specific workstation?SELECT dt,
hr,
AVG(load_fifteen) AS load_fifteen_avg,
AVG(load_five) AS load_five_avg,
AVG(load_one) AS load_one_avg,
AVG(mem_free) AS mem_free_avg,
AVG(swap_free) AS swap_free_avg
FROM (
SELECT CAST(log_time AS DATE) AS dt,
EXTRACT(HOUR FROM log_time) AS hr,
load_fifteen,
load_five,
load_one,
mem_free,
swap_free
FROM logs1
WHERE machine_name = babbage
AND load_fifteen IS NOT NULL
AND load_five IS NOT NULL
AND load_one IS NOT NULL
AND mem_free IS NOT NULL
AND swap_free IS NOT NULL
AND log_time >
= TIMESTAMP 2017-01-01 00:00:00
) AS r
GROUP BY dt,
hr
ORDER BY dt,
hr;
文章图片
文章图片
-- Q1.4: Over 1 month, how often was each server blocked on disk I/O?SELECT machine_name,
COUNT(*) AS spikes
FROM logs1
WHERE machine_group = Servers
AND cpu_wio >
0.99
AND log_time >
= TIMESTAMP 2016-12-01 00:00:00
AND log_time <
TIMESTAMP 2017-01-01 00:00:00
GROUP BY machine_name
ORDER BY spikes DESC
LIMIT 10;
文章图片
文章图片
-- Q1.5: Which externally reachable VMs have run low on memory?SELECT machine_name,
dt,
MIN(mem_free) AS mem_free_min
FROM (
SELECT machine_name,
CAST(log_time AS DATE) AS dt,
mem_free
FROM logs1
WHERE machine_group = DMZ
AND mem_free IS NOT NULL
) AS r
GROUP BY machine_name,
dt
HAVING MIN(mem_free) <
10000
ORDER BY machine_name,
dt;
文章图片
文章图片
-- Q1.6: What is the total hourly network traffic across all file servers?SELECT dt,
hr,
SUM(net_in) AS net_in_sum,
SUM(net_out) AS net_out_sum,
SUM(net_in) + SUM(net_out) AS both_sum
FROM (
SELECT CAST(log_time AS DATE) AS dt,
EXTRACT(HOUR FROM log_time) AS hr,
COALESCE(bytes_in, 0.0) / 1000000000.0 AS net_in,
COALESCE(bytes_out, 0.0) / 1000000000.0 AS net_out
FROM logs1
WHERE machine_name IN (allsorts,andes,bigred,blackjack,bonbon,
cadbury,chiclets,cotton,crows,dove,fireball,hearts,huey,
lindt,milkduds,milkyway,mnm,necco,nerds,orbit,peeps,
poprocks,razzles,runts,smarties,smuggler,spree,stride,
tootsie,trident,wrigley,york)
) AS r
GROUP BY dt,
hr
ORDER BY both_sum DESC
LIMIT 10;
【clickhouse 20.x 三分片两副本部署与本地表的压力测试】
文章图片
文章图片
推荐阅读
- 023.电子书列表查询接口的开发
- 005.Maven参数配置
- 问题 K: 最勇敢的机器人(并查集+背包)
- Prime Distance(区间筛)
- 004.maven概念模型图
- 测试基础|如何自学软件测试、包装简历实现华丽转行()
- IT|怎么都在叫做软件测试(我做软件测试工作的两大乐趣)
- Git的分支简单使用
- #导入Word文档图片# 根文件系统制作与挂载