shell分批导入数据到clickhouse
脚本
#!/bin/bash
# 定义ClickHouse连接参数
CLICKHOUSE_HOST="127.0.0.1"
CLICKHOUSE_PORT="9000"
CLICKHOUSE_DATABASE="xxx"
CLICKHOUSE_USER=""
CLICKHOUSE_PASSWORD=""
# 定义每个批次的大小(可根据需要调整)
BATCH_SIZE=100000
# 定义SQL文件路径
SQL_FILE="/root/sql/sql.sql"
# 计算SQL文件的总行数
TOTAL_LINES=$(wc -l < "$SQL_FILE")
echo "Total lines in SQL file: $TOTAL_LINES"
# 计算需要执行的批次数量
TOTAL_BATCHES=$((TOTAL_LINES / BATCH_SIZE + 1))
echo "Total batches to execute: $TOTAL_BATCHES"
# 循环执行每个批次的导入
for i in $(seq 0 $((TOTAL_BATCHES - 1))); do
# 计算当前批次的起始行号和结束行号
START_LINE=$((i * BATCH_SIZE + 1))
END_LINE=$(((i + 1) * BATCH_SIZE))
if [ "$END_LINE" -gt "$TOTAL_LINES" ]; then
END_LINE=$TOTAL_LINES
fi
# 提取当前批次的SQL语句并保存到临时文件中
awk "NR >= $START_LINE && NR <= $END_LINE" "$SQL_FILE" > batch_${i}.sql
# 导入当前批次的数据到ClickHouse
echo "Importing batch $i (lines $START_LINE to $END_LINE)..."
#cat batch_${i}.sql | clickhouse-client -h "$CLICKHOUSE_HOST" --port "$CLICKHOUSE_PORT"
clickhouse-client -h "$CLICKHOUSE_HOST" --port "$CLICKHOUSE_PORT" --query="INSERT INTO xx.xx FORMAT MySQLDump" < batch_${i}.sql
# 检查上一步操作是否成功
if [ $? -eq 0 ]; then
echo "Batch $i import completed successfully."
else
echo "Error: Batch $i import failed."
exit 1
fi
# 删除临时文件
rm -f batch_${i}.sql
done
echo "Import completed!"
这样,clickhouse-client 将执行SQL文件中指定的数据库和表上的操作。