pgcopydbの検証を過去にしたものの、Azure Database for PostgreSQL Flexible Server間での移行はしたことが無かったので、改めて検証してみた。作業するクライアントはmacOSを前提にしているものの、他の環境でもそれほど変わらないはず。
事前準備
まず、いくつか必須のものがある。
- Docker DesktopもしくはDocker
- psql
- Azure CLI
Homebrew環境であれば、サクッとインストールできるはず。
brew install --cask docker-desktop
brew install postgresql azure-cli
オプションで、テスト用のダミーデータを生成する拙作のpgsalesgenをインストールする。
brew install rioriost/pgsalesgen/pgsalesgen
で、Azureにログインしたら、準備完了。
az login
環境変数の設定
デプロイスクリプト、ダミーデータのロード、pgcopydbの実行時に参照されるので、最初に設定しておく。
export PG_ADMIN_USER='your_account'
export PG_ADMIN_PASS='your_password'
デプロイスクリプトの実行
もちろん手動でも良いし、Azure Developer CLIでBicepからデプロイしても構わない。
#!/bin/bash
# Deploy two Azure Database for PostgreSQL Flexible Servers (migration
# source and target) and verify data-plane connectivity to both.
# Requires: az CLI, curl, psql, and the PG_ADMIN_USER / PG_ADMIN_PASS env vars.
set -euo pipefail
# ---- common settings ----
LOCATION="japaneast"
# Fail immediately with a clear message when credentials are not exported.
ADMIN_USER="${PG_ADMIN_USER:?Set PG_ADMIN_USER env var}"
ADMIN_PASS="${PG_ADMIN_PASS:?Set PG_ADMIN_PASS env var}"
# Public IPv4 address of this client, used for the server firewall rules.
CLIENT_IP="$(curl -4 -s https://ifconfig.co | tr -d '\n')"
# Server spec
SRC_PG_VERSION="17"
TGT_PG_VERSION="17"
TIER="GeneralPurpose"
SKU_NAME="Standard_D8ds_v4"
STORAGE_SIZE_GIB="512"
PERF_TIER="P40"
ENTRA_AUTH="Disabled"
PWD_AUTH="Enabled"
# Retry / timeout defaults (overridable via environment)
CONNECT_MAX_TRIES="${CONNECT_MAX_TRIES:-10}" # up to 10 attempts
CONNECT_TIMEOUT_SEC="${CONNECT_TIMEOUT_SEC:-20}"
# Timestamped logger: "[YYYY-MM-DD HH:MM:SS] message" on stdout.
log() { printf '[%s] %s\n' "$(date +'%F %T')" "$*"; }
# ---- az cli version gate ----
# Installed azure-cli version string, e.g. "2.86.0".
AZ_VERSION="$(az version --query '"azure-cli"' -o tsv)"
# version_ge A B — succeed (return 0) when version A >= version B,
# using semantic version ordering.
version_ge() {
  local lowest
  lowest="$(printf '%s\n%s\n' "$2" "$1" | sort -V | head -n1)"
  [[ "$lowest" == "$2" ]]
}
USE_FW_NEW_SYNTAX=0
# azure-cli 2.86.0+ renames the firewall-rule create flags: --name becomes
# the rule name and --server-name carries the server (older CLIs use
# --name for the server and --rule-name for the rule). See ensure_fw_rule.
if version_ge "$AZ_VERSION" "2.86.0"; then
USE_FW_NEW_SYNTAX=1
fi
# ---- bash timeout helper ----
# ---- bash timeout helper ----
# Run a command with a time limit, mimicking GNU timeout(1).
#   $1   - timeout in seconds
#   $2.. - command and its arguments
# Returns the command's exit status, or 124 when it was killed on timeout
# (GNU timeout compatible).
run_with_timeout() {
  local seconds="$1"; shift
  local rc=0
  # Prefer GNU timeout when available.
  if command -v timeout >/dev/null 2>&1; then
    # '|| rc=$?' captures the status without tripping 'set -e'.
    timeout "$seconds" "$@" || rc=$?
    return "$rc"
  fi
  # bash-only fallback: run the command in the background paired with a
  # watchdog subshell that TERMs (then KILLs) it once the deadline passes.
  "$@" &
  local cmd_pid=$!
  (
    sleep "$seconds"
    if kill -0 "$cmd_pid" 2>/dev/null; then
      kill "$cmd_pid" 2>/dev/null || true
      sleep 1
      kill -9 "$cmd_pid" 2>/dev/null || true
    fi
  ) &
  local watchdog_pid=$!
  wait "$cmd_pid" || rc=$?
  kill "$watchdog_pid" 2>/dev/null || true
  wait "$watchdog_pid" 2>/dev/null || true
  # Map SIGTERM/SIGKILL deaths (143/137) to 124 for GNU timeout compatibility.
  if (( rc == 143 || rc == 137 )); then
    return 124
  fi
  return "$rc"
}
# ---- prerequisites ----
# ---- prerequisites ----
# Verify psql is on PATH; on macOS offer to install libpq via Homebrew,
# otherwise print instructions. Exits non-zero when psql remains missing.
ensure_psql() {
  if command -v psql >/dev/null 2>&1; then
    log "psql found: $(command -v psql)"
    return 0
  fi
  printf '\n%s\n\n' "psql command is not installed (required for connectivity check)."
  if ! command -v brew >/dev/null 2>&1; then
    log "Homebrew not found and psql is missing. Please install psql and retry."
    exit 1
  fi
  printf '%s\n' "On macOS, you can install it via Homebrew:"
  printf '%s\n' " brew install libpq"
  printf '%s\n' " brew link --force libpq"
  printf '\n'
  local ans
  read -r -p "Install libpq (psql) now? [y/N]: " ans
  case "${ans:-}" in
    [yY]|[yY][eE][sS]) ;;
    *)
      log "Not installing psql. Exiting."
      exit 1
      ;;
  esac
  log "Installing libpq (psql) via Homebrew..."
  brew install libpq
  brew link --force libpq >/dev/null 2>&1 || true
  if command -v psql >/dev/null 2>&1; then
    log "psql installed: $(command -v psql)"
    return 0
  fi
  log "Installed libpq, but 'psql' is still not in PATH."
  log "Try: brew link --force libpq"
  exit 1
}
# ---- helpers ----
# ---- helpers ----
# Idempotently ensure a resource group exists, creating it when absent.
# $1 - resource group name.
ensure_rg() {
  local rg="$1"
  if ! az group exists -n "$rg" | grep -qi true; then
    log "Creating resource group: $rg"
    az group create -n "$rg" -l "$LOCATION" >/dev/null
  else
    log "Resource group exists: $rg"
  fi
}
# Idempotently create a firewall rule on a flexible server, handling the
# flag rename introduced in azure-cli 2.86.0.
# $1 rg, $2 server, $3 rule name, $4 start IP, $5 end IP (defaults to $4).
ensure_fw_rule() {
  local rg="$1" server="$2" rule="$3"
  local start_ip="$4"
  local end_ip="${5:-$4}"
  local args=(
    --resource-group "$rg"
    --start-ip-address "$start_ip"
    --end-ip-address "$end_ip"
  )
  if (( USE_FW_NEW_SYNTAX == 1 )); then
    # CLI 2.86.0+: --server-name holds the server, --name the rule.
    args+=(--server-name "$server" --name "$rule")
  else
    # Older CLI: --name is the server name, --rule-name the rule name.
    args+=(--name "$server" --rule-name "$rule")
  fi
  az postgres flexible-server firewall-rule create "${args[@]}" >/dev/null
}
# Ensure a flexible server exists (creating it when needed) along with its
# resource group and the firewall rules for Azure services and this client.
# $1 - resource group, $2 - server name, $3 - PostgreSQL major version.
ensure_server() {
  local rg="$1" server="$2" version="$3"
  ensure_rg "$rg"
  if ! az postgres flexible-server show -g "$rg" -n "$server" >/dev/null 2>&1; then
    log "Creating server: $server (rg=$rg, location=$LOCATION, client_ip=$CLIENT_IP)"
    local create_args=(
      --resource-group "$rg"
      --name "$server"
      --location "$LOCATION"
      --version "$version"
      --tier "$TIER"
      --sku-name "$SKU_NAME"
      --storage-size "$STORAGE_SIZE_GIB"
      --performance-tier "$PERF_TIER"
      --microsoft-entra-auth "$ENTRA_AUTH"
      --password-auth "$PWD_AUTH"
      --admin-user "$ADMIN_USER"
      --admin-password "$ADMIN_PASS"
      --public-access "$CLIENT_IP"
    )
    az postgres flexible-server create "${create_args[@]}" >/dev/null
  else
    log "Server already exists: $server (rg=$rg) - skip create"
  fi
  log "Ensuring firewall rules for: $server (rg=$rg)"
  # 0.0.0.0-0.0.0.0 is Azure's convention for "allow Azure services".
  ensure_fw_rule "$rg" "$server" "AllowAzureServices" "0.0.0.0" "0.0.0.0"
  ensure_fw_rule "$rg" "$server" "AllowClientIP" "$CLIENT_IP" "$CLIENT_IP"
}
# ---- connectivity check via psql ----
check_pg_connect() {
local rg="$1" # ログ用(psqlには不要)
local server="$2"
local host="${server}.postgres.database.azure.com"
local db="postgres"
local user="$ADMIN_USER"
log "Checking connectivity via psql: $server (host=$host, rg=$rg)"
local out rc=0
out="$(
PGPASSWORD="$ADMIN_PASS" \
run_with_timeout "$CONNECT_TIMEOUT_SEC" \
psql "host=${host} port=5432 dbname=${db} user=${user} sslmode=require connect_timeout=5" \
-v ON_ERROR_STOP=1 \
-qAtc "select 1;" \
2>&1 >/dev/null
)" || rc=$?
if (( rc == 0 )); then
log "✅ Connection OK (psql): $server"
return 0
fi
if (( rc == 124 )); then
log "❌ Connection TIMEOUT after ${CONNECT_TIMEOUT_SEC}s (psql): $server"
else
log "❌ Connection FAILED (rc=$rc) (psql): $server"
fi
[[ -n "${out:-}" ]] && log " psql error: ${out}"
return 1
}
# Retry check_pg_connect with exponential backoff (1,2,4,... seconds).
# $1 - resource group, $2 - server, $3 - max attempts (default CONNECT_MAX_TRIES).
# Returns 0 as soon as one attempt succeeds, 1 after exhausting all attempts.
retry_connect() {
  local rg="$1" server="$2"
  local max_tries="${3:-$CONNECT_MAX_TRIES}"
  local attempt delay=1
  for ((attempt = 1; attempt <= max_tries; attempt++)); do
    if check_pg_connect "$rg" "$server"; then
      return 0
    fi
    if (( attempt == max_tries )); then
      log "❌ Giving up connect after ${max_tries} tries: $server (rg=$rg)"
      return 1
    fi
    log "Retrying connect ($attempt/$max_tries) in ${delay}s... ($server)"
    sleep "$delay"
    delay=$(( delay * 2 ))
  done
}
# Orchestrate the run: verify psql, ensure both servers exist, then check
# data-plane connectivity to each one (exit 1 if any server is unreachable).
main() {
  ensure_psql
  # Each entry: label|resource-group|server-name|pg-version
  local servers=(
    "src|rg-${ADMIN_USER}migsrc|${ADMIN_USER}migsrc|${SRC_PG_VERSION}"
    "tgt|rg-${ADMIN_USER}migtgt|${ADMIN_USER}migtgt|${TGT_PG_VERSION}"
  )
  local entry label rg server version
  # Create/ensure servers (control plane).
  for entry in "${servers[@]}"; do
    IFS='|' read -r label rg server version <<< "$entry"
    log "Ensuring ${label} server: ${server} (rg=${rg}, version=${version})"
    ensure_server "$rg" "$server" "$version"
  done
  # Connection checks (data plane).
  for entry in "${servers[@]}"; do
    IFS='|' read -r label rg server version <<< "$entry"
    log "Connectivity check for ${label} server: ${server}"
    retry_connect "$rg" "$server" || exit 1
  done
  log "All servers are reachable. Done."
}
main "$@"
deploy_server.shとして保存して、パーミッション設定、実行。
chmod +x deploy_server.sh
./deploy_server.sh
なんかメッセージが色々表示されるが、最後に以下のメッセージが出れば完了。
......
[2026-02-06 17:48:14] Connectivity check for src server: riomigsrc
[2026-02-06 17:48:14] Checking connectivity via psql: riomigsrc (host=riomigsrc.postgres.database.azure.com, rg=rg-riomigsrc)
[2026-02-06 17:48:34] ✅ Connection OK (psql): riomigsrc
[2026-02-06 17:48:34] Connectivity check for tgt server: riomigtgt
[2026-02-06 17:48:34] Checking connectivity via psql: riomigtgt (host=riomigtgt.postgres.database.azure.com, rg=rg-riomigtgt)
[2026-02-06 17:48:55] ✅ Connection OK (psql): riomigtgt
[2026-02-06 17:48:55] All servers are reachable. Done.
ダミーデータをロード
pgsalesgenを実行しても良いし、もちろんお手元にあるデータ等を流し込んでも構わない。
pgsalesgen -h ${PG_ADMIN_USER}migsrc.postgres.database.azure.com -U $PG_ADMIN_USER -d postgres --password $PG_ADMIN_PASS
デフォルトだと87GBぐらいのデータを生成する。
[setup] creating schema/tables...
[setup] inserting masters: customers=2,000,000 products=200,000 ...
[progress] 0.00 GB / 100.00 GB
[progress] 2.96 GB / 100.00 GB
[progress] 6.22 GB / 100.00 GB
[progress] 9.22 GB / 100.00 GB
[progress] 12.08 GB / 100.00 GB
[progress] 14.80 GB / 100.00 GB
[progress] 17.62 GB / 100.00 GB
[progress] 20.33 GB / 100.00 GB
......
[progress] 84.41 GB / 100.00 GB
[progress] 87.13 GB / 100.00 GB
[progress] 89.98 GB / 100.00 GB
[progress] 92.65 GB / 100.00 GB
[progress] 95.48 GB / 100.00 GB
[progress] 98.24 GB / 100.00 GB
[progress] 100.87 GB / 100.00 GB
[done] target reached; stopping workers...
pgcopydbを実行する
本家のpgcopydbはPostgreSQL 16までしか対応しておらず、17/18に対応させるPRは存在するが、マージされてない。
そこで、当該PRが含まれるフォークをさらにフォークしてDockerfileを修正し、ビルドしたイメージをDocker Hubに上げておいた。
docker run \
-e PGCOPYDB_SOURCE_PGURI="postgres://${PG_ADMIN_USER}:${PG_ADMIN_PASS}@${PG_ADMIN_USER}migsrc.postgres.database.azure.com/postgres" \
-e PGCOPYDB_TARGET_PGURI="postgres://${PG_ADMIN_USER}:${PG_ADMIN_PASS}@${PG_ADMIN_USER}migtgt.postgres.database.azure.com/postgres" \
--rm -it rioriost/pgcopydb:0.18 \
clone --skip-extensions --drop-if-exists --no-owner --no-acl
最後の行が、pgcopydbのコマンド。実行すると、以下のようにsrcからtgtにクローンされる。
09:25:01.301 1 INFO Running pgcopydb version 0.17 from "/usr/local/bin/pgcopydb"
09:25:01.321 1 INFO [SOURCE] Copying database from "postgres://rio@riomigsrc.postgres.database.azure.com/postgres?keepalives=1&keepalives_idle=10&keepalives_interval=10&keepalives_count=60"
09:25:01.321 1 INFO [TARGET] Copying database into "postgres://rio@riomigtgt.postgres.database.azure.com/postgres?keepalives=1&keepalives_idle=10&keepalives_interval=10&keepalives_count=60"
09:25:01.433 1 INFO Using work dir "/tmp/pgcopydb"
09:25:01.631 1 INFO Exported snapshot "00000238-00000002-1" from the source database
09:25:01.656 23 INFO STEP 1: fetch source database tables, indexes, and sequences
09:25:01.860 23 INFO Fetched information for 4 tables (including 0 tables split in 0 partitions total), with an estimated total of 116 million tuples and 87 GB on-disk
09:25:01.873 23 INFO Fetched information for 4 indexes (supporting 4 constraints)
09:25:01.877 23 INFO Fetching information for 3 sequences
09:25:01.925 23 INFO Fetched information for 4 extensions
09:25:02.056 23 INFO Found 0 indexes (supporting 0 constraints) in the target database
09:25:02.057 23 INFO STEP 2: dump the source database schema (pre/post data)
09:25:02.058 23 INFO /usr/bin/pg_dump -Fc --snapshot 00000238-00000002-1 --section=pre-data --section=post-data --file /tmp/pgcopydb/schema/schema.dump 'postgres://rio@riomigsrc.postgres.database.azure.com/postgres?keepalives=1&keepalives_idle=10&keepalives_interval=10&keepalives_count=60'
09:25:02.935 23 INFO STEP 3: restore the pre-data section to the target database
09:25:03.103 23 INFO Drop tables on the target database, per --drop-if-exists
09:25:03.163 23 INFO /usr/bin/pg_restore --dbname 'postgres://rio@riomigtgt.postgres.database.azure.com/postgres?keepalives=1&keepalives_idle=10&keepalives_interval=10&keepalives_count=60' --section pre-data --jobs 4 --clean --if-exists --no-owner --no-acl --use-list /tmp/pgcopydb/schema/pre-filtered.list /tmp/pgcopydb/schema/schema.dump
09:25:03.816 31 INFO STEP 4: starting 4 table-data COPY processes
09:25:03.821 34 INFO STEP 8: starting 4 VACUUM processes
09:25:03.822 32 INFO STEP 6: starting 4 CREATE INDEX processes
09:25:03.822 32 INFO STEP 7: constraints are built by the CREATE INDEX processes
09:25:03.931 23 INFO Skipping large objects: none found.
09:25:03.933 23 INFO STEP 9: reset sequences values
09:25:03.934 47 INFO Set sequences values on the target database
09:37:37.079 23 INFO STEP 10: restore the post-data section to the target database
09:37:37.198 23 INFO /usr/bin/pg_restore --dbname 'postgres://rio@riomigtgt.postgres.database.azure.com/postgres?keepalives=1&keepalives_idle=10&keepalives_interval=10&keepalives_count=60' --section post-data --jobs 4 --clean --if-exists --no-owner --no-acl --use-list /tmp/pgcopydb/schema/post-filtered.list /tmp/pgcopydb/schema/schema.dump
09:37:37.704 23 INFO All step are now done, 12m35s elapsed
09:37:37.704 23 INFO Printing summary for 4 tables and 4 indexes
OID | Schema | Name | Parts | copy duration | transmitted bytes | indexes | create index duration
------+--------+-------------+-------+---------------+-------------------+---------+----------------------
24837 | sales | order_items | 1 | 8m38s | 58 GB | 1 | 3m51s
24829 | sales | orders | 1 | 3m34s | 17 GB | 1 | 1m29s
24811 | sales | customers | 1 | 2s735 | 154 MB | 1 | 455ms
24821 | sales | products | 1 | 206ms | 8599 kB | 1 | 67ms
Step Connection Duration Transfer Concurrency
-------------------------------------------------- ---------- ---------- ---------- ------------
Catalog Queries (table ordering, filtering, etc) source 340ms 1
Dump Schema source 877ms 1
Prepare Schema target 871ms 1
COPY, INDEX, CONSTRAINTS, VACUUM (wall clock) both 12m33s 12
COPY (cumulative) both 12m15s 87 GB 4
CREATE INDEX (cumulative) target 5m20s 4
CONSTRAINTS (cumulative) target 27ms 4
VACUUM (cumulative) target 5s495 4
Reset Sequences both 68ms 1
Large Objects (cumulative) (null) 0ms 0
Finalize Schema both 622ms 4
-------------------------------------------------- ---------- ---------- ---------- ------------
Total Wall Clock Duration both 12m35s 20
87GBのクローンで12分35秒。まずまずといったところ。
まとめ
PostgreSQLのエクステンション、例えば、pgvectorやApache AGE、を利用している構成は未確認ながら、普通のスキーマ・テーブルであれば問題なく移行出来た。pgcopydbには様々なオプションがあり、データ移行の前にアセスメントしてから、といったことも可能。
