pgcopydb再び

pgcopydbの検証を過去にしたものの、Azure Database for PostgreSQL Flexible Server間での移行はしたことが無かったので、改めて検証してみた。作業するクライアントはmacOSを前提にしているものの、他の環境でもそれほど変わらないはず。

事前準備

まず、いくつか必須のものがある。

  • Docker DesktopもしくはDocker
  • psql
  • Azure CLI

Homebrew環境であれば、サクッとインストールできるはず。

brew install --cask docker-desktop
brew install postgresql azure-cli

オプションで、テスト用のダミーデータを生成する拙作のpgsalesgenをインストールする。

brew install rioriost/pgsalesgen/pgsalesgen

で、Azureにログインしたら、準備完了。

az login

環境変数の設定

デプロイスクリプト、ダミーデータのロード、pgcopydbの実行時に参照されるので、最初に設定しておく。

export PG_ADMIN_USER='your_account'
export PG_ADMIN_PASS='your_password'

デプロイスクリプトの実行

もちろん手動でも良いし、Azure Developer CLIでBicepからデプロイしても構わない。

#!/bin/bash
# Deploy two Azure Database for PostgreSQL Flexible Servers (migration
# source and target) and verify data-plane connectivity via psql.
#
# Required environment variables:
#   PG_ADMIN_USER - administrator login name for both servers
#   PG_ADMIN_PASS - administrator password for both servers
set -euo pipefail

# ---- common settings ----
LOCATION="japaneast"
ADMIN_USER="${PG_ADMIN_USER:?Set PG_ADMIN_USER env var}"
ADMIN_PASS="${PG_ADMIN_PASS:?Set PG_ADMIN_PASS env var}"
# Public IPv4 address of this client; used for the server firewall rules.
CLIENT_IP="$(curl -4 -s https://ifconfig.co | tr -d '\n')"

# Server spec
SRC_PG_VERSION="17"
TGT_PG_VERSION="17"
TIER="GeneralPurpose"
SKU_NAME="Standard_D8ds_v4"
STORAGE_SIZE_GIB="512"
PERF_TIER="P40"
ENTRA_AUTH="Disabled"   # Microsoft Entra auth off; password auth only
PWD_AUTH="Enabled"

# Retry / timeout defaults (overridable from the environment)
CONNECT_MAX_TRIES="${CONNECT_MAX_TRIES:-10}"   # up to 10 attempts
CONNECT_TIMEOUT_SEC="${CONNECT_TIMEOUT_SEC:-20}"

# Timestamped logger: prints "[YYYY-MM-DD HH:MM:SS] message" to stdout.
log() { printf '[%s] %s\n' "$(date +'%F %T')" "$*"; }

# ---- az cli version gate ----
AZ_VERSION="$(az version --query '"azure-cli"' -o tsv)"

# Succeed (exit 0) when version $1 >= version $2.
# sort -C merely checks the two lines are already in version order,
# so feeding "$2" before "$1" tests $2 <= $1.
version_ge() {
    printf '%s\n' "$2" "$1" | sort -C -V
}

# Azure CLI 2.86.0 renames the firewall-rule arguments; remember which
# syntax ensure_fw_rule should use.
USE_FW_NEW_SYNTAX=0
version_ge "$AZ_VERSION" "2.86.0" && USE_FW_NEW_SYNTAX=1

# ---- bash timeout helper ----
# Run a command with a time limit: run_with_timeout SECONDS CMD [ARGS...]
# Prefers GNU timeout; otherwise uses a pure-bash watchdog. Returns the
# command's exit code, or 124 on timeout (GNU timeout convention).
run_with_timeout() {
    local seconds="$1"; shift

    # Use GNU timeout when it is available
    if command -v timeout >/dev/null 2>&1; then
        timeout "$seconds" "$@"
        return $?
    fi

    # bash-only fallback: run the command in the background...
    "$@" &
    local cmd_pid=$!

    # ...with a watchdog subshell that kills it once the limit expires
    # (TERM first, then KILL one second later if it is still running).
    (
    sleep "$seconds"
    if kill -0 "$cmd_pid" 2>/dev/null; then
        kill "$cmd_pid" 2>/dev/null || true
        sleep 1
        kill -9 "$cmd_pid" 2>/dev/null || true
    fi
    ) &
    local watchdog_pid=$!

    wait "$cmd_pid"
    local rc=$?

    # Reap the watchdog so it does not outlive this call.
    kill "$watchdog_pid" 2>/dev/null || true
    wait "$watchdog_pid" 2>/dev/null || true

    # Normalize SIGTERM (143) / SIGKILL (137) exits to 124 for
    # GNU timeout compatibility.
    if (( rc == 143 || rc == 137 )); then
        return 124
    fi
    return "$rc"
}

# ---- prerequisites ----
# Make sure the psql client exists, offering a Homebrew install of libpq
# on macOS. Exits the script when psql cannot be provided.
ensure_psql() {
    if command -v psql >/dev/null 2>&1; then
        log "psql found: $(command -v psql)"
        return 0
    fi

    echo
    echo "psql command is not installed (required for connectivity check)."
    echo

    # Without Homebrew we cannot offer an install; bail out.
    if ! command -v brew >/dev/null 2>&1; then
        log "Homebrew not found and psql is missing. Please install psql and retry."
        exit 1
    fi

    echo "On macOS, you can install it via Homebrew:"
    echo "  brew install libpq"
    echo "  brew link --force libpq"
    echo
    read -r -p "Install libpq (psql) now? [y/N]: " ans

    # Anything other than an explicit yes aborts.
    case "${ans:-}" in
        [yY]|[yY][eE][sS]) ;;
        *)
            log "Not installing psql. Exiting."
            exit 1
            ;;
    esac

    log "Installing libpq (psql) via Homebrew..."
    brew install libpq
    brew link --force libpq >/dev/null 2>&1 || true

    if command -v psql >/dev/null 2>&1; then
        log "psql installed: $(command -v psql)"
        return 0
    fi

    log "Installed libpq, but 'psql' is still not in PATH."
    log "Try: brew link --force libpq"
    exit 1
}

# ---- helpers ----
# Create resource group $1 in $LOCATION unless it already exists (idempotent).
ensure_rg() {
    local rg="$1"

    if az group exists -n "$rg" | grep -qi true; then
        log "Resource group exists: $rg"
        return 0
    fi

    log "Creating resource group: $rg"
    az group create -n "$rg" -l "$LOCATION" >/dev/null
}

# Create a firewall rule on a flexible server (idempotent).
# $1=resource group, $2=server, $3=rule name, $4=start IP, $5=end IP
# (defaults to the start IP for a single-address rule).
ensure_fw_rule() {
    local rg="$1" server="$2" rule="$3" start_ip="$4"
    local end_ip="${5:-$4}"

    # Flags common to both CLI syntaxes.
    local args=(
        postgres flexible-server firewall-rule create
        --resource-group "$rg"
        --start-ip-address "$start_ip"
        --end-ip-address "$end_ip"
    )

    if (( USE_FW_NEW_SYNTAX == 1 )); then
        # CLI 2.86.0+ : --server-name introduced, --name repurposed to firewall rule name
        args+=(--server-name "$server" --name "$rule")
    else
        # Current syntax: --name is server name, --rule-name is firewall rule name
        args+=(--name "$server" --rule-name "$rule")
    fi

    az "${args[@]}" >/dev/null
}

# Provision a flexible server (idempotent) and open its firewall to Azure
# services and to the current client IP.
# $1=resource group, $2=server name, $3=PostgreSQL major version
ensure_server() {
    local rg="$1" server="$2" version="$3"

    ensure_rg "$rg"

    if ! az postgres flexible-server show -g "$rg" -n "$server" >/dev/null 2>&1; then
        log "Creating server: $server (rg=$rg, location=$LOCATION, client_ip=$CLIENT_IP)"
        local create_args=(
            --resource-group "$rg"
            --name "$server"
            --location "$LOCATION"
            --version "$version"
            --tier "$TIER"
            --sku-name "$SKU_NAME"
            --storage-size "$STORAGE_SIZE_GIB"
            --performance-tier "$PERF_TIER"
            --microsoft-entra-auth "$ENTRA_AUTH"
            --password-auth "$PWD_AUTH"
            --admin-user "$ADMIN_USER"
            --admin-password "$ADMIN_PASS"
            --public-access "$CLIENT_IP"
        )
        az postgres flexible-server create "${create_args[@]}" >/dev/null
    else
        log "Server already exists: $server (rg=$rg) - skip create"
    fi

    log "Ensuring firewall rules for: $server (rg=$rg)"
    ensure_fw_rule "$rg" "$server" "AllowAzureServices" "0.0.0.0" "0.0.0.0"
    ensure_fw_rule "$rg" "$server" "AllowClientIP" "$CLIENT_IP" "$CLIENT_IP"
}

# ---- connectivity check via psql ----
# Attempt a single "select 1" against the server over TLS.
# Returns 0 on success, non-zero on failure/timeout; logs the outcome.
check_pg_connect() {
    local rg="$1"      # for logging only (psql does not need it)
    local server="$2"

    local host="${server}.postgres.database.azure.com"
    local db="postgres"
    local user="$ADMIN_USER"

    log "Checking connectivity via psql: $server (host=$host, rg=$rg)"

    # Capture ONLY stderr into $out: '2>&1 >/dev/null' first duplicates
    # stderr onto the captured stdout, then discards psql's own stdout.
    local out rc=0
    out="$(
    PGPASSWORD="$ADMIN_PASS" \
    run_with_timeout "$CONNECT_TIMEOUT_SEC" \
        psql "host=${host} port=5432 dbname=${db} user=${user} sslmode=require connect_timeout=5" \
        -v ON_ERROR_STOP=1 \
        -qAtc "select 1;" \
        2>&1 >/dev/null
    )" || rc=$?

    if (( rc == 0 )); then
        log "✅ Connection OK (psql): $server"
        return 0
    fi

    # run_with_timeout maps timeout kills to 124 (GNU timeout convention)
    if (( rc == 124 )); then
        log "❌ Connection TIMEOUT after ${CONNECT_TIMEOUT_SEC}s (psql): $server"
    else
        log "❌ Connection FAILED (rc=$rc) (psql): $server"
    fi

    [[ -n "${out:-}" ]] && log "    psql error: ${out}"
    return 1
}

# Repeatedly probe a server via check_pg_connect with exponential backoff
# (1, 2, 4, ... seconds) between attempts.
# $1=resource group (for logging), $2=server,
# $3=max attempts (defaults to CONNECT_MAX_TRIES).
retry_connect() {
    local rg="$1" server="$2"
    local max_tries="${3:-$CONNECT_MAX_TRIES}"
    local attempt=1 backoff=1

    while (( attempt <= max_tries )); do
        check_pg_connect "$rg" "$server" && return 0

        if (( attempt == max_tries )); then
            log "❌ Giving up connect after ${max_tries} tries: $server (rg=$rg)"
            return 1
        fi

        log "Retrying connect ($attempt/$max_tries) in ${backoff}s... ($server)"
        sleep "$backoff"
        backoff=$(( backoff * 2 ))
        attempt=$(( attempt + 1 ))
    done
}

main() {
    ensure_psql

    # Each entry: label|resource group|server name|PG version
    local servers=(
        "src|rg-${ADMIN_USER}migsrc|${ADMIN_USER}migsrc|${SRC_PG_VERSION}"
        "tgt|rg-${ADMIN_USER}migtgt|${ADMIN_USER}migtgt|${TGT_PG_VERSION}"
    )

    local entry label rg server version

    # Phase 1: make sure both servers (and their firewall rules) exist.
    for entry in "${servers[@]}"; do
        IFS='|' read -r label rg server version <<< "$entry"
        log "Ensuring ${label} server: ${server} (rg=${rg}, version=${version})"
        ensure_server "$rg" "$server" "$version"
    done

    # Phase 2: confirm data-plane connectivity to each server.
    for entry in "${servers[@]}"; do
        IFS='|' read -r label rg server version <<< "$entry"
        log "Connectivity check for ${label} server: ${server}"
        retry_connect "$rg" "$server" || exit 1
    done

    log "All servers are reachable. Done."
}

main "$@"

deploy_server.shとして保存して、パーミッション設定、実行。

chmod +x deploy_server.sh
./deploy_server.sh

なんかメッセージが色々表示されるが、最後に以下のメッセージが出れば完了。

......
[2026-02-06 17:48:14] Connectivity check for src server: riomigsrc
[2026-02-06 17:48:14] Checking connectivity via psql: riomigsrc (host=riomigsrc.postgres.database.azure.com, rg=rg-riomigsrc)
[2026-02-06 17:48:34] ✅ Connection OK (psql): riomigsrc
[2026-02-06 17:48:34] Connectivity check for tgt server: riomigtgt
[2026-02-06 17:48:34] Checking connectivity via psql: riomigtgt (host=riomigtgt.postgres.database.azure.com, rg=rg-riomigtgt)
[2026-02-06 17:48:55] ✅ Connection OK (psql): riomigtgt
[2026-02-06 17:48:55] All servers are reachable. Done.

ダミーデータをロード

pgsalesgenを実行しても良いし、もちろんお手元にあるデータ等を流し込んでも構わない。

pgsalesgen -h ${PG_ADMIN_USER}migsrc.postgres.database.azure.com -U $PG_ADMIN_USER -d postgres --password $PG_ADMIN_PASS

デフォルトだと87GBぐらいのデータを生成する。

[setup] creating schema/tables...
[setup] inserting masters: customers=2,000,000 products=200,000 ...
[progress] 0.00 GB / 100.00 GB
[progress] 2.96 GB / 100.00 GB
[progress] 6.22 GB / 100.00 GB
[progress] 9.22 GB / 100.00 GB
[progress] 12.08 GB / 100.00 GB
[progress] 14.80 GB / 100.00 GB
[progress] 17.62 GB / 100.00 GB
[progress] 20.33 GB / 100.00 GB
......
[progress] 84.41 GB / 100.00 GB
[progress] 87.13 GB / 100.00 GB
[progress] 89.98 GB / 100.00 GB
[progress] 92.65 GB / 100.00 GB
[progress] 95.48 GB / 100.00 GB
[progress] 98.24 GB / 100.00 GB
[progress] 100.87 GB / 100.00 GB
[done] target reached; stopping workers...

pgcopydbを実行する

本家のpgcopydbはPostgreSQL 16までしか対応しておらず、17/18に対応させるPRは存在するが、マージされてない。

そこで、当該PRが含まれるフォークをさらにフォークしてDockerfileを修正し、ビルドしたイメージをDocker Hubに上げておいた。

docker run \
  -e PGCOPYDB_SOURCE_PGURI="postgres://${PG_ADMIN_USER}:${PG_ADMIN_PASS}@${PG_ADMIN_USER}migsrc.postgres.database.azure.com/postgres" \
  -e PGCOPYDB_TARGET_PGURI="postgres://${PG_ADMIN_USER}:${PG_ADMIN_PASS}@${PG_ADMIN_USER}migtgt.postgres.database.azure.com/postgres" \
  --rm -it rioriost/pgcopydb:0.18 \
  clone --skip-extensions --drop-if-exists --no-owner --no-acl

最後の行が、pgcopydbのコマンド。実行すると、以下のようにsrcからtgtにクローンされる。

09:25:01.301 1 INFO   Running pgcopydb version 0.17 from "/usr/local/bin/pgcopydb"
09:25:01.321 1 INFO   [SOURCE] Copying database from "postgres://rio@riomigsrc.postgres.database.azure.com/postgres?keepalives=1&keepalives_idle=10&keepalives_interval=10&keepalives_count=60"
09:25:01.321 1 INFO   [TARGET] Copying database into "postgres://rio@riomigtgt.postgres.database.azure.com/postgres?keepalives=1&keepalives_idle=10&keepalives_interval=10&keepalives_count=60"
09:25:01.433 1 INFO   Using work dir "/tmp/pgcopydb"
09:25:01.631 1 INFO   Exported snapshot "00000238-00000002-1" from the source database
09:25:01.656 23 INFO   STEP 1: fetch source database tables, indexes, and sequences
09:25:01.860 23 INFO   Fetched information for 4 tables (including 0 tables split in 0 partitions total), with an estimated total of 116 million tuples and 87 GB on-disk
09:25:01.873 23 INFO   Fetched information for 4 indexes (supporting 4 constraints)
09:25:01.877 23 INFO   Fetching information for 3 sequences
09:25:01.925 23 INFO   Fetched information for 4 extensions
09:25:02.056 23 INFO   Found 0 indexes (supporting 0 constraints) in the target database
09:25:02.057 23 INFO   STEP 2: dump the source database schema (pre/post data)
09:25:02.058 23 INFO    /usr/bin/pg_dump -Fc --snapshot 00000238-00000002-1 --section=pre-data --section=post-data --file /tmp/pgcopydb/schema/schema.dump 'postgres://rio@riomigsrc.postgres.database.azure.com/postgres?keepalives=1&keepalives_idle=10&keepalives_interval=10&keepalives_count=60'
09:25:02.935 23 INFO   STEP 3: restore the pre-data section to the target database
09:25:03.103 23 INFO   Drop tables on the target database, per --drop-if-exists
09:25:03.163 23 INFO    /usr/bin/pg_restore --dbname 'postgres://rio@riomigtgt.postgres.database.azure.com/postgres?keepalives=1&keepalives_idle=10&keepalives_interval=10&keepalives_count=60' --section pre-data --jobs 4 --clean --if-exists --no-owner --no-acl --use-list /tmp/pgcopydb/schema/pre-filtered.list /tmp/pgcopydb/schema/schema.dump
09:25:03.816 31 INFO   STEP 4: starting 4 table-data COPY processes
09:25:03.821 34 INFO   STEP 8: starting 4 VACUUM processes
09:25:03.822 32 INFO   STEP 6: starting 4 CREATE INDEX processes
09:25:03.822 32 INFO   STEP 7: constraints are built by the CREATE INDEX processes
09:25:03.931 23 INFO   Skipping large objects: none found.
09:25:03.933 23 INFO   STEP 9: reset sequences values
09:25:03.934 47 INFO   Set sequences values on the target database
09:37:37.079 23 INFO   STEP 10: restore the post-data section to the target database
09:37:37.198 23 INFO    /usr/bin/pg_restore --dbname 'postgres://rio@riomigtgt.postgres.database.azure.com/postgres?keepalives=1&keepalives_idle=10&keepalives_interval=10&keepalives_count=60' --section post-data --jobs 4 --clean --if-exists --no-owner --no-acl --use-list /tmp/pgcopydb/schema/post-filtered.list /tmp/pgcopydb/schema/schema.dump
09:37:37.704 23 INFO   All step are now done, 12m35s elapsed
09:37:37.704 23 INFO   Printing summary for 4 tables and 4 indexes

  OID | Schema |        Name | Parts | copy duration | transmitted bytes | indexes | create index duration
------+--------+-------------+-------+---------------+-------------------+---------+----------------------
24837 |  sales | order_items |     1 |         8m38s |             58 GB |       1 |                 3m51s
24829 |  sales |      orders |     1 |         3m34s |             17 GB |       1 |                 1m29s
24811 |  sales |   customers |     1 |         2s735 |            154 MB |       1 |                 455ms
24821 |  sales |    products |     1 |         206ms |           8599 kB |       1 |                  67ms


                                               Step   Connection    Duration    Transfer   Concurrency
 --------------------------------------------------   ----------  ----------  ----------  ------------
   Catalog Queries (table ordering, filtering, etc)       source       340ms                         1
                                        Dump Schema       source       877ms                         1
                                     Prepare Schema       target       871ms                         1
      COPY, INDEX, CONSTRAINTS, VACUUM (wall clock)         both      12m33s                        12
                                  COPY (cumulative)         both      12m15s       87 GB             4
                          CREATE INDEX (cumulative)       target       5m20s                         4
                           CONSTRAINTS (cumulative)       target        27ms                         4
                                VACUUM (cumulative)       target       5s495                         4
                                    Reset Sequences         both        68ms                         1
                         Large Objects (cumulative)       (null)         0ms                         0
                                    Finalize Schema         both       622ms                         4
 --------------------------------------------------   ----------  ----------  ----------  ------------
                          Total Wall Clock Duration         both      12m35s                        20

87GBのクローンで12分35秒。まずまずといったところ。

まとめ

PostgreSQLのエクステンション(例えばpgvectorやApache AGE)を利用している構成は未確認ながら、普通のスキーマ・テーブルであれば問題なく移行出来た。pgcopydbには様々なオプションがあり、データ移行の前にアセスメントしてから、といったことも可能。

タイトルとURLをコピーしました