Loading src/cluster.cpp +21 −0 Original line number Diff line number Diff line Loading @@ -582,6 +582,8 @@ namespace authdb { size_t k = cfg_.data_blocks; size_t n = cfg_.data_blocks + cfg_.parity_blocks; bool first_run = true; int healthy_cycles = 0; // counts consecutive healthy probes static constexpr int REBALANCE_INTERVAL = 30; // rebalance every ~5min (30 × 10s) while (running_) { if (!first_run) { // Probe faster when degraded/critical (3s) vs normal (10s) Loading Loading @@ -643,9 +645,27 @@ namespace authdb { std::thread([this](){ try { scrub(); } catch (...) {} }).detach(); healthy_cycles = 0; } degraded_ = false; critical_ = false; // Periodic rebalance while healthy: fixes misplaced blocks // from transient store_stripe timeouts that didn't trigger // a full degraded→recovered transition. if (++healthy_cycles >= REBALANCE_INTERVAL) { healthy_cycles = 0; std::thread([this](){ try { auto rb = pclient_->rebalance(); if (rb.rebalanced > 0) { std::cerr << "Cluster: periodic rebalance moved " << rb.rebalanced << " group(s)" << std::endl; pclient_->vacuum_all_nodes(); } } catch (...) {} }).detach(); } } else if (health.nodes_online >= k) { if (was_critical) { std::cerr << "Cluster: recovering — " << health.nodes_online Loading @@ -656,6 +676,7 @@ namespace authdb { } degraded_ = true; critical_ = false; healthy_cycles = 0; } else { if (!critical_) { std::cerr << "Cluster: CRITICAL — only " << health.nodes_online Loading Loading
src/cluster.cpp +21 −0 Original line number Diff line number Diff line Loading @@ -582,6 +582,8 @@ namespace authdb { size_t k = cfg_.data_blocks; size_t n = cfg_.data_blocks + cfg_.parity_blocks; bool first_run = true; int healthy_cycles = 0; // counts consecutive healthy probes static constexpr int REBALANCE_INTERVAL = 30; // rebalance every ~5min (30 × 10s) while (running_) { if (!first_run) { // Probe faster when degraded/critical (3s) vs normal (10s) Loading Loading @@ -643,9 +645,27 @@ namespace authdb { std::thread([this](){ try { scrub(); } catch (...) {} }).detach(); healthy_cycles = 0; } degraded_ = false; critical_ = false; // Periodic rebalance while healthy: fixes misplaced blocks // from transient store_stripe timeouts that didn't trigger // a full degraded→recovered transition. if (++healthy_cycles >= REBALANCE_INTERVAL) { healthy_cycles = 0; std::thread([this](){ try { auto rb = pclient_->rebalance(); if (rb.rebalanced > 0) { std::cerr << "Cluster: periodic rebalance moved " << rb.rebalanced << " group(s)" << std::endl; pclient_->vacuum_all_nodes(); } } catch (...) {} }).detach(); } } else if (health.nodes_online >= k) { if (was_critical) { std::cerr << "Cluster: recovering — " << health.nodes_online Loading @@ -656,6 +676,7 @@ namespace authdb { } degraded_ = true; critical_ = false; healthy_cycles = 0; } else { if (!critical_) { std::cerr << "Cluster: CRITICAL — only " << health.nodes_online Loading