Commit f1f2554f authored by jan.koester's avatar jan.koester
Browse files

test

parent f8539f9a
Loading
Loading
Loading
Loading
Loading
+21 −0
Original line number Diff line number Diff line
@@ -582,6 +582,8 @@ namespace authdb {
        size_t k = cfg_.data_blocks;
        size_t n = cfg_.data_blocks + cfg_.parity_blocks;
        bool first_run = true;
        int healthy_cycles = 0;             // counts consecutive healthy probes
        static constexpr int REBALANCE_INTERVAL = 30; // rebalance every ~5min (30 × 10s)
        while (running_) {
            if (!first_run) {
                // Probe faster when degraded/critical (3s) vs normal (10s)
@@ -643,9 +645,27 @@ namespace authdb {
                        std::thread([this](){
                            try { scrub(); } catch (...) {}
                        }).detach();
                        healthy_cycles = 0;
                    }
                    degraded_ = false;
                    critical_ = false;

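                    // Periodic rebalance while healthy: after every
                    // REBALANCE_INTERVAL consecutive healthy probes, run a
                    // rebalance on a detached thread. This fixes misplaced blocks
                    // left behind by transient store_stripe timeouts that did not
                    // trigger a full degraded→recovered transition. Nodes are
                    // vacuumed only when the rebalance actually moved a group.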
                    if (++healthy_cycles >= REBALANCE_INTERVAL) {
                        healthy_cycles = 0;
                        std::thread([this](){
                            try {
                                auto rb = pclient_->rebalance();
                                if (rb.rebalanced > 0) {
                                    std::cerr << "Cluster: periodic rebalance moved "
                                              << rb.rebalanced << " group(s)" << std::endl;
                                    pclient_->vacuum_all_nodes();
                                }
                            } catch (...) {}
                        }).detach();
                    }
                } else if (health.nodes_online >= k) {
                    if (was_critical) {
                        std::cerr << "Cluster: recovering — " << health.nodes_online
@@ -656,6 +676,7 @@ namespace authdb {
                    }
                    degraded_ = true;
                    critical_ = false;
                    healthy_cycles = 0;
                } else {
                    if (!critical_) {
                        std::cerr << "Cluster: CRITICAL — only " << health.nodes_online