6.3 Reconnect automático Evolution antes de abrir incident

Fluxo novo no heartbeat-check quando o threshold vence: 1. Verifica se o reconnect está habilitado (metadata.heartbeat_reconnect_enabled, default true) E se a API respondeu (sem fetchError) E se já passou o cooldown de 10min desde a última tentativa. 2. POST /instance/restart/{instance} na Evolution. 3. Aguarda 3s pra estabilizar e verifica novamente o connectionState. 4. Se o state voltou pra 'open': restaura connected + limpa first_unhealthy_at + incrementa heartbeat_reconnect_count + resolve qualquer incident aberto. Retorna action='auto_reconnected'. 5. Senão: atualiza heartbeat_reconnect_last_at (pra respeitar o cooldown) e abre o incident normalmente com details.reconnect_attempted=true. Anti-loop: 1 tentativa por ciclo (sem retry), cooldown de 10min por channel pra não martelar a Evolution nem gerar restart infinito. Tentativas bem-sucedidas são contadas em metadata.heartbeat_reconnect_count (auditoria futura). A UI em /configuracoes/whatsapp-pessoal ganha um novo toggle no card de Monitoramento: "Tentar reconectar automaticamente" (default ligado), com explicação clara. O tenant pode desligar se preferir ser alertado imediatamente, sem tentativa de reconnect. O summary do endpoint agora inclui a contagem auto_reconnected — útil pra métricas de confiabilidade da Evolution. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-23 12:19:52 -03:00
parent 0f643817c2
commit 4e4bac622c
2 changed files with 102 additions and 7 deletions
@@ -35,6 +35,10 @@ const corsHeaders = {

 const DEFAULT_THRESHOLD_MINUTES = 5
 const FETCH_TIMEOUT_MS = 8000
+// Cooldown entre tentativas de reconnect por canal pra não martelar a Evolution
+const RECONNECT_COOLDOWN_MS = 10 * 60 * 1000
+// Quanto esperar depois de pedir restart antes de re-verificar o state
+const RECONNECT_SETTLE_MS = 3000

 function json(body: unknown, status = 200) {
    return new Response(JSON.stringify(body), {
@@ -99,8 +103,9 @@ async function checkOneChannel(supa: SupabaseClient, channel: ChannelRow, now: D
    channel_id: string
    previous_status: string | null
    new_status: string
-    action: 'ok' | 'opened' | 'resolved' | 'still_unhealthy' | 'no_change' | 'config_missing' | 'fetch_error'
+    action: 'ok' | 'opened' | 'resolved' | 'still_unhealthy' | 'no_change' | 'config_missing' | 'fetch_error' | 'auto_reconnected'
    incident_id?: string
+    reconnect_attempted?: boolean
 }> {
    const creds = channel.credentials || {}
    const apiUrl = String(creds.api_url || '').trim()
@@ -186,12 +191,85 @@ async function checkOneChannel(supa: SupabaseClient, channel: ChannelRow, now: D
        }
    }

-    // Passou do threshold — abre incident (idempotente)
+    // Passou do threshold — antes de abrir incident, tenta reconnect automático (6.3)
+    // Só tenta se: reconnect habilitado + API respondeu (sem fetchError) + passou do cooldown
+    const reconnectEnabled = meta.heartbeat_reconnect_enabled !== false
+    const lastReconnectAtRaw = meta.heartbeat_reconnect_last_at as string | undefined
+    const lastReconnectAt = lastReconnectAtRaw ? new Date(lastReconnectAtRaw) : null
+    const canReconnect = reconnectEnabled
+        && !fetchError
+        && (!lastReconnectAt || isNaN(lastReconnectAt.getTime()) || (now.getTime() - lastReconnectAt.getTime()) >= RECONNECT_COOLDOWN_MS)
+
+    let reconnectAttempted = false
+
+    if (canReconnect) {
+        reconnectAttempted = true
+        try {
+            // POST /instance/restart/{instance} — instrui Evolution a reconectar a sessão
+            await fetchWithTimeout(`${base}/instance/restart/${encodeURIComponent(instance)}`, {
+                method: 'POST',
+                headers: { 'Content-Type': 'application/json', apikey: apiKey }
+            }, FETCH_TIMEOUT_MS)
+
+            // Aguarda estabilizar
+            await new Promise((r) => setTimeout(r, RECONNECT_SETTLE_MS))
+
+            // Rechecka o state
+            const res2 = await fetchWithTimeout(targetUrl, {
+                method: 'GET',
+                headers: { 'Content-Type': 'application/json', apikey: apiKey }
+            }, FETCH_TIMEOUT_MS)
+
+            if (res2.ok) {
+                const body2 = await res2.json().catch(() => null) as { instance?: { state?: string }, state?: string } | null
+                const state2 = body2?.instance?.state ?? body2?.state ?? null
+
+                if (state2 === 'open') {
+                    // Reconnect funcionou: restaura estado como se nunca tivesse caído
+                    const cleanedMeta = { ...newMeta }
+                    delete cleanedMeta.first_unhealthy_at
+                    cleanedMeta.heartbeat_reconnect_last_at = now.toISOString()
+                    cleanedMeta.heartbeat_reconnect_count = (Number(cleanedMeta.heartbeat_reconnect_count) || 0) + 1
+
+                    await supa.from('notification_channels').update({
+                        connection_status: 'connected',
+                        last_health_check: now.toISOString(),
+                        metadata: cleanedMeta
+                    }).eq('id', channel.id)
+
+                    // Resolve qualquer incident aberto desse channel (caso tenha sobrado de ciclo anterior)
+                    await supa.rpc('whatsapp_heartbeat_resolve_open_incidents', { p_channel_id: channel.id })
+
+                    return {
+                        tenant_id: channel.tenant_id,
+                        channel_id: channel.id,
+                        previous_status: channel.connection_status,
+                        new_status: 'connected',
+                        action: 'auto_reconnected',
+                        reconnect_attempted: true
+                    }
+                }
+            }
+        } catch {
+            // silencioso — reconnect falhou, segue fluxo de abrir incident
+        }
+
+        // Marca tentativa (mesmo que falhou) pra respeitar o cooldown
+        newMeta.heartbeat_reconnect_last_at = now.toISOString()
+        await supa.from('notification_channels').update({ metadata: newMeta }).eq('id', channel.id)
+    }
+
+    // Passou do threshold (e reconnect falhou / não tentou) — abre incident (idempotente)
+    const incidentDetails = {
+        ...(typeof rawBody === 'object' && rawBody ? rawBody : {}),
+        ...(fetchError ? { error: fetchError } : {}),
+        reconnect_attempted: reconnectAttempted
+    }
    const { data: incidentId, error: incidentErr } = await supa.rpc('whatsapp_heartbeat_open_incident', {
        p_channel_id: channel.id,
        p_kind: kind,
        p_last_state: state || fetchError,
-        p_details: rawBody || (fetchError ? { error: fetchError } : null)
+        p_details: incidentDetails
    })

    if (incidentErr) {
@@ -223,7 +301,8 @@ async function checkOneChannel(supa: SupabaseClient, channel: ChannelRow, now: D
        previous_status: channel.connection_status,
        new_status: newStatus,
        action: 'opened',
-        incident_id: newIncidentId
+        incident_id: newIncidentId,
+        reconnect_attempted: reconnectAttempted
    }
 }

@@ -337,6 +416,7 @@ Deno.serve(async (req) => {
            checked: results.length,
            opened: results.filter((r) => r.action === 'opened').length,
            resolved: results.filter((r) => r.action === 'resolved').length,
+            auto_reconnected: results.filter((r) => r.action === 'auto_reconnected').length,
            still_unhealthy: results.filter((r) => r.action === 'still_unhealthy').length,
            ok: results.filter((r) => r.action === 'ok').length,
            errors: results.filter((r) => r.action === 'fetch_error' || r.action === 'config_missing').length