6.3 Reconnect automático Evolution antes de abrir incident

Fluxo novo no heartbeat-check quando threshold vence:
1. Verifica se reconnect está habilitado
   (metadata.heartbeat_reconnect_enabled, default true) E se a API
   respondeu (sem fetchError) E se passou o cooldown de 10min desde a
   última tentativa.
2. POST /instance/restart/{instance} na Evolution.
3. Aguarda 3s pra estabilizar e consulta o connectionState de novo.
4. Se o state voltou pra 'open': restaura connection_status='connected',
   limpa first_unhealthy_at, atualiza heartbeat_reconnect_last_at,
   incrementa heartbeat_reconnect_count e resolve qualquer incident
   aberto. Retorna action='auto_reconnected'.
5. Senão: atualiza heartbeat_reconnect_last_at (respeita cooldown) e
   abre incident normalmente com details.reconnect_attempted=true.

Anti-loop: 1 tentativa por ciclo (sem retry), cooldown de 10min/channel
pra não martelar a Evolution nem gerar restart infinito. Reconexões
bem-sucedidas são contadas em metadata.heartbeat_reconnect_count
(auditoria futura); tentativas que falham só atualizam
heartbeat_reconnect_last_at.

UI em /configuracoes/whatsapp-pessoal ganha novo toggle no card de
Monitoramento: "Tentar reconectar automaticamente" (default ligado)
com explicação clara. Tenant pode desligar se preferir ser alertado
imediato sem tentativa.

Summary do endpoint agora inclui auto_reconnected count — útil pra
métricas de confiabilidade da Evolution.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Leonardo
2026-04-23 12:19:52 -03:00
parent 0f643817c2
commit 4e4bac622c
2 changed files with 102 additions and 7 deletions
@@ -218,7 +218,11 @@ async function checkConnectionStatus() {
// ──────────────────────────────────────────────────────────────
// Monitoramento de conexão (Heartbeat — Grupo 6.1)
// ──────────────────────────────────────────────────────────────
const heartbeatConfig = ref({ threshold_minutes: 5, alerts_enabled: true });
const heartbeatConfig = ref({
threshold_minutes: 5,
alerts_enabled: true,
reconnect_enabled: true
});
const heartbeatConfigSaving = ref(false);
const incidents = ref([]);
const incidentsLoading = ref(false);
@@ -228,7 +232,8 @@ async function loadHeartbeatConfig() {
const meta = channelRecord.value.metadata || {};
heartbeatConfig.value = {
threshold_minutes: Number(meta.heartbeat_threshold_minutes) || 5,
alerts_enabled: meta.heartbeat_alerts_enabled !== false
alerts_enabled: meta.heartbeat_alerts_enabled !== false,
reconnect_enabled: meta.heartbeat_reconnect_enabled !== false
};
}
@@ -240,7 +245,8 @@ async function saveHeartbeatConfig() {
const newMeta = {
...(channelRecord.value.metadata || {}),
heartbeat_threshold_minutes: threshold,
heartbeat_alerts_enabled: !!heartbeatConfig.value.alerts_enabled
heartbeat_alerts_enabled: !!heartbeatConfig.value.alerts_enabled,
heartbeat_reconnect_enabled: !!heartbeatConfig.value.reconnect_enabled
};
const { error } = await supabase
.from('notification_channels')
@@ -868,6 +874,15 @@ onBeforeUnmount(() => {
</div>
</div>
<!-- Reconnect automático -->
<div class="flex items-start gap-2 pt-2 border-t border-[var(--surface-border)]">
<ToggleSwitch v-model="heartbeatConfig.reconnect_enabled" inputId="hb-reconnect" />
<label for="hb-reconnect" class="text-sm cursor-pointer select-none flex-1">
<span class="font-semibold">Tentar reconectar automaticamente</span>
<span class="block text-xs text-[var(--text-color-secondary)]">Antes de abrir um alerta, o sistema pede à Evolution pra restaurar a sessão. Se funcionar, ninguém é incomodado. Máximo 1 tentativa a cada 10 minutos.</span>
</label>
</div>
<!-- Histórico de incidents -->
<div class="border-t border-[var(--surface-border)] pt-3">
<div class="text-xs font-semibold uppercase tracking-wide text-[var(--text-color-secondary)] mb-2">
@@ -35,6 +35,10 @@ const corsHeaders = {
// Default unhealthy threshold, in minutes (presumably the fallback when channel metadata sets none — TODO confirm against the usage site)
const DEFAULT_THRESHOLD_MINUTES = 5
// Timeout, in milliseconds, passed to fetchWithTimeout for requests to the Evolution API
const FETCH_TIMEOUT_MS = 8000
// Cooldown between reconnect attempts per channel, so we don't hammer Evolution
const RECONNECT_COOLDOWN_MS = 10 * 60 * 1000
// How long to wait after requesting a restart before re-checking the state
const RECONNECT_SETTLE_MS = 3000
function json(body: unknown, status = 200) {
return new Response(JSON.stringify(body), {
@@ -99,8 +103,9 @@ async function checkOneChannel(supa: SupabaseClient, channel: ChannelRow, now: D
channel_id: string
previous_status: string | null
new_status: string
action: 'ok' | 'opened' | 'resolved' | 'still_unhealthy' | 'no_change' | 'config_missing' | 'fetch_error'
action: 'ok' | 'opened' | 'resolved' | 'still_unhealthy' | 'no_change' | 'config_missing' | 'fetch_error' | 'auto_reconnected'
incident_id?: string
reconnect_attempted?: boolean
}> {
const creds = channel.credentials || {}
const apiUrl = String(creds.api_url || '').trim()
@@ -186,12 +191,85 @@ async function checkOneChannel(supa: SupabaseClient, channel: ChannelRow, now: D
}
}
// Passou do threshold — abre incident (idempotente)
// Passou do threshold — antes de abrir incident, tenta reconnect automático (6.3)
// Só tenta se: reconnect habilitado + API respondeu (sem fetchError) + passou do cooldown
const reconnectEnabled = meta.heartbeat_reconnect_enabled !== false
const lastReconnectAtRaw = meta.heartbeat_reconnect_last_at as string | undefined
const lastReconnectAt = lastReconnectAtRaw ? new Date(lastReconnectAtRaw) : null
const canReconnect = reconnectEnabled
&& !fetchError
&& (!lastReconnectAt || isNaN(lastReconnectAt.getTime()) || (now.getTime() - lastReconnectAt.getTime()) >= RECONNECT_COOLDOWN_MS)
let reconnectAttempted = false
if (canReconnect) {
reconnectAttempted = true
try {
// POST /instance/restart/{instance} — instrui Evolution a reconectar a sessão
await fetchWithTimeout(`${base}/instance/restart/${encodeURIComponent(instance)}`, {
method: 'POST',
headers: { 'Content-Type': 'application/json', apikey: apiKey }
}, FETCH_TIMEOUT_MS)
// Aguarda estabilizar
await new Promise((r) => setTimeout(r, RECONNECT_SETTLE_MS))
// Rechecka o state
const res2 = await fetchWithTimeout(targetUrl, {
method: 'GET',
headers: { 'Content-Type': 'application/json', apikey: apiKey }
}, FETCH_TIMEOUT_MS)
if (res2.ok) {
const body2 = await res2.json().catch(() => null) as { instance?: { state?: string }, state?: string } | null
const state2 = body2?.instance?.state ?? body2?.state ?? null
if (state2 === 'open') {
// Reconnect funcionou: restaura estado como se nunca tivesse caído
const cleanedMeta = { ...newMeta }
delete cleanedMeta.first_unhealthy_at
cleanedMeta.heartbeat_reconnect_last_at = now.toISOString()
cleanedMeta.heartbeat_reconnect_count = (Number(cleanedMeta.heartbeat_reconnect_count) || 0) + 1
await supa.from('notification_channels').update({
connection_status: 'connected',
last_health_check: now.toISOString(),
metadata: cleanedMeta
}).eq('id', channel.id)
// Resolve qualquer incident aberto desse channel (caso tenha sobrado de ciclo anterior)
await supa.rpc('whatsapp_heartbeat_resolve_open_incidents', { p_channel_id: channel.id })
return {
tenant_id: channel.tenant_id,
channel_id: channel.id,
previous_status: channel.connection_status,
new_status: 'connected',
action: 'auto_reconnected',
reconnect_attempted: true
}
}
}
} catch {
// silencioso — reconnect falhou, segue fluxo de abrir incident
}
// Marca tentativa (mesmo que falhou) pra respeitar o cooldown
newMeta.heartbeat_reconnect_last_at = now.toISOString()
await supa.from('notification_channels').update({ metadata: newMeta }).eq('id', channel.id)
}
// Passou do threshold (e reconnect falhou / não tentou) — abre incident (idempotente)
const incidentDetails = {
...(typeof rawBody === 'object' && rawBody ? rawBody : {}),
...(fetchError ? { error: fetchError } : {}),
reconnect_attempted: reconnectAttempted
}
const { data: incidentId, error: incidentErr } = await supa.rpc('whatsapp_heartbeat_open_incident', {
p_channel_id: channel.id,
p_kind: kind,
p_last_state: state || fetchError,
p_details: rawBody || (fetchError ? { error: fetchError } : null)
p_details: incidentDetails
})
if (incidentErr) {
@@ -223,7 +301,8 @@ async function checkOneChannel(supa: SupabaseClient, channel: ChannelRow, now: D
previous_status: channel.connection_status,
new_status: newStatus,
action: 'opened',
incident_id: newIncidentId
incident_id: newIncidentId,
reconnect_attempted: reconnectAttempted
}
}
@@ -337,6 +416,7 @@ Deno.serve(async (req) => {
checked: results.length,
opened: results.filter((r) => r.action === 'opened').length,
resolved: results.filter((r) => r.action === 'resolved').length,
auto_reconnected: results.filter((r) => r.action === 'auto_reconnected').length,
still_unhealthy: results.filter((r) => r.action === 'still_unhealthy').length,
ok: results.filter((r) => r.action === 'ok').length,
errors: results.filter((r) => r.action === 'fetch_error' || r.action === 'config_missing').length