4e4bac622c
Fluxo novo no heartbeat-check quando threshold vence:
1. Verifica se reconnect está habilitado (metadata.heartbeat_reconnect_enabled,
default true) E se a API respondeu (sem fetchError) E se
passou do cooldown de 10min desde a última tentativa.
2. POST /instance/restart/{instance} na Evolution.
3. Aguarda 3s pra estabilizar + rechecka connectionState.
4. Se state voltou pra 'open': restaura connected + limpa
first_unhealthy_at + incrementa heartbeat_reconnect_count + resolve
qualquer incident aberto. Retorna action='auto_reconnected'.
5. Senão: atualiza heartbeat_reconnect_last_at (respeita cooldown) e
abre incident normalmente com details.reconnect_attempted=true.
Anti-loop: 1 tentativa por ciclo (não retry), cooldown de 10min/channel
pra não martelar Evolution nem gerar restart infinito. Tentativas são
contadas em metadata.heartbeat_reconnect_count (auditoria futura).
UI em /configuracoes/whatsapp-pessoal ganha novo toggle no card de
Monitoramento: "Tentar reconectar automaticamente" (default ligado)
com explicação clara. Tenant pode desligar se preferir ser alertado
imediato sem tentativa.
Summary do endpoint agora inclui auto_reconnected count — útil pra
métricas de confiabilidade da Evolution.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
430 lines
17 KiB
TypeScript
430 lines
17 KiB
TypeScript
/*
|
|
|--------------------------------------------------------------------------
|
|
| Agência PSI — Edge Function: whatsapp-heartbeat-check
|
|
|--------------------------------------------------------------------------
|
|
| Cron a cada 2 minutos. Checa conexão Evolution de todos os tenants com
|
|
| WhatsApp Pessoal ativo. Atualiza connection_status + last_health_check.
|
|
|
|
|
| Fluxo por channel:
|
|
| 1. GET {api_url}/instance/connectionState/{instance_name}
|
|
| 2. Se state === 'open' → marca connected, limpa first_unhealthy_at,
|
|
| resolve incidents abertos
|
|
| 3. Se state !== 'open' → mapeia pra connection_status, seta
|
|
| first_unhealthy_at (se não tinha), e:
|
|
| - Se já passou `heartbeat_threshold_minutes` (default 5) desde
|
|
| first_unhealthy_at → abre incident (idempotente) + notifica
|
|
| admins ativos do tenant
|
|
| - Senão só atualiza status (ainda não alerta)
|
|
| 4. Se erro HTTP / timeout → kind='error', segue mesma regra do caso 3
|
|
|
|
|
| Config por tenant (em notification_channels.metadata):
|
|
| - heartbeat_threshold_minutes (default 5)
|
|
| - heartbeat_alerts_enabled (default true)
|
|
|
|
|
| Não alerta duas vezes pelo mesmo incident (checa notified_at).
|
|
|--------------------------------------------------------------------------
|
|
*/
|
|
|
|
import { createClient, SupabaseClient } from 'https://esm.sh/@supabase/supabase-js@2'
|
|
|
|
/**
 * CORS headers sent with the preflight (`OPTIONS`) reply and merged into
 * every JSON response built by `json()`.
 */
const corsHeaders = {
  'Access-Control-Allow-Origin': '*',
  'Access-Control-Allow-Headers': 'authorization, x-client-info, apikey, content-type',
  'Access-Control-Allow-Methods': 'POST, GET, OPTIONS',
}
|
|
|
|
/** Minutes a channel may stay unhealthy before an incident is opened, unless channel metadata overrides it. */
const DEFAULT_THRESHOLD_MINUTES = 5

/** Timeout, in milliseconds, passed to `fetchWithTimeout` for every Evolution API call. */
const FETCH_TIMEOUT_MS = 8000

/** Cooldown between reconnect attempts per channel, so the Evolution API is not hammered. */
const RECONNECT_COOLDOWN_MS = 10 * 60 * 1000

/** How long to wait after requesting a restart before re-checking the connection state. */
const RECONNECT_SETTLE_MS = 3000
|
|
|
|
/**
 * Builds a JSON `Response` that also carries the CORS headers.
 *
 * @param body   Value serialized with `JSON.stringify`.
 * @param status HTTP status code; defaults to 200.
 * @returns A `Response` with `Content-Type: application/json`.
 */
function json(body: unknown, status = 200) {
  const headers = { ...corsHeaders, 'Content-Type': 'application/json' }
  return new Response(JSON.stringify(body), { status, headers })
}
|
|
|
|
/**
 * Normalizes an Evolution API base URL before request paths are appended.
 *
 * - A `localhost` / `127.0.0.1` host is rewritten to `host.docker.internal`.
 * - Trailing slashes are stripped on every path, including the fallback for
 *   input that `URL` cannot parse (the original returned that input untouched,
 *   so a trailing slash produced `//instance/...` request URLs).
 *
 * @param apiUrl Base URL as stored in the channel credentials.
 * @returns The normalized base URL without trailing slashes.
 */
function rewriteForContainer(apiUrl: string): string {
  const stripTrailingSlashes = (value: string): string => value.replace(/\/+$/, '')

  try {
    const parsed = new URL(apiUrl)
    if (parsed.hostname === 'localhost' || parsed.hostname === '127.0.0.1') {
      parsed.hostname = 'host.docker.internal'
      return stripTrailingSlashes(parsed.toString())
    }
  } catch {
    // Unparseable input: fall through and return it with trailing slashes removed.
  }

  return stripTrailingSlashes(apiUrl)
}
|
|
|
|
/**
 * Maps a raw connection state reported by the Evolution API to the values
 * stored by this function.
 *
 * @param state Raw state string, or `null` when none could be read.
 * @returns `status` (written to `connection_status`) and `kind` (used as the
 *          incident kind). Any unrecognized or `null` state yields
 *          `{ status: 'error', kind: 'unknown' }`.
 */
function mapStateToStatus(state: string | null): { status: string, kind: string } {
  switch (state) {
    case 'open':
      return { status: 'connected', kind: 'connected' }
    case 'connecting':
      return { status: 'connecting', kind: 'connecting' }
    case 'qr':
    case 'qrcode':
      return { status: 'qr_pending', kind: 'qr_pending' }
    case 'close':
    case 'closed':
      return { status: 'disconnected', kind: 'disconnected' }
    default:
      return { status: 'error', kind: 'unknown' }
  }
}
|
|
|
|
/**
 * `fetch` wrapper that aborts the request when no response arrives in time.
 *
 * The abort timer is cleared as soon as `fetch` settles, so the timeout covers
 * the wait for the response (headers), not the later reading of its body.
 *
 * @param url       Request URL.
 * @param init      Fetch options; any `signal` in it is replaced by the internal abort signal.
 * @param timeoutMs Milliseconds to wait before aborting.
 * @returns The `Response` from `fetch`.
 * @throws Whatever `fetch` rejects with, including the abort error on timeout.
 */
async function fetchWithTimeout(url: string, init: RequestInit, timeoutMs: number): Promise<Response> {
  const controller = new AbortController()
  const timer = setTimeout(() => controller.abort(), timeoutMs)

  try {
    return await fetch(url, { ...init, signal: controller.signal })
  } finally {
    // Always release the timer so it cannot fire after the request has settled.
    clearTimeout(timer)
  }
}
|
|
|
|
/** Columns selected from `notification_channels` for each channel being checked. */
interface ChannelRow {
  /** Primary key of the channel row. */
  id: string
  /** Tenant the channel belongs to. */
  tenant_id: string
  /** User who owns the channel; always included among the alert recipients. */
  owner_id: string
  /** Provider identifier; the query only selects `evolution_api` rows. */
  provider: string
  /** Provider credentials; this function reads `api_url`, `api_key` and `instance_name`. */
  credentials: Record<string, string>
  /** Status written by the previous check, if any. */
  connection_status: string | null
  /** Timestamp of the previous check, if any. */
  last_health_check: string | null
  /** Free-form per-channel settings and heartbeat bookkeeping (`heartbeat_*`, `first_unhealthy_at`). */
  metadata: Record<string, unknown> | null
}
|
|
|
|
/** Reads the connection state from an Evolution API body shaped `{ instance: { state } }` or `{ state }`. */
function extractConnectionState(body: unknown): string | null {
  const shaped = body as { instance?: { state?: string }, state?: string } | null
  return shaped?.instance?.state ?? shaped?.state ?? null
}

/** Parses a timestamp stored in channel metadata; returns null when it is missing or not a valid date. */
function parseMetaDate(raw: unknown): Date | null {
  if (!raw) return null
  const parsed = new Date(raw as string)
  return Number.isNaN(parsed.getTime()) ? null : parsed
}

/** Applies `patch` to one `notification_channels` row; a returned error is logged instead of being silently dropped. */
async function updateChannelRow(supa: SupabaseClient, channelId: string, patch: Record<string, unknown>): Promise<void> {
  const { error } = await supa.from('notification_channels').update(patch).eq('id', channelId)
  if (error) {
    console.error(`[whatsapp-heartbeat-check] failed to update channel ${channelId}: ${error.message}`)
  }
}

/**
 * Runs one heartbeat check for a single channel and persists the outcome.
 *
 * Flow:
 *  1. Incomplete credentials: mark the channel `error` and return `config_missing` (no alert).
 *  2. GET `/instance/connectionState/{instance}`; a non-2xx reply or a thrown fetch is treated as status `error`.
 *  3. State `open`: mark `connected`, clear `first_unhealthy_at`, resolve open incidents
 *     (`resolved` when the RPC reports a count above zero, otherwise `ok`).
 *  4. Otherwise store `first_unhealthy_at` when it is missing or invalid and, while the
 *     threshold has not elapsed, return `still_unhealthy` / `no_change`.
 *  5. Past the threshold: one reconnect attempt, if enabled, the API answered, and the
 *     cooldown elapsed. If the re-check reports `open`, return `auto_reconnected`.
 *  6. Otherwise open an incident through the RPC and, when alerts are enabled, notify
 *     stakeholders; return `opened`.
 *
 * Note: `fetch_error` is also the action returned when the open-incident RPC fails.
 *
 * @param supa    Supabase client used for all reads, writes and RPC calls.
 * @param channel Channel row to check.
 * @param now     Reference time for this run, used for every timestamp written.
 * @returns A summary of what was observed and done for this channel.
 */
async function checkOneChannel(supa: SupabaseClient, channel: ChannelRow, now: Date): Promise<{
  tenant_id: string
  channel_id: string
  previous_status: string | null
  new_status: string
  action: 'ok' | 'opened' | 'resolved' | 'still_unhealthy' | 'no_change' | 'config_missing' | 'fetch_error' | 'auto_reconnected'
  incident_id?: string
  reconnect_attempted?: boolean
}> {
  const nowIso = now.toISOString()
  const nowMs = now.getTime()

  // Fields shared by every result variant.
  const baseResult = {
    tenant_id: channel.tenant_id,
    channel_id: channel.id,
    previous_status: channel.connection_status
  }

  const creds = channel.credentials || {}
  const apiUrl = String(creds.api_url || '').trim()
  const apiKey = String(creds.api_key || '').trim()
  const instance = String(creds.instance_name || '').trim()

  if (!apiUrl || !apiKey || !instance) {
    // Incomplete credentials: no alert, just mark the channel as error and move on.
    await updateChannelRow(supa, channel.id, { connection_status: 'error', last_health_check: nowIso })
    return { ...baseResult, new_status: 'error', action: 'config_missing' }
  }

  const base = rewriteForContainer(apiUrl)
  const encodedInstance = encodeURIComponent(instance)
  const targetUrl = `${base}/instance/connectionState/${encodedInstance}`
  const requestHeaders = { 'Content-Type': 'application/json', apikey: apiKey }

  let state: string | null = null
  let rawBody: unknown = null
  let fetchError: string | null = null

  try {
    const res = await fetchWithTimeout(targetUrl, { method: 'GET', headers: requestHeaders }, FETCH_TIMEOUT_MS)
    if (res.ok) {
      rawBody = await res.json().catch(() => null)
      state = extractConnectionState(rawBody)
    } else {
      fetchError = `http_${res.status}`
    }
  } catch (e) {
    fetchError = (e as Error).message || 'fetch_failed'
  }

  const { status: newStatus, kind } = fetchError ? { status: 'error', kind: 'error' } : mapStateToStatus(state)

  const meta = (channel.metadata || {}) as Record<string, unknown>
  // Only a finite, positive configured threshold is honoured; anything else falls back to the default.
  const configuredThreshold = Number(meta.heartbeat_threshold_minutes)
  const thresholdMinutes = Number.isFinite(configuredThreshold) && configuredThreshold > 0
    ? configuredThreshold
    : DEFAULT_THRESHOLD_MINUTES
  const alertsEnabled = meta.heartbeat_alerts_enabled !== false
  const firstUnhealthyAtRaw = meta.first_unhealthy_at as string | undefined

  const patch: Record<string, unknown> = {
    connection_status: newStatus,
    last_health_check: nowIso
  }
  const newMeta: Record<string, unknown> = { ...meta }

  if (newStatus === 'connected') {
    // Recovered: drop the unhealthy marker and close any open incident.
    if (firstUnhealthyAtRaw) delete newMeta.first_unhealthy_at
    patch.metadata = newMeta
    await updateChannelRow(supa, channel.id, patch)

    const { data: resolved } = await supa.rpc('whatsapp_heartbeat_resolve_open_incidents', { p_channel_id: channel.id })
    return { ...baseResult, new_status: newStatus, action: Number(resolved) > 0 ? 'resolved' : 'ok' }
  }

  // Unhealthy: start the clock if it is not running yet. An unparseable stored value is
  // treated as "not running" so it cannot yield NaN minutes and bypass the threshold.
  const firstUnhealthyAt = parseMetaDate(firstUnhealthyAtRaw)
  if (!firstUnhealthyAt) newMeta.first_unhealthy_at = nowIso
  patch.metadata = newMeta
  await updateChannelRow(supa, channel.id, patch)

  const minutesUnhealthy = firstUnhealthyAt ? (nowMs - firstUnhealthyAt.getTime()) / 60000 : 0

  if (minutesUnhealthy < thresholdMinutes) {
    return { ...baseResult, new_status: newStatus, action: firstUnhealthyAt ? 'still_unhealthy' : 'no_change' }
  }

  // Past the threshold: before opening an incident, try an automatic reconnect (6.3).
  // Only when reconnect is enabled, the API answered (no fetchError) and the cooldown elapsed.
  const reconnectEnabled = meta.heartbeat_reconnect_enabled !== false
  const lastReconnectAt = parseMetaDate(meta.heartbeat_reconnect_last_at)
  const cooldownElapsed = !lastReconnectAt || (nowMs - lastReconnectAt.getTime()) >= RECONNECT_COOLDOWN_MS
  const canReconnect = reconnectEnabled && !fetchError && cooldownElapsed

  let reconnectAttempted = false

  if (canReconnect) {
    reconnectAttempted = true
    let recovered = false

    // Only the network calls are best-effort; database work happens outside this try.
    try {
      // POST /instance/restart/{instance} asks Evolution to reconnect the session.
      const restartRes = await fetchWithTimeout(`${base}/instance/restart/${encodedInstance}`, {
        method: 'POST',
        headers: requestHeaders
      }, FETCH_TIMEOUT_MS)
      if (!restartRes.ok) {
        console.warn(`[whatsapp-heartbeat-check] restart for channel ${channel.id} returned http_${restartRes.status}`)
      }

      // Give the instance time to settle before looking at its state again.
      await new Promise<void>((resolve) => setTimeout(resolve, RECONNECT_SETTLE_MS))

      const recheck = await fetchWithTimeout(targetUrl, { method: 'GET', headers: requestHeaders }, FETCH_TIMEOUT_MS)
      if (recheck.ok) {
        const recheckBody: unknown = await recheck.json().catch(() => null)
        recovered = extractConnectionState(recheckBody) === 'open'
      }
    } catch (e) {
      // Reconnect is best-effort: log and fall through to opening the incident.
      const reason = e instanceof Error ? e.message : String(e)
      console.warn(`[whatsapp-heartbeat-check] reconnect attempt failed for channel ${channel.id}: ${reason}`)
    }

    if (recovered) {
      // Reconnect worked: restore the channel as if it had never dropped.
      const cleanedMeta: Record<string, unknown> = { ...newMeta }
      delete cleanedMeta.first_unhealthy_at
      cleanedMeta.heartbeat_reconnect_last_at = nowIso
      cleanedMeta.heartbeat_reconnect_count = (Number(cleanedMeta.heartbeat_reconnect_count) || 0) + 1

      await updateChannelRow(supa, channel.id, {
        connection_status: 'connected',
        last_health_check: nowIso,
        metadata: cleanedMeta
      })

      // Resolve any incident still open for this channel (left over from an earlier cycle).
      await supa.rpc('whatsapp_heartbeat_resolve_open_incidents', { p_channel_id: channel.id })

      return { ...baseResult, new_status: 'connected', action: 'auto_reconnected', reconnect_attempted: true }
    }

    // Record the attempt even though it failed, so the cooldown is respected.
    newMeta.heartbeat_reconnect_last_at = nowIso
    await updateChannelRow(supa, channel.id, { metadata: newMeta })
  }

  // Past the threshold and reconnect failed or was not attempted: open the incident (the original notes this RPC is idempotent).
  const incidentDetails = {
    ...(typeof rawBody === 'object' && rawBody ? rawBody : {}),
    ...(fetchError ? { error: fetchError } : {}),
    reconnect_attempted: reconnectAttempted
  }
  const { data: incidentId, error: incidentErr } = await supa.rpc('whatsapp_heartbeat_open_incident', {
    p_channel_id: channel.id,
    p_kind: kind,
    p_last_state: state || fetchError,
    p_details: incidentDetails
  })

  if (incidentErr) {
    console.error(`[whatsapp-heartbeat-check] failed to open incident for channel ${channel.id}: ${incidentErr.message}`)
    return { ...baseResult, new_status: newStatus, action: 'fetch_error' }
  }

  const newIncidentId = incidentId as unknown as string

  if (alertsEnabled && newIncidentId) {
    await notifyChannelStakeholders(supa, {
      tenant_id: channel.tenant_id,
      channel_owner_id: channel.owner_id,
      incident_id: newIncidentId,
      channel_display: channel.provider === 'evolution_api' ? 'WhatsApp Pessoal' : 'WhatsApp',
      kind,
      minutes_unhealthy: Math.round(minutesUnhealthy)
    })
  }

  return {
    ...baseResult,
    new_status: newStatus,
    action: 'opened',
    incident_id: newIncidentId,
    reconnect_attempted: reconnectAttempted
  }
}
|
|
|
|
/**
 * Creates one in-app `system_alert` notification per stakeholder for an incident.
 *
 * Stakeholders are the channel owner plus the tenant's active members with role
 * `clinic_admin` or `tenant_admin`, de-duplicated. Nothing is sent when the
 * incident already has `notified_at` set (one alert per incident).
 *
 * The incident is marked as notified only after the notifications insert
 * succeeds, so a failed insert can be retried on a later run.
 *
 * @param supa   Supabase client used for the lookups, the insert and the RPC.
 * @param params Incident and channel context used to address and word the alert.
 */
async function notifyChannelStakeholders(supa: SupabaseClient, params: {
  tenant_id: string
  channel_owner_id: string
  incident_id: string
  channel_display: string
  kind: string
  minutes_unhealthy: number
}): Promise<void> {
  // Has this incident already been notified?
  const { data: incident } = await supa
    .from('whatsapp_connection_incidents')
    .select('notified_at, notification_count')
    .eq('id', params.incident_id)
    .maybeSingle()

  if (incident?.notified_at) return // anti-spam: notify only once per incident

  // Recipients: the channel owner plus the tenant's active admins (de-duplicated by the Set).
  const userIds = new Set<string>()
  if (params.channel_owner_id) userIds.add(params.channel_owner_id)

  const { data: admins } = await supa
    .from('tenant_members')
    .select('user_id')
    .eq('tenant_id', params.tenant_id)
    .in('role', ['clinic_admin', 'tenant_admin'])
    .eq('status', 'active')

  for (const admin of admins || []) userIds.add(admin.user_id)

  if (userIds.size === 0) return

  const kindLabel: Record<string, string> = {
    disconnected: 'desconectado',
    qr_pending: 'aguardando QR Code',
    error: 'com erro',
    connecting: 'tentando conectar',
    unknown: 'em estado desconhecido'
  }

  const title = `${params.channel_display} ${kindLabel[params.kind] || 'offline'}`
  const detail = `A conexão está fora há cerca de ${params.minutes_unhealthy} min. Envios automáticos podem estar falhando.`

  const rows = Array.from(userIds).map((uid) => ({
    owner_id: uid,
    tenant_id: params.tenant_id,
    type: 'system_alert',
    ref_id: params.incident_id,
    ref_table: 'whatsapp_connection_incidents',
    payload: {
      title,
      detail,
      severity: 'warn',
      deeplink: '/configuracoes/whatsapp-pessoal'
    }
  }))

  const { error: insertErr } = await supa.from('notifications').insert(rows)
  if (insertErr) {
    // Leave the incident un-notified so the alert is not lost.
    console.error(`[whatsapp-heartbeat-check] failed to insert notifications for incident ${params.incident_id}: ${insertErr.message}`)
    return
  }

  await supa.rpc('whatsapp_heartbeat_mark_notified', { p_incident_id: params.incident_id })
}
|
|
|
|
/**
 * HTTP entry point.
 *
 * - `OPTIONS` is answered with the CORS headers only.
 * - Any other method loads the active, non-deleted Evolution WhatsApp channels
 *   (optionally narrowed to one by the `channel_id` query parameter), checks
 *   them all concurrently and replies with per-action counts plus the
 *   individual results.
 * - A check that throws is reported as a `fetch_error` result carrying the
 *   error message, so one channel cannot fail the whole run.
 */
Deno.serve(async (req) => {
  if (req.method === 'OPTIONS') return new Response('ok', { headers: corsHeaders })

  const supa = createClient(
    Deno.env.get('SUPABASE_URL') ?? '',
    Deno.env.get('SUPABASE_SERVICE_ROLE_KEY') ?? '',
    { auth: { autoRefreshToken: false, persistSession: false } }
  )

  try {
    // Either a single channel (on demand, via `channel_id`) or a full sweep.
    const singleChannelId = new URL(req.url).searchParams.get('channel_id')

    let query = supa
      .from('notification_channels')
      .select('id, tenant_id, owner_id, provider, credentials, connection_status, last_health_check, metadata')
      .eq('provider', 'evolution_api')
      .eq('channel', 'whatsapp')
      .eq('is_active', true)
      .is('deleted_at', null)

    if (singleChannelId) query = query.eq('id', singleChannelId)

    const { data: channels, error: fetchErr } = await query

    if (fetchErr) return json({ error: fetchErr.message }, 500)
    if (!channels || channels.length === 0) {
      return json({ checked: 0, results: [] })
    }

    const now = new Date()
    const results = await Promise.all(
      channels.map((ch) => {
        const row = ch as ChannelRow
        return checkOneChannel(supa, row, now).catch((e) => ({
          tenant_id: row.tenant_id,
          channel_id: row.id,
          previous_status: row.connection_status,
          new_status: 'error',
          action: 'fetch_error' as const,
          error: (e as Error).message
        }))
      })
    )

    // Count results per action in a single pass.
    const tally = new Map<string, number>()
    for (const result of results) {
      tally.set(result.action, (tally.get(result.action) ?? 0) + 1)
    }
    const countOf = (action: string): number => tally.get(action) ?? 0

    const summary = {
      checked: results.length,
      opened: countOf('opened'),
      resolved: countOf('resolved'),
      auto_reconnected: countOf('auto_reconnected'),
      still_unhealthy: countOf('still_unhealthy'),
      ok: countOf('ok'),
      errors: countOf('fetch_error') + countOf('config_missing')
    }

    return json({ ...summary, results })
  } catch (e) {
    return json({ error: (e as Error).message || 'unexpected_error' }, 500)
  }
})
|