Fix misrouted notifications from SQLite lock contention

send_message silently fell back to the default conversation whenever the
plan-time or slack-channel lookup errored, because the query error was
ignored. A transient "database is locked" (from the channel-creation/sync
routine holding the write lock while making slow Slack API calls) thus
misrouted a service notification to the admin DM instead of the event
channel.

- api.go: capture lookup errors and only treat gorm.ErrRecordNotFound as
  "no service / no channel"; on any other error, fail so the caller retries
  rather than posting to the wrong conversation.
- database.go: open SQLite with WAL journaling and a 10s busy timeout so
  reads proceed alongside the sync writer, and any remaining contention
  waits instead of failing immediately with "database is locked".
- Bump version to 0.2.2.
This commit is contained in:
James Coleman 2026-06-21 10:19:41 -05:00
parent 6857622c26
commit 42d3251251
3 changed files with 43 additions and 5 deletions

29
api.go
View file

@ -2,12 +2,14 @@ package main
import (
"encoding/json"
"errors"
"log"
"net/http"
"time"
"github.com/gorilla/mux"
"github.com/slack-go/slack"
"gorm.io/gorm"
)
// Commonly used strings.
@ -98,13 +100,34 @@ func (s *HTTPServer) RegisterAPIRoutes(r *mux.Router) {
now := time.Now().UTC()
conversation := app.config.Slack.DefaultConversation
// Find plan times that are occuring right now.
// Find plan times that are occurring right now. A 60-minute buffer
// is applied past ends_at so services that run long still resolve
// to their event channel instead of falling back to the default.
// Order by starts_at DESC so the most recent active service wins
// when a later service's window overlaps an earlier service's buffer.
var planTime PlanTimes
app.db.Where("time_type='service' AND starts_at < ? AND ends_at > ?", now, now).First(&planTime)
err = app.db.Where("time_type='service' AND starts_at < ? AND ends_at > ?", now, now.Add(-60*time.Minute)).Order("starts_at DESC").First(&planTime).Error
// A "record not found" simply means no service is occurring right now, in
// which case we fall back to the default conversation. Any other error
// (e.g. "database is locked") must NOT be swallowed: treating it as "no
// service" would silently misroute the message to the default
// conversation instead of the event channel. Fail so the caller retries.
if err != nil && !errors.Is(err, gorm.ErrRecordNotFound) {
log.Println("Error looking up plan time:", err)
s.APISendGeneralResp(w, APIERR, "Error looking up plan time")
return
}
if planTime.Plan != 0 {
// If plan found, check for the slack channel.
var channel SlackChannels
app.db.Where("pc_plan = ?", planTime.Plan).First(&channel)
err = app.db.Where("pc_plan = ?", planTime.Plan).First(&channel).Error
// As above, only "record not found" is a benign result here. On any
// other error we must not fall through to the default conversation.
if err != nil && !errors.Is(err, gorm.ErrRecordNotFound) {
log.Println("Error looking up slack channel:", err)
s.APISendGeneralResp(w, APIERR, "Error looking up slack channel")
return
}
if channel.ID != "" {
// If slack channel found, update the conversation to the channel ID.
conversation = channel.ID

View file

@ -2,6 +2,7 @@ package main
import (
"log"
"strings"
"time"
"gorm.io/driver/mysql"
@ -126,7 +127,21 @@ func (a *App) InitDB() {
}
// Depending on connection configuration, open the database.
if a.config.DB.Type == "sqlite3" {
a.db, err = gorm.Open(sqlite.Open(a.config.DB.Connection), dbConfig)
// Enable WAL journaling and a busy timeout. Without WAL, a single
// long-running writer (e.g. the channel-creation/sync routine, which
// interleaves slow Slack API calls with its writes) blocks all readers,
// causing "database is locked" on concurrent reads such as the
// send_message channel lookup. WAL lets reads proceed alongside the
// writer, and the busy timeout makes any remaining contention wait
// rather than fail immediately. Append as DSN pragmas, preserving any
// query string already present in the configured connection.
conn := a.config.DB.Connection
sep := "?"
if strings.Contains(conn, "?") {
sep = "&"
}
conn += sep + "_journal_mode=WAL&_busy_timeout=10000"
a.db, err = gorm.Open(sqlite.Open(conn), dbConfig)
} else if a.config.DB.Type == "mysql" {
a.db, err = gorm.Open(mysql.Open(a.config.DB.Connection), dbConfig)
} else if a.config.DB.Type == "postgres" {

View file

@ -13,7 +13,7 @@ import (
const (
serviceName = "service-notifications"
serviceDescription = "Notifications for church services"
serviceVersion = "0.2.1"
serviceVersion = "0.2.2"
)
// App is the global application structure for communicating between servers and storing information.