Fix misrouted notifications from SQLite lock contention
send_message silently fell back to the default conversation whenever the plan-time or Slack-channel lookup failed, because the query error was ignored. A transient "database is locked" error (caused by the channel-creation/sync routine holding the write lock while making slow Slack API calls) therefore misrouted a service notification to the admin DM instead of the event channel.
- api.go: capture lookup errors and treat only gorm.ErrRecordNotFound as "no service / no channel"; on any other error, fail so the caller retries rather than posting to the wrong conversation.
- database.go: open SQLite with WAL journaling and a 10s busy timeout so reads proceed alongside the sync writer instead of being locked out.
- Bump version to 0.2.2.
This commit is contained in:
parent
6857622c26
commit
42d3251251
3 changed files with 43 additions and 5 deletions
29
api.go
29
api.go
|
|
@ -2,12 +2,14 @@ package main
|
|||
|
||||
import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"log"
|
||||
"net/http"
|
||||
"time"
|
||||
|
||||
"github.com/gorilla/mux"
|
||||
"github.com/slack-go/slack"
|
||||
"gorm.io/gorm"
|
||||
)
|
||||
|
||||
// Commonly used strings.
|
||||
|
|
@ -98,13 +100,34 @@ func (s *HTTPServer) RegisterAPIRoutes(r *mux.Router) {
|
|||
now := time.Now().UTC()
|
||||
conversation := app.config.Slack.DefaultConversation
|
||||
|
||||
// Find plan times that are occurring right now.
|
||||
// Find plan times that are occurring right now. A 60-minute buffer
|
||||
// is applied past ends_at so services that run long still resolve
|
||||
// to their event channel instead of falling back to the default.
|
||||
// Order by starts_at DESC so the most recent active service wins
|
||||
// when a later service's window overlaps an earlier service's buffer.
|
||||
var planTime PlanTimes
|
||||
app.db.Where("time_type='service' AND starts_at < ? AND ends_at > ?", now, now).First(&planTime)
|
||||
err = app.db.Where("time_type='service' AND starts_at < ? AND ends_at > ?", now, now.Add(-60*time.Minute)).Order("starts_at DESC").First(&planTime).Error
|
||||
// A "record not found" simply means no service is occurring right now, in
|
||||
// which case we fall back to the default conversation. Any other error
|
||||
// (e.g. "database is locked") must NOT be swallowed: treating it as "no
|
||||
// service" would silently misroute the message to the default
|
||||
// conversation instead of the event channel. Fail so the caller retries.
|
||||
if err != nil && !errors.Is(err, gorm.ErrRecordNotFound) {
|
||||
log.Println("Error looking up plan time:", err)
|
||||
s.APISendGeneralResp(w, APIERR, "Error looking up plan time")
|
||||
return
|
||||
}
|
||||
if planTime.Plan != 0 {
|
||||
// If plan found, check for the slack channel.
|
||||
var channel SlackChannels
|
||||
app.db.Where("pc_plan = ?", planTime.Plan).First(&channel)
|
||||
err = app.db.Where("pc_plan = ?", planTime.Plan).First(&channel).Error
|
||||
// As above, only "record not found" is a benign result here. On any
|
||||
// other error we must not fall through to the default conversation.
|
||||
if err != nil && !errors.Is(err, gorm.ErrRecordNotFound) {
|
||||
log.Println("Error looking up slack channel:", err)
|
||||
s.APISendGeneralResp(w, APIERR, "Error looking up slack channel")
|
||||
return
|
||||
}
|
||||
if channel.ID != "" {
|
||||
// If slack channel found, update the conversation to the channel ID.
|
||||
conversation = channel.ID
|
||||
|
|
|
|||
17
database.go
17
database.go
|
|
@ -2,6 +2,7 @@ package main
|
|||
|
||||
import (
|
||||
"log"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"gorm.io/driver/mysql"
|
||||
|
|
@ -126,7 +127,21 @@ func (a *App) InitDB() {
|
|||
}
|
||||
// Depending on connection configuration, open the database.
|
||||
if a.config.DB.Type == "sqlite3" {
|
||||
a.db, err = gorm.Open(sqlite.Open(a.config.DB.Connection), dbConfig)
|
||||
// Enable WAL journaling and a busy timeout. Without WAL, a single
|
||||
// long-running writer (e.g. the channel-creation/sync routine, which
|
||||
// interleaves slow Slack API calls with its writes) blocks all readers,
|
||||
// causing "database is locked" on concurrent reads such as the
|
||||
// send_message channel lookup. WAL lets reads proceed alongside the
|
||||
// writer, and the busy timeout makes any remaining contention wait
|
||||
// rather than fail immediately. Append as DSN pragmas, preserving any
|
||||
// query string already present in the configured connection.
|
||||
conn := a.config.DB.Connection
|
||||
sep := "?"
|
||||
if strings.Contains(conn, "?") {
|
||||
sep = "&"
|
||||
}
|
||||
conn += sep + "_journal_mode=WAL&_busy_timeout=10000"
|
||||
a.db, err = gorm.Open(sqlite.Open(conn), dbConfig)
|
||||
} else if a.config.DB.Type == "mysql" {
|
||||
a.db, err = gorm.Open(mysql.Open(a.config.DB.Connection), dbConfig)
|
||||
} else if a.config.DB.Type == "postgres" {
|
||||
|
|
|
|||
2
main.go
2
main.go
|
|
@ -13,7 +13,7 @@ import (
|
|||
const (
|
||||
serviceName = "service-notifications"
|
||||
serviceDescription = "Notifications for church services"
|
||||
serviceVersion = "0.2.1"
|
||||
serviceVersion = "0.2.2"
|
||||
)
|
||||
|
||||
// App is the global application structure for communicating between servers and storing information.
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue