Fix misrouted notifications from SQLite lock contention
send_message silently fell back to the default conversation whenever the plan-time or Slack-channel lookup errored, because the query error was ignored. A transient "database is locked" error (caused by the channel-creation/sync routine holding the write lock while making slow Slack API calls) therefore misrouted a service notification to the admin DM instead of the event channel.
- api.go: capture lookup errors and treat only gorm.ErrRecordNotFound as "no service / no channel"; on any other error, fail so the caller retries rather than posting to the wrong conversation.
- database.go: open SQLite with WAL journaling and a 10s busy timeout so reads proceed alongside the sync writer instead of failing with "database is locked".
- Bump version to 0.2.2.
This commit is contained in:
parent
6857622c26
commit
42d3251251
3 changed files with 43 additions and 5 deletions
29
api.go
29
api.go
|
|
@ -2,12 +2,14 @@ package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
|
"errors"
|
||||||
"log"
|
"log"
|
||||||
"net/http"
|
"net/http"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/gorilla/mux"
|
"github.com/gorilla/mux"
|
||||||
"github.com/slack-go/slack"
|
"github.com/slack-go/slack"
|
||||||
|
"gorm.io/gorm"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Commonly used strings.
|
// Commonly used strings.
|
||||||
|
|
@ -98,13 +100,34 @@ func (s *HTTPServer) RegisterAPIRoutes(r *mux.Router) {
|
||||||
now := time.Now().UTC()
|
now := time.Now().UTC()
|
||||||
conversation := app.config.Slack.DefaultConversation
|
conversation := app.config.Slack.DefaultConversation
|
||||||
|
|
||||||
// Find plan times that are occurring right now.
|
// Find plan times that are occurring right now. A 60-minute buffer
|
||||||
|
// is applied past ends_at so services that run long still resolve
|
||||||
|
// to their event channel instead of falling back to the default.
|
||||||
|
// Order by starts_at DESC so the most recent active service wins
|
||||||
|
// when a later service's window overlaps an earlier service's buffer.
|
||||||
var planTime PlanTimes
|
var planTime PlanTimes
|
||||||
app.db.Where("time_type='service' AND starts_at < ? AND ends_at > ?", now, now).First(&planTime)
|
err = app.db.Where("time_type='service' AND starts_at < ? AND ends_at > ?", now, now.Add(-60*time.Minute)).Order("starts_at DESC").First(&planTime).Error
|
||||||
|
// A "record not found" simply means no service is occurring right now, in
|
||||||
|
// which case we fall back to the default conversation. Any other error
|
||||||
|
// (e.g. "database is locked") must NOT be swallowed: treating it as "no
|
||||||
|
// service" would silently misroute the message to the default
|
||||||
|
// conversation instead of the event channel. Fail so the caller retries.
|
||||||
|
if err != nil && !errors.Is(err, gorm.ErrRecordNotFound) {
|
||||||
|
log.Println("Error looking up plan time:", err)
|
||||||
|
s.APISendGeneralResp(w, APIERR, "Error looking up plan time")
|
||||||
|
return
|
||||||
|
}
|
||||||
if planTime.Plan != 0 {
|
if planTime.Plan != 0 {
|
||||||
// If plan found, check for the slack channel.
|
// If plan found, check for the slack channel.
|
||||||
var channel SlackChannels
|
var channel SlackChannels
|
||||||
app.db.Where("pc_plan = ?", planTime.Plan).First(&channel)
|
err = app.db.Where("pc_plan = ?", planTime.Plan).First(&channel).Error
|
||||||
|
// As above, only "record not found" is a benign result here. On any
|
||||||
|
// other error we must not fall through to the default conversation.
|
||||||
|
if err != nil && !errors.Is(err, gorm.ErrRecordNotFound) {
|
||||||
|
log.Println("Error looking up slack channel:", err)
|
||||||
|
s.APISendGeneralResp(w, APIERR, "Error looking up slack channel")
|
||||||
|
return
|
||||||
|
}
|
||||||
if channel.ID != "" {
|
if channel.ID != "" {
|
||||||
// If slack channel found, update the conversation to the channel ID.
|
// If slack channel found, update the conversation to the channel ID.
|
||||||
conversation = channel.ID
|
conversation = channel.ID
|
||||||
|
|
|
||||||
17
database.go
17
database.go
|
|
@ -2,6 +2,7 @@ package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"log"
|
"log"
|
||||||
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"gorm.io/driver/mysql"
|
"gorm.io/driver/mysql"
|
||||||
|
|
@ -126,7 +127,21 @@ func (a *App) InitDB() {
|
||||||
}
|
}
|
||||||
// Depending on connection configuration, open the database.
|
// Depending on connection configuration, open the database.
|
||||||
if a.config.DB.Type == "sqlite3" {
|
if a.config.DB.Type == "sqlite3" {
|
||||||
a.db, err = gorm.Open(sqlite.Open(a.config.DB.Connection), dbConfig)
|
// Enable WAL journaling and a busy timeout. Without WAL, a single
|
||||||
|
// long-running writer (e.g. the channel-creation/sync routine, which
|
||||||
|
// interleaves slow Slack API calls with its writes) blocks all readers,
|
||||||
|
// causing "database is locked" on concurrent reads such as the
|
||||||
|
// send_message channel lookup. WAL lets reads proceed alongside the
|
||||||
|
// writer, and the busy timeout makes any remaining contention wait
|
||||||
|
// rather than fail immediately. Append as DSN pragmas, preserving any
|
||||||
|
// query string already present in the configured connection.
|
||||||
|
conn := a.config.DB.Connection
|
||||||
|
sep := "?"
|
||||||
|
if strings.Contains(conn, "?") {
|
||||||
|
sep = "&"
|
||||||
|
}
|
||||||
|
conn += sep + "_journal_mode=WAL&_busy_timeout=10000"
|
||||||
|
a.db, err = gorm.Open(sqlite.Open(conn), dbConfig)
|
||||||
} else if a.config.DB.Type == "mysql" {
|
} else if a.config.DB.Type == "mysql" {
|
||||||
a.db, err = gorm.Open(mysql.Open(a.config.DB.Connection), dbConfig)
|
a.db, err = gorm.Open(mysql.Open(a.config.DB.Connection), dbConfig)
|
||||||
} else if a.config.DB.Type == "postgres" {
|
} else if a.config.DB.Type == "postgres" {
|
||||||
|
|
|
||||||
2
main.go
2
main.go
|
|
@ -13,7 +13,7 @@ import (
|
||||||
const (
|
const (
|
||||||
serviceName = "service-notifications"
|
serviceName = "service-notifications"
|
||||||
serviceDescription = "Notifications for church services"
|
serviceDescription = "Notifications for church services"
|
||||||
serviceVersion = "0.2.1"
|
serviceVersion = "0.2.2"
|
||||||
)
|
)
|
||||||
|
|
||||||
// App is the global application structure for communicating between servers and storing information.
|
// App is the global application structure for communicating between servers and storing information.
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue