Fix Services & Update the middleware && others
This commit is contained in:
@@ -4,11 +4,13 @@ package service
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"gemini-balancer/internal/models"
|
||||
"gemini-balancer/internal/settings"
|
||||
"gemini-balancer/internal/store"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/sirupsen/logrus"
|
||||
"gorm.io/gorm"
|
||||
@@ -18,25 +20,47 @@ type DBLogWriterService struct {
|
||||
db *gorm.DB
|
||||
store store.Store
|
||||
logger *logrus.Entry
|
||||
logBuffer chan *models.RequestLog
|
||||
stopChan chan struct{}
|
||||
wg sync.WaitGroup
|
||||
SettingsManager *settings.SettingsManager
|
||||
settingsManager *settings.SettingsManager
|
||||
|
||||
logBuffer chan *models.RequestLog
|
||||
stopChan chan struct{}
|
||||
wg sync.WaitGroup
|
||||
ctx context.Context
|
||||
cancel context.CancelFunc
|
||||
|
||||
// 统计指标
|
||||
totalReceived atomic.Uint64
|
||||
totalFlushed atomic.Uint64
|
||||
totalDropped atomic.Uint64
|
||||
flushCount atomic.Uint64
|
||||
lastFlushTime time.Time
|
||||
lastFlushMutex sync.RWMutex
|
||||
}
|
||||
|
||||
func NewDBLogWriterService(db *gorm.DB, s store.Store, settings *settings.SettingsManager, logger *logrus.Logger) *DBLogWriterService {
|
||||
cfg := settings.GetSettings()
|
||||
func NewDBLogWriterService(
|
||||
db *gorm.DB,
|
||||
s store.Store,
|
||||
settingsManager *settings.SettingsManager,
|
||||
logger *logrus.Logger,
|
||||
) *DBLogWriterService {
|
||||
cfg := settingsManager.GetSettings()
|
||||
bufferCapacity := cfg.LogBufferCapacity
|
||||
if bufferCapacity <= 0 {
|
||||
bufferCapacity = 1000
|
||||
}
|
||||
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
|
||||
return &DBLogWriterService{
|
||||
db: db,
|
||||
store: s,
|
||||
SettingsManager: settings,
|
||||
settingsManager: settingsManager,
|
||||
logger: logger.WithField("component", "DBLogWriter📝"),
|
||||
logBuffer: make(chan *models.RequestLog, bufferCapacity),
|
||||
stopChan: make(chan struct{}),
|
||||
ctx: ctx,
|
||||
cancel: cancel,
|
||||
lastFlushTime: time.Now(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -44,93 +68,276 @@ func (s *DBLogWriterService) Start() {
|
||||
s.wg.Add(2)
|
||||
go s.eventListenerLoop()
|
||||
go s.dbWriterLoop()
|
||||
s.logger.Info("DBLogWriterService started.")
|
||||
|
||||
// 定期输出统计信息
|
||||
s.wg.Add(1)
|
||||
go s.metricsReporter()
|
||||
|
||||
s.logger.WithFields(logrus.Fields{
|
||||
"buffer_capacity": cap(s.logBuffer),
|
||||
}).Info("DBLogWriterService started")
|
||||
}
|
||||
|
||||
func (s *DBLogWriterService) Stop() {
|
||||
s.logger.Info("DBLogWriterService stopping...")
|
||||
close(s.stopChan)
|
||||
s.cancel() // 取消上下文
|
||||
s.wg.Wait()
|
||||
s.logger.Info("DBLogWriterService stopped.")
|
||||
|
||||
// 输出最终统计
|
||||
s.logger.WithFields(logrus.Fields{
|
||||
"total_received": s.totalReceived.Load(),
|
||||
"total_flushed": s.totalFlushed.Load(),
|
||||
"total_dropped": s.totalDropped.Load(),
|
||||
"flush_count": s.flushCount.Load(),
|
||||
}).Info("DBLogWriterService stopped")
|
||||
}
|
||||
|
||||
// 事件监听循环
|
||||
func (s *DBLogWriterService) eventListenerLoop() {
|
||||
defer s.wg.Done()
|
||||
|
||||
ctx := context.Background()
|
||||
sub, err := s.store.Subscribe(ctx, models.TopicRequestFinished)
|
||||
sub, err := s.store.Subscribe(s.ctx, models.TopicRequestFinished)
|
||||
if err != nil {
|
||||
s.logger.Fatalf("Failed to subscribe to topic %s: %v", models.TopicRequestFinished, err)
|
||||
s.logger.WithError(err).Error("Failed to subscribe to request events, log writing disabled")
|
||||
return
|
||||
}
|
||||
defer sub.Close()
|
||||
defer func() {
|
||||
if err := sub.Close(); err != nil {
|
||||
s.logger.WithError(err).Warn("Failed to close subscription")
|
||||
}
|
||||
}()
|
||||
|
||||
s.logger.Info("Subscribed to request events for database logging.")
|
||||
s.logger.Info("Subscribed to request events for database logging")
|
||||
|
||||
for {
|
||||
select {
|
||||
case msg := <-sub.Channel():
|
||||
var event models.RequestFinishedEvent
|
||||
if err := json.Unmarshal(msg.Payload, &event); err != nil {
|
||||
s.logger.Errorf("Failed to unmarshal event for logging: %v", err)
|
||||
continue
|
||||
}
|
||||
select {
|
||||
case s.logBuffer <- &event.RequestLog:
|
||||
default:
|
||||
s.logger.Warn("Log buffer is full. A log message might be dropped.")
|
||||
}
|
||||
s.handleMessage(msg)
|
||||
|
||||
case <-s.stopChan:
|
||||
s.logger.Info("Event listener loop stopping.")
|
||||
s.logger.Info("Event listener loop stopping")
|
||||
close(s.logBuffer)
|
||||
return
|
||||
|
||||
case <-s.ctx.Done():
|
||||
s.logger.Info("Event listener context cancelled")
|
||||
close(s.logBuffer)
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 处理单条消息
|
||||
func (s *DBLogWriterService) handleMessage(msg *store.Message) {
|
||||
var event models.RequestFinishedEvent
|
||||
if err := json.Unmarshal(msg.Payload, &event); err != nil {
|
||||
s.logger.WithError(err).Error("Failed to unmarshal request event")
|
||||
return
|
||||
}
|
||||
|
||||
s.totalReceived.Add(1)
|
||||
|
||||
select {
|
||||
case s.logBuffer <- &event.RequestLog:
|
||||
// 成功入队
|
||||
default:
|
||||
// 缓冲区满,丢弃日志
|
||||
dropped := s.totalDropped.Add(1)
|
||||
if dropped%100 == 1 { // 每100条丢失输出一次警告
|
||||
s.logger.WithFields(logrus.Fields{
|
||||
"total_dropped": dropped,
|
||||
"buffer_capacity": cap(s.logBuffer),
|
||||
"buffer_len": len(s.logBuffer),
|
||||
}).Warn("Log buffer full, messages being dropped")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 数据库写入循环
|
||||
func (s *DBLogWriterService) dbWriterLoop() {
|
||||
defer s.wg.Done()
|
||||
|
||||
cfg := s.SettingsManager.GetSettings()
|
||||
cfg := s.settingsManager.GetSettings()
|
||||
batchSize := cfg.LogFlushBatchSize
|
||||
if batchSize <= 0 {
|
||||
batchSize = 100
|
||||
}
|
||||
flushTimeout := time.Duration(cfg.LogFlushIntervalSeconds) * time.Second
|
||||
if flushTimeout <= 0 {
|
||||
flushTimeout = 5 * time.Second
|
||||
flushInterval := time.Duration(cfg.LogFlushIntervalSeconds) * time.Second
|
||||
if flushInterval <= 0 {
|
||||
flushInterval = 5 * time.Second
|
||||
}
|
||||
|
||||
s.logger.WithFields(logrus.Fields{
|
||||
"batch_size": batchSize,
|
||||
"flush_interval": flushInterval,
|
||||
}).Info("DB writer loop started")
|
||||
|
||||
batch := make([]*models.RequestLog, 0, batchSize)
|
||||
ticker := time.NewTicker(flushTimeout)
|
||||
ticker := time.NewTicker(flushInterval)
|
||||
defer ticker.Stop()
|
||||
|
||||
// 配置热更新检查(每分钟)
|
||||
configTicker := time.NewTicker(1 * time.Minute)
|
||||
defer configTicker.Stop()
|
||||
|
||||
for {
|
||||
select {
|
||||
case logEntry, ok := <-s.logBuffer:
|
||||
if !ok {
|
||||
// 通道关闭,刷新剩余日志
|
||||
if len(batch) > 0 {
|
||||
s.flushBatch(batch)
|
||||
}
|
||||
s.logger.Info("DB writer loop finished.")
|
||||
s.logger.Info("DB writer loop finished")
|
||||
return
|
||||
}
|
||||
|
||||
batch = append(batch, logEntry)
|
||||
if len(batch) >= batchSize {
|
||||
s.flushBatch(batch)
|
||||
batch = make([]*models.RequestLog, 0, batchSize)
|
||||
}
|
||||
|
||||
case <-ticker.C:
|
||||
if len(batch) > 0 {
|
||||
s.flushBatch(batch)
|
||||
batch = make([]*models.RequestLog, 0, batchSize)
|
||||
}
|
||||
|
||||
case <-configTicker.C:
|
||||
// 热更新配置
|
||||
cfg := s.settingsManager.GetSettings()
|
||||
newBatchSize := cfg.LogFlushBatchSize
|
||||
if newBatchSize <= 0 {
|
||||
newBatchSize = 100
|
||||
}
|
||||
newFlushInterval := time.Duration(cfg.LogFlushIntervalSeconds) * time.Second
|
||||
if newFlushInterval <= 0 {
|
||||
newFlushInterval = 5 * time.Second
|
||||
}
|
||||
|
||||
if newBatchSize != batchSize {
|
||||
s.logger.WithFields(logrus.Fields{
|
||||
"old": batchSize,
|
||||
"new": newBatchSize,
|
||||
}).Info("Batch size updated")
|
||||
batchSize = newBatchSize
|
||||
if len(batch) >= batchSize {
|
||||
s.flushBatch(batch)
|
||||
batch = make([]*models.RequestLog, 0, batchSize)
|
||||
}
|
||||
}
|
||||
|
||||
if newFlushInterval != flushInterval {
|
||||
s.logger.WithFields(logrus.Fields{
|
||||
"old": flushInterval,
|
||||
"new": newFlushInterval,
|
||||
}).Info("Flush interval updated")
|
||||
flushInterval = newFlushInterval
|
||||
ticker.Reset(flushInterval)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 批量刷写到数据库
|
||||
func (s *DBLogWriterService) flushBatch(batch []*models.RequestLog) {
|
||||
if err := s.db.CreateInBatches(batch, len(batch)).Error; err != nil {
|
||||
s.logger.WithField("batch_size", len(batch)).WithError(err).Error("Failed to flush log batch to database.")
|
||||
if len(batch) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
start := time.Now()
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
|
||||
defer cancel()
|
||||
|
||||
err := s.db.WithContext(ctx).CreateInBatches(batch, len(batch)).Error
|
||||
duration := time.Since(start)
|
||||
|
||||
s.lastFlushMutex.Lock()
|
||||
s.lastFlushTime = time.Now()
|
||||
s.lastFlushMutex.Unlock()
|
||||
|
||||
if err != nil {
|
||||
s.logger.WithFields(logrus.Fields{
|
||||
"batch_size": len(batch),
|
||||
"duration": duration,
|
||||
}).WithError(err).Error("Failed to flush log batch to database")
|
||||
} else {
|
||||
s.logger.Infof("Successfully flushed %d logs to database.", len(batch))
|
||||
flushed := s.totalFlushed.Add(uint64(len(batch)))
|
||||
flushCount := s.flushCount.Add(1)
|
||||
|
||||
// 只在慢写入或大批量时输出日志
|
||||
if duration > 1*time.Second || len(batch) > 500 {
|
||||
s.logger.WithFields(logrus.Fields{
|
||||
"batch_size": len(batch),
|
||||
"duration": duration,
|
||||
"total_flushed": flushed,
|
||||
"flush_count": flushCount,
|
||||
}).Info("Log batch flushed to database")
|
||||
} else {
|
||||
s.logger.WithFields(logrus.Fields{
|
||||
"batch_size": len(batch),
|
||||
"duration": duration,
|
||||
}).Debug("Log batch flushed to database")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 定期输出统计信息
|
||||
func (s *DBLogWriterService) metricsReporter() {
|
||||
defer s.wg.Done()
|
||||
|
||||
ticker := time.NewTicker(5 * time.Minute)
|
||||
defer ticker.Stop()
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-ticker.C:
|
||||
s.reportMetrics()
|
||||
case <-s.stopChan:
|
||||
return
|
||||
case <-s.ctx.Done():
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (s *DBLogWriterService) reportMetrics() {
|
||||
s.lastFlushMutex.RLock()
|
||||
lastFlush := s.lastFlushTime
|
||||
s.lastFlushMutex.RUnlock()
|
||||
|
||||
received := s.totalReceived.Load()
|
||||
flushed := s.totalFlushed.Load()
|
||||
dropped := s.totalDropped.Load()
|
||||
pending := uint64(len(s.logBuffer))
|
||||
|
||||
s.logger.WithFields(logrus.Fields{
|
||||
"received": received,
|
||||
"flushed": flushed,
|
||||
"dropped": dropped,
|
||||
"pending": pending,
|
||||
"flush_count": s.flushCount.Load(),
|
||||
"last_flush": time.Since(lastFlush).Round(time.Second),
|
||||
"buffer_usage": float64(pending) / float64(cap(s.logBuffer)) * 100,
|
||||
"success_rate": float64(flushed) / float64(received) * 100,
|
||||
}).Info("DBLogWriter metrics")
|
||||
}
|
||||
|
||||
// GetMetrics 返回当前统计指标(供监控使用)
|
||||
func (s *DBLogWriterService) GetMetrics() map[string]interface{} {
|
||||
s.lastFlushMutex.RLock()
|
||||
lastFlush := s.lastFlushTime
|
||||
s.lastFlushMutex.RUnlock()
|
||||
|
||||
return map[string]interface{}{
|
||||
"total_received": s.totalReceived.Load(),
|
||||
"total_flushed": s.totalFlushed.Load(),
|
||||
"total_dropped": s.totalDropped.Load(),
|
||||
"flush_count": s.flushCount.Load(),
|
||||
"buffer_pending": len(s.logBuffer),
|
||||
"buffer_capacity": cap(s.logBuffer),
|
||||
"last_flush_ago": time.Since(lastFlush).Seconds(),
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user