mirror of
				https://github.com/go-gitea/gitea.git
				synced 2025-10-31 22:28:05 +01:00 
			
		
		
		
	Fix shutdown waitgroup panic (#35676)
This PR fixes a panic issue in the WaitGroup that occurs when Gitea is shut down using Ctrl+C. It ensures that all active connection pointers in the server are properly tracked and forcibly closed when the hammer shutdown is invoked. The process remains graceful — the normal shutdown sequence runs before the hammer is triggered, and existing connections are given a timeout period to complete gracefully. This PR also fixes `no logger writer` problem. Now the log close will only be invoked when the command exit. - Fixes #35468 - Fixes #35551 - Fixes #35559 - Replace #35578 --------- Co-authored-by: wxiaoguang <wxiaoguang@gmail.com>
This commit is contained in:
		
							parent
							
								
									cb72c901b3
								
							
						
					
					
						commit
						304d836a61
					
				| @ -156,7 +156,6 @@ func serveInstall(cmd *cli.Command) error { | |||||||
| 	case <-graceful.GetManager().IsShutdown(): | 	case <-graceful.GetManager().IsShutdown(): | ||||||
| 		<-graceful.GetManager().Done() | 		<-graceful.GetManager().Done() | ||||||
| 		log.Info("PID: %d Gitea Web Finished", os.Getpid()) | 		log.Info("PID: %d Gitea Web Finished", os.Getpid()) | ||||||
| 		log.GetManager().Close() |  | ||||||
| 		return err | 		return err | ||||||
| 	default: | 	default: | ||||||
| 	} | 	} | ||||||
| @ -231,7 +230,6 @@ func serveInstalled(c *cli.Command) error { | |||||||
| 	err := listen(webRoutes, true) | 	err := listen(webRoutes, true) | ||||||
| 	<-graceful.GetManager().Done() | 	<-graceful.GetManager().Done() | ||||||
| 	log.Info("PID: %d Gitea Web Finished", os.Getpid()) | 	log.Info("PID: %d Gitea Web Finished", os.Getpid()) | ||||||
| 	log.GetManager().Close() |  | ||||||
| 	return err | 	return err | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | |||||||
							
								
								
									
										1
									
								
								main.go
									
									
									
									
									
								
							
							
						
						
									
										1
									
								
								main.go
									
									
									
									
									
								
							| @ -44,6 +44,7 @@ func main() { | |||||||
| 	} | 	} | ||||||
| 	app := cmd.NewMainApp(cmd.AppVersion{Version: Version, Extra: formatBuiltWith()}) | 	app := cmd.NewMainApp(cmd.AppVersion{Version: Version, Extra: formatBuiltWith()}) | ||||||
| 	_ = cmd.RunMainApp(app, os.Args...) // all errors should have been handled by the RunMainApp | 	_ = cmd.RunMainApp(app, os.Args...) // all errors should have been handled by the RunMainApp | ||||||
|  | 	// flush the queued logs before exiting, it is a MUST, otherwise there will be log loss | ||||||
| 	log.GetManager().Close() | 	log.GetManager().Close() | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -11,7 +11,6 @@ import ( | |||||||
| 	"os" | 	"os" | ||||||
| 	"strings" | 	"strings" | ||||||
| 	"sync" | 	"sync" | ||||||
| 	"sync/atomic" |  | ||||||
| 	"syscall" | 	"syscall" | ||||||
| 	"time" | 	"time" | ||||||
| 
 | 
 | ||||||
| @ -33,9 +32,12 @@ type Server struct { | |||||||
| 	network  string | 	network  string | ||||||
| 	address  string | 	address  string | ||||||
| 	listener net.Listener | 	listener net.Listener | ||||||
| 	wg                   sync.WaitGroup | 
 | ||||||
|  | 	lock          sync.RWMutex | ||||||
| 	state         state | 	state         state | ||||||
| 	lock                 *sync.RWMutex | 	connCounter   int64 | ||||||
|  | 	connEmptyCond *sync.Cond | ||||||
|  | 
 | ||||||
| 	BeforeBegin          func(network, address string) | 	BeforeBegin          func(network, address string) | ||||||
| 	OnShutdown           func() | 	OnShutdown           func() | ||||||
| 	PerWriteTimeout      time.Duration | 	PerWriteTimeout      time.Duration | ||||||
| @ -50,14 +52,13 @@ func NewServer(network, address, name string) *Server { | |||||||
| 		log.Info("Starting new %s server: %s:%s on PID: %d", name, network, address, os.Getpid()) | 		log.Info("Starting new %s server: %s:%s on PID: %d", name, network, address, os.Getpid()) | ||||||
| 	} | 	} | ||||||
| 	srv := &Server{ | 	srv := &Server{ | ||||||
| 		wg:                   sync.WaitGroup{}, |  | ||||||
| 		state:                stateInit, | 		state:                stateInit, | ||||||
| 		lock:                 &sync.RWMutex{}, |  | ||||||
| 		network:              network, | 		network:              network, | ||||||
| 		address:              address, | 		address:              address, | ||||||
| 		PerWriteTimeout:      setting.PerWriteTimeout, | 		PerWriteTimeout:      setting.PerWriteTimeout, | ||||||
| 		PerWritePerKbTimeout: setting.PerWritePerKbTimeout, | 		PerWritePerKbTimeout: setting.PerWritePerKbTimeout, | ||||||
| 	} | 	} | ||||||
|  | 	srv.connEmptyCond = sync.NewCond(&srv.lock) | ||||||
| 
 | 
 | ||||||
| 	srv.BeforeBegin = func(network, addr string) { | 	srv.BeforeBegin = func(network, addr string) { | ||||||
| 		log.Debug("Starting server on %s:%s (PID: %d)", network, addr, syscall.Getpid()) | 		log.Debug("Starting server on %s:%s (PID: %d)", network, addr, syscall.Getpid()) | ||||||
| @ -154,7 +155,7 @@ func (srv *Server) Serve(serve ServeFunction) error { | |||||||
| 	GetManager().RegisterServer() | 	GetManager().RegisterServer() | ||||||
| 	err := serve(srv.listener) | 	err := serve(srv.listener) | ||||||
| 	log.Debug("Waiting for connections to finish... (PID: %d)", syscall.Getpid()) | 	log.Debug("Waiting for connections to finish... (PID: %d)", syscall.Getpid()) | ||||||
| 	srv.wg.Wait() | 	srv.waitForActiveConnections() | ||||||
| 	srv.setState(stateTerminate) | 	srv.setState(stateTerminate) | ||||||
| 	GetManager().ServerDone() | 	GetManager().ServerDone() | ||||||
| 	// use of closed means that the listeners are closed - i.e. we should be shutting down - return nil | 	// use of closed means that the listeners are closed - i.e. we should be shutting down - return nil | ||||||
| @ -178,16 +179,62 @@ func (srv *Server) setState(st state) { | |||||||
| 	srv.state = st | 	srv.state = st | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | func (srv *Server) waitForActiveConnections() { | ||||||
|  | 	srv.lock.Lock() | ||||||
|  | 	for srv.connCounter > 0 { | ||||||
|  | 		srv.connEmptyCond.Wait() | ||||||
|  | 	} | ||||||
|  | 	srv.lock.Unlock() | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | func (srv *Server) wrapConnection(c net.Conn) (net.Conn, error) { | ||||||
|  | 	srv.lock.Lock() | ||||||
|  | 	defer srv.lock.Unlock() | ||||||
|  | 
 | ||||||
|  | 	if srv.state != stateRunning { | ||||||
|  | 		_ = c.Close() | ||||||
|  | 		return nil, syscall.EINVAL // same as AcceptTCP | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	srv.connCounter++ | ||||||
|  | 	return &wrappedConn{Conn: c, server: srv}, nil | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | func (srv *Server) removeConnection(_ *wrappedConn) { | ||||||
|  | 	srv.lock.Lock() | ||||||
|  | 	defer srv.lock.Unlock() | ||||||
|  | 
 | ||||||
|  | 	srv.connCounter-- | ||||||
|  | 	if srv.connCounter <= 0 { | ||||||
|  | 		srv.connEmptyCond.Broadcast() | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | // closeAllConnections forcefully closes all active connections | ||||||
|  | func (srv *Server) closeAllConnections() { | ||||||
|  | 	srv.lock.Lock() | ||||||
|  | 	if srv.connCounter > 0 { | ||||||
|  | 		log.Warn("After graceful shutdown period, %d connections are still active. Forcefully close.", srv.connCounter) | ||||||
|  | 		srv.connCounter = 0 // OS will close all the connections after the process exits, so we just assume there is no active connection now | ||||||
|  | 	} | ||||||
|  | 	srv.lock.Unlock() | ||||||
|  | 	srv.connEmptyCond.Broadcast() | ||||||
|  | } | ||||||
|  | 
 | ||||||
| type filer interface { | type filer interface { | ||||||
| 	File() (*os.File, error) | 	File() (*os.File, error) | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| type wrappedListener struct { | type wrappedListener struct { | ||||||
| 	net.Listener | 	net.Listener | ||||||
| 	stopped bool |  | ||||||
| 	server *Server | 	server *Server | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | var ( | ||||||
|  | 	_ net.Listener = (*wrappedListener)(nil) | ||||||
|  | 	_ filer        = (*wrappedListener)(nil) | ||||||
|  | ) | ||||||
|  | 
 | ||||||
| func newWrappedListener(l net.Listener, srv *Server) *wrappedListener { | func newWrappedListener(l net.Listener, srv *Server) *wrappedListener { | ||||||
| 	return &wrappedListener{ | 	return &wrappedListener{ | ||||||
| 		Listener: l, | 		Listener: l, | ||||||
| @ -195,46 +242,24 @@ func newWrappedListener(l net.Listener, srv *Server) *wrappedListener { | |||||||
| 	} | 	} | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| func (wl *wrappedListener) Accept() (net.Conn, error) { | func (wl *wrappedListener) Accept() (c net.Conn, err error) { | ||||||
| 	var c net.Conn |  | ||||||
| 	// Set keepalive on TCPListeners connections. |  | ||||||
| 	if tcl, ok := wl.Listener.(*net.TCPListener); ok { | 	if tcl, ok := wl.Listener.(*net.TCPListener); ok { | ||||||
|  | 		// Set keepalive on TCPListeners connections if possible, see http.tcpKeepAliveListener | ||||||
| 		tc, err := tcl.AcceptTCP() | 		tc, err := tcl.AcceptTCP() | ||||||
| 		if err != nil { | 		if err != nil { | ||||||
| 			return nil, err | 			return nil, err | ||||||
| 		} | 		} | ||||||
| 		_ = tc.SetKeepAlive(true)                  // see http.tcpKeepAliveListener | 		_ = tc.SetKeepAlive(true) | ||||||
| 		_ = tc.SetKeepAlivePeriod(3 * time.Minute) // see http.tcpKeepAliveListener | 		_ = tc.SetKeepAlivePeriod(3 * time.Minute) | ||||||
| 		c = tc | 		c = tc | ||||||
| 	} else { | 	} else { | ||||||
| 		var err error |  | ||||||
| 		c, err = wl.Listener.Accept() | 		c, err = wl.Listener.Accept() | ||||||
| 		if err != nil { | 		if err != nil { | ||||||
| 			return nil, err | 			return nil, err | ||||||
| 		} | 		} | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	closed := int32(0) | 	return wl.server.wrapConnection(c) | ||||||
| 
 |  | ||||||
| 	c = &wrappedConn{ |  | ||||||
| 		Conn:                 c, |  | ||||||
| 		server:               wl.server, |  | ||||||
| 		closed:               &closed, |  | ||||||
| 		perWriteTimeout:      wl.server.PerWriteTimeout, |  | ||||||
| 		perWritePerKbTimeout: wl.server.PerWritePerKbTimeout, |  | ||||||
| 	} |  | ||||||
| 
 |  | ||||||
| 	wl.server.wg.Add(1) |  | ||||||
| 	return c, nil |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| func (wl *wrappedListener) Close() error { |  | ||||||
| 	if wl.stopped { |  | ||||||
| 		return syscall.EINVAL |  | ||||||
| 	} |  | ||||||
| 
 |  | ||||||
| 	wl.stopped = true |  | ||||||
| 	return wl.Listener.Close() |  | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| func (wl *wrappedListener) File() (*os.File, error) { | func (wl *wrappedListener) File() (*os.File, error) { | ||||||
| @ -245,16 +270,13 @@ func (wl *wrappedListener) File() (*os.File, error) { | |||||||
| type wrappedConn struct { | type wrappedConn struct { | ||||||
| 	net.Conn | 	net.Conn | ||||||
| 	server   *Server | 	server   *Server | ||||||
| 	closed               *int32 |  | ||||||
| 	deadline time.Time | 	deadline time.Time | ||||||
| 	perWriteTimeout      time.Duration |  | ||||||
| 	perWritePerKbTimeout time.Duration |  | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| func (w *wrappedConn) Write(p []byte) (n int, err error) { | func (w *wrappedConn) Write(p []byte) (n int, err error) { | ||||||
| 	if w.perWriteTimeout > 0 { | 	if w.server.PerWriteTimeout > 0 { | ||||||
| 		minTimeout := time.Duration(len(p)/1024) * w.perWritePerKbTimeout | 		minTimeout := time.Duration(len(p)/1024) * w.server.PerWritePerKbTimeout | ||||||
| 		minDeadline := time.Now().Add(minTimeout).Add(w.perWriteTimeout) | 		minDeadline := time.Now().Add(minTimeout).Add(w.server.PerWriteTimeout) | ||||||
| 
 | 
 | ||||||
| 		w.deadline = w.deadline.Add(minTimeout) | 		w.deadline = w.deadline.Add(minTimeout) | ||||||
| 		if minDeadline.After(w.deadline) { | 		if minDeadline.After(w.deadline) { | ||||||
| @ -266,19 +288,6 @@ func (w *wrappedConn) Write(p []byte) (n int, err error) { | |||||||
| } | } | ||||||
| 
 | 
 | ||||||
| func (w *wrappedConn) Close() error { | func (w *wrappedConn) Close() error { | ||||||
| 	if atomic.CompareAndSwapInt32(w.closed, 0, 1) { | 	w.server.removeConnection(w) | ||||||
| 		defer func() { |  | ||||||
| 			if err := recover(); err != nil { |  | ||||||
| 				select { |  | ||||||
| 				case <-GetManager().IsHammer(): |  | ||||||
| 					// Likely deadlocked request released at hammertime |  | ||||||
| 					log.Warn("Panic during connection close! %v. Likely there has been a deadlocked request which has been released by forced shutdown.", err) |  | ||||||
| 				default: |  | ||||||
| 					log.Error("Panic during connection close! %v", err) |  | ||||||
| 				} |  | ||||||
| 			} |  | ||||||
| 		}() |  | ||||||
| 		w.server.wg.Done() |  | ||||||
| 	} |  | ||||||
| 	return w.Conn.Close() | 	return w.Conn.Close() | ||||||
| } | } | ||||||
|  | |||||||
| @ -5,7 +5,6 @@ package graceful | |||||||
| 
 | 
 | ||||||
| import ( | import ( | ||||||
| 	"os" | 	"os" | ||||||
| 	"runtime" |  | ||||||
| 
 | 
 | ||||||
| 	"code.gitea.io/gitea/modules/log" | 	"code.gitea.io/gitea/modules/log" | ||||||
| ) | ) | ||||||
| @ -48,26 +47,8 @@ func (srv *Server) doShutdown() { | |||||||
| } | } | ||||||
| 
 | 
 | ||||||
| func (srv *Server) doHammer() { | func (srv *Server) doHammer() { | ||||||
| 	defer func() { |  | ||||||
| 		// We call srv.wg.Done() until it panics. |  | ||||||
| 		// This happens if we call Done() when the WaitGroup counter is already at 0 |  | ||||||
| 		// So if it panics -> we're done, Serve() will return and the |  | ||||||
| 		// parent will goroutine will exit. |  | ||||||
| 		if r := recover(); r != nil { |  | ||||||
| 			log.Error("WaitGroup at 0: Error: %v", r) |  | ||||||
| 		} |  | ||||||
| 	}() |  | ||||||
| 	if srv.getState() != stateShuttingDown { | 	if srv.getState() != stateShuttingDown { | ||||||
| 		return | 		return | ||||||
| 	} | 	} | ||||||
| 	log.Warn("Forcefully shutting down parent") | 	srv.closeAllConnections() | ||||||
| 	for { |  | ||||||
| 		if srv.getState() == stateTerminate { |  | ||||||
| 			break |  | ||||||
| 		} |  | ||||||
| 		srv.wg.Done() |  | ||||||
| 
 |  | ||||||
| 		// Give other goroutines a chance to finish before we forcibly stop them. |  | ||||||
| 		runtime.Gosched() |  | ||||||
| 	} |  | ||||||
| } | } | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user