1// Copyright 2014 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package runtime
6
7import (
8	"internal/goarch"
9	"internal/runtime/atomic"
10	"unsafe"
11)
12
13// Solaris runtime-integrated network poller.
14//
15// Solaris uses event ports for scalable network I/O. Event
16// ports are level-triggered, unlike epoll and kqueue which
17// can be configured in both level-triggered and edge-triggered
18// mode. Level triggering means we have to keep track of a few things
19// ourselves. After we receive an event for a file descriptor,
20// it's our responsibility to ask again to be notified for future
21// events for that descriptor. When doing this we must keep track of
22// what kind of events the goroutines are currently interested in,
23// for example a fd may be open both for reading and writing.
24//
25// A description of the high level operation of this code
26// follows. Networking code will get a file descriptor by some means
27// and will register it with the netpolling mechanism by a code path
28// that eventually calls runtime·netpollopen. runtime·netpollopen
29// calls port_associate with an empty event set. That means that we
30// will not receive any events at this point. The association needs
31// to be done at this early point because we need to process the I/O
32// readiness notification at some point in the future. If I/O becomes
33// ready when nobody is listening, when we finally care about it,
34// nobody will tell us anymore.
35//
// Besides calling runtime·netpollopen, the networking code paths
37// will call runtime·netpollarm each time goroutines are interested
38// in doing network I/O. Because now we know what kind of I/O we
39// are interested in (reading/writing), we can call port_associate
40// passing the correct type of event set (POLLIN/POLLOUT). As we made
41// sure to have already associated the file descriptor with the port,
42// when we now call port_associate, we will unblock the main poller
43// loop (in runtime·netpoll) right away if the socket is actually
44// ready for I/O.
45//
46// The main poller loop runs in its own thread waiting for events
47// using port_getn. When an event happens, it will tell the scheduler
48// about it using runtime·netpollready. Besides doing this, it must
49// also re-associate the events that were not part of this current
// notification with the file descriptor. Failing to do this would
// mean each notification will prevent concurrent code from using the
// same file descriptor in parallel.
53//
54// The logic dealing with re-associations is encapsulated in
55// runtime·netpollupdate. This function takes care to associate the
56// descriptor only with the subset of events that were previously
57// part of the association, except the one that just happened. We
58// can't re-associate with that right away, because event ports
59// are level triggered so it would cause a busy loop. Instead, that
60// association is effected only by the runtime·netpollarm code path,
61// when Go code actually asks for I/O.
62//
// The open and arming mechanisms are serialized using the lock
// inside pollDesc. This is required because the netpoll loop runs
// asynchronously with respect to other Go code and by the time we get
// to call port_associate to update the association in the loop, the
// file descriptor might have been closed and reopened already. The
// lock allows runtime·netpollupdate to be called synchronously from
// the loop thread while preventing other threads from operating on the
// same pollDesc, so once we unblock in the main loop, until we loop
// again we know for sure we are always talking about the same file
// descriptor and can safely access the data we want (the event set).
73
74//go:cgo_import_dynamic libc_port_create port_create "libc.so"
75//go:cgo_import_dynamic libc_port_associate port_associate "libc.so"
76//go:cgo_import_dynamic libc_port_dissociate port_dissociate "libc.so"
77//go:cgo_import_dynamic libc_port_getn port_getn "libc.so"
78//go:cgo_import_dynamic libc_port_alert port_alert "libc.so"
79
80//go:linkname libc_port_create libc_port_create
81//go:linkname libc_port_associate libc_port_associate
82//go:linkname libc_port_dissociate libc_port_dissociate
83//go:linkname libc_port_getn libc_port_getn
84//go:linkname libc_port_alert libc_port_alert
85
86var (
87	libc_port_create,
88	libc_port_associate,
89	libc_port_dissociate,
90	libc_port_getn,
91	libc_port_alert libcFunc
92	netpollWakeSig atomic.Uint32 // used to avoid duplicate calls of netpollBreak
93)
94
95func errno() int32 {
96	return *getg().m.perrno
97}
98
99func port_create() int32 {
100	return int32(sysvicall0(&libc_port_create))
101}
102
103func port_associate(port, source int32, object uintptr, events uint32, user uintptr) int32 {
104	return int32(sysvicall5(&libc_port_associate, uintptr(port), uintptr(source), object, uintptr(events), user))
105}
106
107func port_dissociate(port, source int32, object uintptr) int32 {
108	return int32(sysvicall3(&libc_port_dissociate, uintptr(port), uintptr(source), object))
109}
110
111func port_getn(port int32, evs *portevent, max uint32, nget *uint32, timeout *timespec) int32 {
112	return int32(sysvicall5(&libc_port_getn, uintptr(port), uintptr(unsafe.Pointer(evs)), uintptr(max), uintptr(unsafe.Pointer(nget)), uintptr(unsafe.Pointer(timeout))))
113}
114
115func port_alert(port int32, flags, events uint32, user uintptr) int32 {
116	return int32(sysvicall4(&libc_port_alert, uintptr(port), uintptr(flags), uintptr(events), user))
117}
118
// portfd is the event port descriptor used by the poller. It holds -1
// until netpollinit stores the result of port_create in it.
var portfd = int32(-1)
120
121func netpollinit() {
122	portfd = port_create()
123	if portfd >= 0 {
124		closeonexec(portfd)
125		return
126	}
127
128	print("runtime: port_create failed (errno=", errno(), ")\n")
129	throw("runtime: netpollinit failed")
130}
131
132func netpollIsPollDescriptor(fd uintptr) bool {
133	return fd == uintptr(portfd)
134}
135
136func netpollopen(fd uintptr, pd *pollDesc) int32 {
137	lock(&pd.lock)
138	// We don't register for any specific type of events yet, that's
139	// netpollarm's job. We merely ensure we call port_associate before
140	// asynchronous connect/accept completes, so when we actually want
141	// to do any I/O, the call to port_associate (from netpollarm,
142	// with the interested event set) will unblock port_getn right away
143	// because of the I/O readiness notification.
144	pd.user = 0
145	tp := taggedPointerPack(unsafe.Pointer(pd), pd.fdseq.Load())
146	// Note that this won't work on a 32-bit system,
147	// as taggedPointer is always 64-bits but uintptr will be 32 bits.
148	// Fortunately we only support Solaris on amd64.
149	if goarch.PtrSize != 8 {
150		throw("runtime: netpollopen: unsupported pointer size")
151	}
152	r := port_associate(portfd, _PORT_SOURCE_FD, fd, 0, uintptr(tp))
153	unlock(&pd.lock)
154	return r
155}
156
157func netpollclose(fd uintptr) int32 {
158	return port_dissociate(portfd, _PORT_SOURCE_FD, fd)
159}
160
161// Updates the association with a new set of interested events. After
162// this call, port_getn will return one and only one event for that
163// particular descriptor, so this function needs to be called again.
164func netpollupdate(pd *pollDesc, set, clear uint32) {
165	if pd.info().closing() {
166		return
167	}
168
169	old := pd.user
170	events := (old & ^clear) | set
171	if old == events {
172		return
173	}
174
175	tp := taggedPointerPack(unsafe.Pointer(pd), pd.fdseq.Load())
176	if events != 0 && port_associate(portfd, _PORT_SOURCE_FD, pd.fd, events, uintptr(tp)) != 0 {
177		print("runtime: port_associate failed (errno=", errno(), ")\n")
178		throw("runtime: netpollupdate failed")
179	}
180	pd.user = events
181}
182
183// subscribe the fd to the port such that port_getn will return one event.
184func netpollarm(pd *pollDesc, mode int) {
185	lock(&pd.lock)
186	switch mode {
187	case 'r':
188		netpollupdate(pd, _POLLIN, 0)
189	case 'w':
190		netpollupdate(pd, _POLLOUT, 0)
191	default:
192		throw("runtime: bad mode")
193	}
194	unlock(&pd.lock)
195}
196
197// netpollBreak interrupts a port_getn wait.
198func netpollBreak() {
199	// Failing to cas indicates there is an in-flight wakeup, so we're done here.
200	if !netpollWakeSig.CompareAndSwap(0, 1) {
201		return
202	}
203
204	// Use port_alert to put portfd into alert mode.
205	// This will wake up all threads sleeping in port_getn on portfd,
206	// and cause their calls to port_getn to return immediately.
207	// Further, until portfd is taken out of alert mode,
208	// all calls to port_getn will return immediately.
209	if port_alert(portfd, _PORT_ALERT_UPDATE, _POLLHUP, uintptr(unsafe.Pointer(&portfd))) < 0 {
210		if e := errno(); e != _EBUSY {
211			println("runtime: port_alert failed with", e)
212			throw("runtime: netpoll: port_alert failed")
213		}
214	}
215}
216
217// netpoll checks for ready network connections.
218// Returns list of goroutines that become runnable.
219// delay < 0: blocks indefinitely
220// delay == 0: does not block, just polls
221// delay > 0: block for up to that many nanoseconds
222func netpoll(delay int64) (gList, int32) {
223	if portfd == -1 {
224		return gList{}, 0
225	}
226
227	var wait *timespec
228	var ts timespec
229	if delay < 0 {
230		wait = nil
231	} else if delay == 0 {
232		wait = &ts
233	} else {
234		ts.setNsec(delay)
235		if ts.tv_sec > 1e6 {
236			// An arbitrary cap on how long to wait for a timer.
237			// 1e6 s == ~11.5 days.
238			ts.tv_sec = 1e6
239		}
240		wait = &ts
241	}
242
243	var events [128]portevent
244retry:
245	var n uint32 = 1
246	r := port_getn(portfd, &events[0], uint32(len(events)), &n, wait)
247	e := errno()
248	if r < 0 && e == _ETIME && n > 0 {
249		// As per port_getn(3C), an ETIME failure does not preclude the
250		// delivery of some number of events.  Treat a timeout failure
251		// with delivered events as a success.
252		r = 0
253	}
254	if r < 0 {
255		if e != _EINTR && e != _ETIME {
256			print("runtime: port_getn on fd ", portfd, " failed (errno=", e, ")\n")
257			throw("runtime: netpoll failed")
258		}
259		// If a timed sleep was interrupted and there are no events,
260		// just return to recalculate how long we should sleep now.
261		if delay > 0 {
262			return gList{}, 0
263		}
264		goto retry
265	}
266
267	var toRun gList
268	delta := int32(0)
269	for i := 0; i < int(n); i++ {
270		ev := &events[i]
271
272		if ev.portev_source == _PORT_SOURCE_ALERT {
273			if ev.portev_events != _POLLHUP || unsafe.Pointer(ev.portev_user) != unsafe.Pointer(&portfd) {
274				throw("runtime: netpoll: bad port_alert wakeup")
275			}
276			if delay != 0 {
277				// Now that a blocking call to netpoll
278				// has seen the alert, take portfd
279				// back out of alert mode.
280				// See the comment in netpollBreak.
281				if port_alert(portfd, 0, 0, 0) < 0 {
282					e := errno()
283					println("runtime: port_alert failed with", e)
284					throw("runtime: netpoll: port_alert failed")
285				}
286				netpollWakeSig.Store(0)
287			}
288			continue
289		}
290
291		if ev.portev_events == 0 {
292			continue
293		}
294
295		tp := taggedPointer(uintptr(unsafe.Pointer(ev.portev_user)))
296		pd := (*pollDesc)(tp.pointer())
297		if pd.fdseq.Load() != tp.tag() {
298			continue
299		}
300
301		var mode, clear int32
302		if (ev.portev_events & (_POLLIN | _POLLHUP | _POLLERR)) != 0 {
303			mode += 'r'
304			clear |= _POLLIN
305		}
306		if (ev.portev_events & (_POLLOUT | _POLLHUP | _POLLERR)) != 0 {
307			mode += 'w'
308			clear |= _POLLOUT
309		}
310		// To effect edge-triggered events, we need to be sure to
311		// update our association with whatever events were not
312		// set with the event. For example if we are registered
313		// for POLLIN|POLLOUT, and we get POLLIN, besides waking
314		// the goroutine interested in POLLIN we have to not forget
315		// about the one interested in POLLOUT.
316		if clear != 0 {
317			lock(&pd.lock)
318			netpollupdate(pd, 0, uint32(clear))
319			unlock(&pd.lock)
320		}
321
322		if mode != 0 {
323			// TODO(mikio): Consider implementing event
324			// scanning error reporting once we are sure
325			// about the event port on SmartOS.
326			//
327			// See golang.org/x/issue/30840.
328			delta += netpollready(&toRun, pd, mode)
329		}
330	}
331
332	return toRun, delta
333}
334