/*
 * Copyright (c) 2012 Linux Box Corporation.
 * Copyright (c) 2013-2018 Red Hat, Inc. and/or its affiliates.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "config.h"

#include <sys/types.h>
#include <sys/poll.h>
#include <stdint.h>
#include <assert.h>
#include <err.h>
#include <errno.h>
#include <unistd.h>
#include <signal.h>
#include <rpc/types.h>
#include <misc/portable.h>
#include <rpc/rpc.h>
#include "rpc_com.h"
#include "svc_internal.h"
#include "svc_xprt.h"

/**
 * @file svc_xprt.c
 * @author William Allen Simpson <bill@cohortfs.com>
 * @brief Service transports package
 *
 * @section DESCRIPTION
 *
 * Maintains a tree of all extant transports, keyed by fd.
 *
 * Each SVCXPRT carries its own tree node (fd_node in its rpc_dplx_rec),
 * however, so operations such as close and delete on an existing xprt
 * handle are O(1) and need no ordered or hashed lookup.
 *
 * @note currently static sizes
 *	partition count should be a largish prime, relative to the number
 *	of connections.
 *	no cache slots, as rpc_dplx_rec has fd_node for direct access.
 */

#define SVC_XPRT_PARTITIONS 193

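/* Illustrative sketch (not compiled): how an fd resolves to its partition
 * and duplex record under the layout described above.  This assumes
 * rbtx_partition_of_scalar() picks one of the SVC_XPRT_PARTITIONS trees
 * from the scalar fd, matching its use later in this file; the helper
 * name find_rec_by_fd is hypothetical.
 *
 *	static struct rpc_dplx_rec *
 *	find_rec_by_fd(int fd)
 *	{
 *		struct rpc_dplx_rec sk, *rec = NULL;
 *		struct rbtree_x_part *t;
 *		struct opr_rbtree_node *nv;
 *
 *		sk.xprt.xp_fd = fd;
 *		t = rbtx_partition_of_scalar(&svc_xprt_fd.xt, fd);
 *		rwlock_rdlock(&t->lock);
 *		nv = opr_rbtree_lookup(&t->t, &sk.fd_node);
 *		if (nv)
 *			rec = opr_containerof(nv, struct rpc_dplx_rec, fd_node);
 *		rwlock_unlock(&t->lock);
 *		return (rec);
 *	}
 */
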
static bool initialized;

struct svc_xprt_fd {
	mutex_t lock;
	struct rbtree_x xt;
	uint32_t connections;
};

static struct svc_xprt_fd svc_xprt_fd = {
	MUTEX_INITIALIZER /* svc_xprt_lock */ ,
	{
	 SVC_XPRT_PARTITIONS,	/* npart */
	 RBT_X_FLAG_NONE,	/* flags */
	 0,			/* cachesz */
	 NULL			/* tree */
	}			/* xt */
};

static inline int
svc_xprt_fd_cmpf(const struct opr_rbtree_node *lhs,
		 const struct opr_rbtree_node *rhs)
{
	struct rpc_dplx_rec *lk, *rk;

	lk = opr_containerof(lhs, struct rpc_dplx_rec, fd_node);
	rk = opr_containerof(rhs, struct rpc_dplx_rec, fd_node);

	if (lk->xprt.xp_fd < rk->xprt.xp_fd)
		return (-1);

	if (lk->xprt.xp_fd == rk->xprt.xp_fd)
		return (0);

	return (1);
}

int
svc_xprt_init(void)
{
	int code = 0;

	mutex_lock(&svc_xprt_fd.lock);

	if (initialized)
		goto unlock;

	/* one of the advantages of this RBT is the convenience of external
	 * iteration; we'll go to that shortly */
	code =
	    rbtx_init(&svc_xprt_fd.xt, svc_xprt_fd_cmpf /* NULL (inline) */ ,
		      SVC_XPRT_PARTITIONS, RBT_X_FLAG_ALLOC);
	if (code)
		__warnx(TIRPC_DEBUG_FLAG_SVC_XPRT,
			"svc_xprt_init: rbtx_init failed");

	initialized = true;

 unlock:
	mutex_unlock(&svc_xprt_fd.lock);
	return (code);
}

static inline bool
svc_xprt_init_failure(void)
{
	if (initialized)
		return (false);
	return (svc_xprt_init() != 0);
}

/*
 * On success, returns with RPC_DPLX_LOCKED
 */
SVCXPRT *
svc_xprt_lookup(int fd, svc_xprt_setup_t setup)
{
	struct rpc_dplx_rec sk;
	struct rpc_dplx_rec *rec;
	struct rbtree_x_part *t;
	struct opr_rbtree_node *nv;
	SVCXPRT *xprt = NULL;
	uint16_t xp_flags;

	if (svc_xprt_init_failure())
		return (NULL);

	sk.xprt.xp_fd = fd;
	t = rbtx_partition_of_scalar(&svc_xprt_fd.xt, fd);

	rwlock_rdlock(&t->lock);
	nv = opr_rbtree_lookup(&t->t, &sk.fd_node);
	if (!nv) {
		rwlock_unlock(&t->lock);
		if (!setup)
			return (NULL);

		rwlock_wrlock(&t->lock);
		nv = opr_rbtree_lookup(&t->t, &sk.fd_node);
		if (!nv) {
			if (atomic_inc_uint32_t(&svc_xprt_fd.connections)
			    > __svc_params->max_connections) {
				atomic_dec_uint32_t(&svc_xprt_fd.connections);
				rwlock_unlock(&t->lock);
				__warnx(TIRPC_DEBUG_FLAG_ERROR,
					"%s: fd %d max_connections %u exceeded\n",
					__func__, fd,
					__svc_params->max_connections);
				return (NULL);
			}
			(*setup)(&xprt); /* zalloc, xp_refcnt = 1 */
			xprt->xp_fd = fd;
			xprt->xp_flags = SVC_XPRT_FLAG_INITIAL;

			/* Get ref for caller */
			SVC_REF(xprt, SVC_REF_FLAG_NONE);

			rec = REC_XPRT(xprt);
			rpc_dplx_rli(rec);
			if (opr_rbtree_insert(&t->t, &rec->fd_node)) {
				/* can't happen */
				rpc_dplx_rui(rec);
				__warnx(TIRPC_DEBUG_FLAG_LOCK,
					"%s: collision inserting in locked rbtree partition",
					__func__);
				(*setup)(&xprt);	/* free, sets NULL */
				atomic_dec_uint32_t(&svc_xprt_fd.connections);
			}
			rwlock_unlock(&t->lock);
			return (xprt);
		}
		/* raced, fallthru */
	}
	rec = opr_containerof(nv, struct rpc_dplx_rec, fd_node);
	xprt = &rec->xprt;

	/* taking the lookup reference before unlock ensures that shutdown
	 * cannot release the xprt out from under us */
	SVC_REF(xprt, SVC_REF_FLAG_NONE);
	rwlock_unlock(&t->lock);

	/* the unlocked window here permits shutdown to destroy without
	 * release; the duplex lock is then required to match the allocation
	 * return, ensuring SVC_XPRT_FLAG_INITIAL is cleared in this thread
	 * only (obviating an extra atomic fetch).
	 */
	rpc_dplx_rli(rec);
	xp_flags = atomic_clear_uint16_t_bits(&xprt->xp_flags,
					      SVC_XPRT_FLAG_INITIAL);
	if (!(xp_flags & SVC_XPRT_FLAG_DESTROYED)) {
		/* do not return destroyed xprts */
		return (xprt);
	}

	/* unlocking before release permits releasing here after destroy */
	rpc_dplx_rui(rec);
	SVC_RELEASE(xprt, SVC_RELEASE_FLAG_NONE);
	return (NULL);
}

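/* Illustrative sketch (not compiled): the caller-side pattern implied by
 * the contract above.  On success the duplex record is returned locked
 * (RPC_DPLX_LOCKED) and one reference is held for the caller, so the
 * caller eventually drops the duplex lock with rpc_dplx_rui() and the
 * reference with SVC_RELEASE().  The names some_setup_cb and
 * handle_request are hypothetical placeholders.
 *
 *	SVCXPRT *xprt = svc_xprt_lookup(fd, some_setup_cb);
 *
 *	if (xprt) {
 *		handle_request(xprt);
 *		rpc_dplx_rui(REC_XPRT(xprt));
 *		SVC_RELEASE(xprt, SVC_RELEASE_FLAG_NONE);
 *	}
 */
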
/**
 * Clear an xprt
 *
 * @note Locking
 * - xprt is locked on entry
 * - xprt is returned locked
 */
void
svc_xprt_clear(SVCXPRT *xprt)
{
	struct rbtree_x_part *t;

	if (svc_xprt_init_failure())
		return;

	/* the xprt lock ensures only one active thread here */
	if (opr_rbtree_node_valid(&REC_XPRT(xprt)->fd_node)) {
		t = rbtx_partition_of_scalar(&svc_xprt_fd.xt, xprt->xp_fd);

		/* if another thread passes the test during svc_xprt_shutdown(),
		 * this lock (and the generation test) prevents repeats.
		 */
		atomic_dec_uint32_t(&svc_xprt_fd.connections);
		rwlock_wrlock(&t->lock);
		opr_rbtree_remove(&t->t, &REC_XPRT(xprt)->fd_node);
		rwlock_unlock(&t->lock);
	}
}

int
svc_xprt_foreach(svc_xprt_each_func_t each_f, void *arg)
{
	struct rpc_dplx_rec sk;
	struct rpc_dplx_rec *rec;
	struct rbtree_x_part *t;
	struct opr_rbtree_node *n;
	uint64_t tgen;
	int p_ix;
	int x_ix;
	int restarts;

	if (svc_xprt_init_failure())
		return (-1);

	/* concurrent, restartable iteration over t */
	p_ix = 0;
	while (p_ix < SVC_XPRT_PARTITIONS) {
		t = &svc_xprt_fd.xt.tree[p_ix];
		restarts = 0;
		/* TI-RPC __svc_clean_idle held the global svc_fd_lock
		 * exclusively locked for a full scan of the legacy svc_xprts
		 * array.  We avoid this via tree partitioning and by
		 * operating mostly unlocked. */
 restart:
		if (++restarts > 5)
			return (1);

		/* start with rlock */
		rwlock_rdlock(&t->lock);	/* t RLOCKED */
		tgen = t->t.gen;
		x_ix = 0;
		n = opr_rbtree_first(&t->t);
		while (n != NULL) {
			++x_ix;	/* diagnostic, index into logical srec
				 * sequence */
			rec = opr_containerof(n, struct rpc_dplx_rec, fd_node);
			sk.xprt.xp_fd = rec->xprt.xp_fd;

			/* call each_func with t !LOCKED */
			rwlock_unlock(&t->lock);

			/* restart if each_f disposed xprt */
			if (each_f(&rec->xprt, arg))
				goto restart;

			/* validate: if the tree generation changed while
			 * unlocked, re-find our place by the saved fd key;
			 * an unchanged generation means the partition was
			 * not modified, so n is still a valid node. */
			rwlock_rdlock(&t->lock);

			if (tgen != t->t.gen) {
				n = opr_rbtree_lookup(&t->t, &sk.fd_node);
				if (!n) {
					/* invalidated, try harder */
					rwlock_unlock(&t->lock);
							/* t !LOCKED */
					goto restart;
				}
			}
			n = opr_rbtree_next(n);
		}		/* curr partition */
		rwlock_unlock(&t->lock); /* t !LOCKED */
		p_ix++;
	}			/* SVC_XPRT_PARTITIONS */

	return (0);
}

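/* Illustrative sketch (not compiled): a hypothetical each_f callback.
 * This assumes svc_xprt_each_func_t is a bool (SVCXPRT *, void *)
 * callback, as the call site above suggests; should_close() stands in
 * for whatever policy the caller applies.  Returning true signals that
 * the callback may have disposed of the xprt (or otherwise changed the
 * tree), so the scan of the current partition restarts.
 *
 *	static bool
 *	close_if_marked(SVCXPRT *xprt, void *arg)
 *	{
 *		if (!should_close(xprt, arg))
 *			return (false);
 *		SVC_DESTROY(xprt);
 *		return (true);
 *	}
 *
 *	(void)svc_xprt_foreach(close_if_marked, NULL);
 */
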
void
svc_xprt_dump_xprts(const char *tag)
{
	struct rbtree_x_part *t = NULL;
	struct opr_rbtree_node *n;
	struct rpc_dplx_rec *rec;
	int p_ix;

	if (!initialized)
		goto out;

	p_ix = 0;
	while (p_ix < SVC_XPRT_PARTITIONS) {
		t = &svc_xprt_fd.xt.tree[p_ix];
		rwlock_rdlock(&t->lock);	/* t RLOCKED */
		__warnx(TIRPC_DEBUG_FLAG_SVC_XPRT,
			"xprts at %s: tree %d size %d", tag, p_ix, t->t.size);
		n = opr_rbtree_first(&t->t);
		while (n != NULL) {
			rec = opr_containerof(n, struct rpc_dplx_rec, fd_node);
			__warnx(TIRPC_DEBUG_FLAG_SVC_XPRT,
				"xprts at %s: %p xp_fd %d",
				tag, &rec->xprt, rec->xprt.xp_fd);
			n = opr_rbtree_next(n);
		}		/* curr partition */
		rwlock_unlock(&t->lock);	/* t !LOCKED */
		p_ix++;
	}			/* SVC_XPRT_PARTITIONS */
 out:
	return;
}

void
svc_xprt_shutdown(void)
{
	struct rbtree_x_part *t;
	struct opr_rbtree_node *n;
	struct rpc_dplx_rec *rec;
	int p_ix;

	if (!initialized)
		return;

	p_ix = 0;
	while (p_ix < SVC_XPRT_PARTITIONS) {
		t = &svc_xprt_fd.xt.tree[p_ix];

		rwlock_wrlock(&t->lock);	/* t WLOCKED */
		while ((n = opr_rbtree_first(&t->t))) {
			rec = opr_containerof(n, struct rpc_dplx_rec, fd_node);

			/* prevent repeats, see svc_xprt_clear() */
			opr_rbtree_remove(&t->t, &rec->fd_node);

			/* fd_node is counted by initial xp_refcnt = 1,
			 * SVC_DESTROY() decrements that reference.
			 */
			rwlock_unlock(&t->lock);
			SVC_DESTROY(&rec->xprt);
			rwlock_wrlock(&t->lock);
		}		/* curr partition */
		rwlock_unlock(&t->lock);	/* t !LOCKED */
		rwlock_destroy(&t->lock);
		p_ix++;
	}			/* SVC_XPRT_PARTITIONS */

	/* free tree */
	mem_free(svc_xprt_fd.xt.tree,
		 SVC_XPRT_PARTITIONS * sizeof(struct rbtree_x_part));
}

void
svc_xprt_trace(SVCXPRT *xprt, const char *func, const char *tag, const int line)
{
	__warnx(TIRPC_DEBUG_FLAG_REFCNT,
		"%s() %p fd %d xp_refcnt %" PRId32
		" af %u port %u @%s:%d",
		func, xprt, xprt->xp_fd, xprt->xp_refcnt,
		xprt->xp_remote.ss.ss_family,
		__rpc_address_port(&xprt->xp_remote),
		tag, line);
}