1    | /***************************************
2    |   $Revision: 1.4 $
3    | 
4    |   Socket module - cd_watchdog.c - Socket watchdog - when activated, checks the
5    |                                   socket for new data and discards it. If the
6    | 				  socket is closed, it triggers predefined
7    | 				  functions - executes a function and/or 
8    | 				  cancels a thread.
9    | 
10   |   Status: NOT REVUED, TESTED
11   | 
12   |   Design and implementation by Marek Bukowy.
13   | 
14   |   Modification history:
15   |   marek  (August 2000) Created the watchdog part
16   |   marek  (December 2000) Modified watchdog deactivation - 
17   |                          replaced signals by pthread cancellation.
18   |   ******************/ /******************
19   |   Copyright (c) 1999, 2000                           RIPE NCC
20   |  
21   |   All Rights Reserved
22   |   
23   |   Permission to use, copy, modify, and distribute this software and its
24   |   documentation for any purpose and without fee is hereby granted,
25   |   provided that the above copyright notice appear in all copies and that
26   |   both that copyright notice and this permission notice appear in
27   |   supporting documentation, and that the name of the author not be
28   |   used in advertising or publicity pertaining to distribution of the
29   |   software without specific, written prior permission.
30   |   
31   |   THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
32   |   ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS; IN NO EVENT SHALL
33   |   AUTHOR BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
34   |   DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
35   |   AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
36   |   OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
37   |   ***************************************/
38   | 
39   | #include "sk.h"
/*+ String sizes: STR_S is the size, in bytes, of the scratch buffer into
    which the watchdog reads pending socket input before discarding it +*/
#define STR_S   63

/*+ Uncomment this to use watchdog deactivation by signal (may be risky)

   #define WATCHDOG_BY_SIGNAL
+*/

/*+ once-only control handed to pthread_once(3) by SK_init, so that
    sk_real_init is run a single time no matter how often SK_init is called +*/
static pthread_once_t sk_init_once = { PTHREAD_ONCE_INIT }; 
49   | 
50   | #ifdef WATCHDOG_BY_SIGNAL
51   | 
52   | /*+ The signal version is complicated to cope with all timing situations.
53   |   It uses a thread specific flag to see if the signal handler was invoked
54   |   in case the signal arrives before select(3) is called in watchdog.
55   | +*/
56   | 
/* Key of the thread-specific "signal handler has been invoked" flag:
   set by the SIGUSR2 handler, cleared and tested by the watchdog thread. */
static pthread_key_t  sk_watch_tsd;

/*++++++++++++++++++++++++++++++++++++++
  One-time set-up for the SIGNAL cancellation mode: creates the
  thread-specific data key (without a destructor) that holds the flag.
  dieif() is applied to a failing pthread_key_create.
  ++++++++++++++++++++++++++++++++++++++*/
static void sk_real_init(void)
{
  int rc = pthread_key_create(&sk_watch_tsd, NULL);

  dieif( rc != 0 );
}
68   | 
69   | 
70   | /*++++++++++++++++++++++++++++++++++++++
71   |   sk_watchdog signal handler - sets the thread-specific flag.
72   | 
73   |   int n      signal received. (not used)
74   |   ++++++++++++++++++++++++++++++++++++++*/
75   | static void func_sigusr(int n) {
76   | #if 0
77   |   /* just for debugging - we don't check the value here */
78   |   int *tsd_flag = (int *) pthread_getspecific(sk_watch_tsd);
79   | #endif
80   | 
81   |   /* 2000/12/18 MB:
82   |      DEADLOCK has happened - the watchdog was just getting a mutex
83   |      for the ER rwlock when a signal arrived and the execution of the
84   |      pthread_mutex_lock function was interrupted AFTER the lock was
85   |      grabbed. The this handler was invoked and tried to get that mutex
86   |      again. As a result, everything stopped.
87   | 
88   |      Cures: 
89   |      1. Not invoke this here:
90   |      ER_dbg_va(FAC_SK, ASP_SK_GEN,"func_sigusr(%d) called", n);
91   | 
92   |      2. Not accept any signals during any pthread calls so that this
93   |      does not happen again. Must be reimplemented with pthread_cancel 
94   |      and all the signal stuff must go away. (Done, 2000/12/19).
95   |   */
96   |   /* set a thread-specific flag that the handler was invoked */
97   |   
98   |   pthread_setspecific(sk_watch_tsd, (void *)1 );
99   | }
100  | 
101  | /*++++++++++++++++++++++++++++++++++++++
102  |   watchdog (SIGNAL VERSION) - started as a separate thread. 
103  | 
104  |    Selects on the given socket; discards all input.
105  |    whenever it sees end of file (socket closed), it
106  |    * sets a corresponding flag in the condat structure, 
107  |    * triggers the predefined actions (by SK_watchtrigger).
108  | 
109  |   void *arg    - pointer to the connection data structure
110  |   ++++++++++++++++++++++++++++++++++++++*/
111  | static
112  | void *sk_watchdog(void *arg)
113  | {
114  |   sk_conn_st *condat = (sk_conn_st *) arg;
115  |   int nready;
116  |   int n;
117  |   fd_set rset;
118  |   char buff[STR_S];
119  |   int socket = condat->sock;
120  |   sigset_t sset;
121  |   struct sigaction act;
122  |   
123  |   struct timeval timeout = { 1, 0 }; /* it's a timeout of 1 second */
124  | 
125  |   FD_ZERO(&rset);
126  |   FD_SET(socket, &rset);
127  | 
128  |   sigemptyset(&sset);
129  |   sigaddset(&sset, SIGUSR2);
130  |   
131  |   act.sa_handler = func_sigusr;
132  |   act.sa_flags = 0;
133  |   dieif(sigaction(SIGUSR2, &act, NULL) != 0);
134  | 
135  |   /* XXX in fact, it's unblocked already. Should be blocked on startup */
136  |   dieif(pthread_sigmask(SIG_UNBLOCK, &sset, NULL) != 0);
137  |   
138  |   /* clear the handler's flag */
139  |   pthread_setspecific(sk_watch_tsd, NULL);
140  |   
141  |   /* now ready for signal */
142  |   pthread_mutex_unlock( & condat->watchmutex ); 
143  | 
144  |   /* hey, viva threaded signal handling! There is no way for select
145  |      to unblock a blocked signal, It must be done by "hand" (above).
146  | 
147  |      Consequently, every once in a while, the signal will be delivered
148  |      before the select starts :-/. So, we have to introduce a timeout
149  |      for select and check if the signal was delivered anyway....aARGH!!!
150  | 
151  |      This adds a <timeout interval> to unlucky queries, about 0.1% of all.
152  |   */
153  | 
154  |   while ((nready=select(socket+1, &rset, NULL, NULL, &timeout))!=-1) {
155  |     
156  |     ER_dbg_va(FAC_SK, ASP_SK_WATCH,"select returned %d", nready);
157  | 
158  |     /* don't even try to read if we have been killed */
159  |     if( errno == EINTR || pthread_getspecific(sk_watch_tsd) != NULL ) {
160  |       break;
161  |     }
162  | 
163  |     /* retry if the timeout has triggered */
164  |     if( nready == 0 ) {
165  |       continue;
166  |     }
167  | 
168  |    /* There was some input or client half of connection was closed */
169  |    /* Check for the latter */
170  |     if (( n=read(socket, buff, sizeof(buff))) == 0) {
171  |    /* Connection was closed by client */
172  |    /* Now send a cancellation request to the whois thread. */
173  |    /* mysql thread will be terminated by thread cleanup routine */
174  |      
175  |      /* call the actions: kill and exec (the SK_ functions called
176  | 	check if the action is defined. Will set the RTC flag on condat 
177  |      */
178  |      SK_watchtrigger(condat);
179  | 
180  |      /* quit */
181  |      break;
182  |    }
183  |    /* Otherwise dump input and continue */
184  | 
185  |   }
186  | 
187  |   /* Exit the watchdog thread, passing NULL as we don't expect a join */
188  |   pthread_exit(NULL);
189  | 
190  |   /* oh yes. Shouldn't compilers _recognize_ library functions ? */
191  |   return NULL;
192  | }
193  | 
194  | 
195  | #else /* not WATCHDOG_BY_SIGNAL */
196  | 
197  | 
198  | /*++++++++++++++++++++++++++++++++++++++
199  |   watchdog (CANCEL VERSION) - started as a separate thread. 
200  | 
201  |    Selects on the given socket; discards all input.
202  |    whenever it sees end of file (socket closed), it
203  |    * sets a corresponding flag in the condat structure, 
204  |    * triggers the predefined actions (by SK_watchtrigger).
205  | 
206  |   void *arg    - pointer to the connection data structure
207  |   ++++++++++++++++++++++++++++++++++++++*/
208  | static
209  | void *sk_watchdog(void *arg)
210  | {
211  |   sk_conn_st *condat = (sk_conn_st *) arg;
212  |   int nready;
213  |   int n;
214  |   char buff[STR_S];
215  |   int socket = condat->sock;
216  |   struct timeval timeout = { 1, 0 }; /* it's a timeout of 1 second */
217  |   fd_set rset;
218  |   
219  |   /* this is to allow cancellation of the select(3) call */
220  |   pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL);
221  | 
222  |   /* now ready for the cancellation */
223  |   pthread_mutex_unlock( & condat->watchmutex ); 
224  |   
225  |   FD_ZERO(&rset);
226  |   FD_SET(socket, &rset);  
227  |   do {    
228  |     /* run the select exposed to cancellation */
229  |     pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
230  |     nready=select(socket+1, &rset, NULL, NULL, &timeout);
231  |     pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL);
232  | 
233  |     ER_dbg_va(FAC_SK, ASP_SK_WATCH,"select returned %d", nready);
234  |     /* quit on error */
235  |     if( nready < 0 ) {
236  |       break;
237  |     }
238  | 
239  |     /* retry if the timeout has triggered */
240  |     if( nready == 0 ) {
241  |       continue;
242  |     }
243  |     
244  |     /* There was some input or client half of connection was closed */
245  |     /* Check for the latter */
246  |     if (( n=read(socket, buff, sizeof(buff))) == 0) {
247  |       /* Connection was closed by client */
248  |       /* Now send a cancellation request to the whois thread. */
249  |       /* mysql thread will be terminated by thread cleanup routine */
250  |       
251  |       /* call the actions: kill and exec (the SK_ functions called
252  | 	 check if the action is defined. Will set the RTC flag on condat 
253  |       */
254  |       SK_watchtrigger(condat);
255  |       
256  |       /* quit */
257  |       break;
258  |     }
259  |     /* Otherwise dump input and continue */
260  | 
261  |   } while(nready != -1);
262  |   
263  |   return NULL; /* quit */
264  | }
265  | 
266  | 
/*++++++++++++++++++++++++++++++++++++++
  One-time set-up hook for the PTHREAD_CANCEL mode. Nothing has to be
  prepared in this mode, so the function is intentionally a no-op.
  ++++++++++++++++++++++++++++++++++++++*/
static void sk_real_init(void)
{
  return;
}
273  | 
274  | #endif  /* WATCHDOG_BY_SIGNAL */
275  | 
276  | 
277  | /*++++++++++++++++++++++++++++++++++++++
278  |    starts sk_watchdog thread unless already started,
279  |    and registers its threadid in the condat structure
280  | 
281  |    dies if watchdog already running
282  | 
283  |    er_ret_t SK_watchstart   Returns SK_OK on success.
284  |    
285  |    sk_conn_st *condat       pointer to the connection data structure
286  | 
287  |    The structure may (and normally, should) contain the predefined actions
288  |    set by SK_watch_set... functions.
289  |   ++++++++++++++++++++++++++++++++++++++*/
290  | er_ret_t
291  | SK_watchstart(sk_conn_st *condat)
292  | {
293  |   pthread_attr_t attr;
294  |   size_t ssize;
295  |   int ret;
296  | 
297  |   dieif( condat->watchdog != 0 );
298  | 
299  |   dieif(pthread_attr_init(&attr) != 0);
300  | 
301  | #if defined(HAVE_PTHREAD_ATTR_GETSTACKSIZE) && \
302  |     defined(HAVE_PTHREAD_ATTR_SETSTACKSIZE)
303  |   /*********
304  |     For SCO, we need to increase the stack size, because the default is
305  |     exceedingly small.  This also works on FreeBSD.  In Solaris, the
306  |     stack size is 0, which is interpreted as the default, meaning 1
307  |     Mbyte for 32-bit processes or 2 Mbyte for 64-bit processes.
308  |     However, trying to *set* the stack size to 0 results in an error.
309  |     Therefore, we don't want to set the size to 0.  Probably not a good
310  |     idea in any event.  :) Linux doesn't support this function (as of
311  |     the 2.4.2 kernel).
312  | 
313  |     Note: see also modules/th/thread.c
314  |    *********/
315  |   dieif(pthread_attr_getstacksize(&attr, &ssize) != 0);
316  |   if (ssize > 0) {
317  |       dieif(pthread_attr_setstacksize(&attr, ssize * 4) != 0);
318  |   }
319  | #endif
320  |   
321  |   /* init the mutex in locked state, watchdog will unlock it when 
322  |      it's ready for signal/cancellation */
323  |   pthread_mutex_init( & condat->watchmutex, NULL );
324  |   pthread_mutex_lock( & condat->watchmutex ); 
325  | 
326  |   /* NOT DETACHED! */
327  |   pthread_create(&condat->watchdog, &attr, sk_watchdog, (void *) condat );
328  | 
329  |   dieif(pthread_attr_destroy(&attr) != 0);
330  |   
331  |   return SK_OK;
332  | }
333  | 
334  | 
335  | /*++++++++++++++++++++++++++++++++++++++
336  |   
337  |   stops running sk_watchdog thread. 
338  |   If it is not running ( == not registered in the connection struct), 
339  |   it does nothing.
340  | 
341  |   er_ret_t SK_watchstop    always succeeds (returns SK_OK)
342  |   
343  |   sk_conn_st *condat       pointer to the connection data structure
344  |   ++++++++++++++++++++++++++++++++++++++*/
345  | er_ret_t
346  | SK_watchstop(sk_conn_st *condat)
347  | {
348  |   void *res;
349  | 
350  |   if(condat->watchdog > 0) {
351  |     int ret;
352  | 
353  |     /* wait until the watchdog is ready for signal */
354  |     pthread_mutex_lock( & condat->watchmutex ); 
355  | 
356  | #ifdef WATCHDOG_BY_SIGNAL
357  |     ret = pthread_kill(condat->watchdog, SIGUSR2);
358  | #else
359  |     ret = pthread_cancel(condat->watchdog);
360  | #endif
361  |     
362  |     ret = pthread_join(condat->watchdog, &res);
363  |     
364  |     pthread_mutex_destroy( & condat->watchmutex ); 
365  |     condat->watchdog = 0;
366  |   }
367  |   return SK_OK;
368  | }
369  | 
370  | 
371  | /*++++++++++++++++++++++++++++++++++++++
372  | 
373  |   void SK_watch_setkill       sets the thread id of the thread to be
374  |                               cancelled by the watchdog watching this socket.
375  | 			      0 (default) means do not cancel anything.
376  | 
377  |   sk_conn_st *condat          pointer to the connection data structure.
378  |   
379  |   pthread_t killthis          thread id of the thread to be cancelled, or 0.
380  |   ++++++++++++++++++++++++++++++++++++++*/
381  | void
382  | SK_watch_setkill(sk_conn_st *condat, pthread_t killthis)
383  | {
384  |   condat->killthis = killthis;
385  | }
386  | 
387  | 
388  | /*++++++++++++++++++++++++++++++++++++++
389  |   
390  |   void SK_watch_setexec       sets the function to be invoked by the watchdog 
391  |                               watching this socket. NULL (default) means do
392  | 			      not invoke anything.
393  |   
394  |   sk_conn_st *condat          pointer to the connection data structure.
395  |   
396  |   void *(*function)(void *)   function to be invoked
397  |   
398  |   void *args                  argument to be passed to the function.
399  | 
400  |   ++++++++++++++++++++++++++++++++++++++*/
401  | void
402  | SK_watch_setexec( sk_conn_st *condat, void *(*function)(void *) , void *args)
403  | {
404  |   condat->execthis = function;
405  |   condat->execargs = args;
406  | }
407  | 
408  | 
409  | /*++++++++++++++++++++++++++++++++++++++
410  |   
411  |   void SK_watch_setclear      clears the function and thread id fields so that
412  |                               nothing gets cancelled or invoked by the 
413  | 			      watchdog.
414  | 
415  |   sk_conn_st *condat          pointer to the connection data structure.
416  | 
417  |   ++++++++++++++++++++++++++++++++++++++*/
418  | void 
419  | SK_watch_setclear(sk_conn_st *condat) 
420  | {
421  |   condat->execthis = NULL;
422  |   condat->execargs = NULL;
423  |   condat->killthis = 0;
424  | }
425  | 
426  | /* call the function to be called if defined */
427  | 
428  | 
429  | /*++++++++++++++++++++++++++++++++++++++
430  | 
431  |   void SK_watchexec          invokes the predefined function if defined.
432  |                              (usually called from the watchdog). 
433  | 			     Also sets the reason-to-close
434  | 			     flag on this connection to SK_INTERRUPT.
435  |   
436  |   sk_conn_st *condat         pointer to the connection data structure.
437  | 
438  |   ++++++++++++++++++++++++++++++++++++++*/
439  | void 
440  | SK_watchexec(sk_conn_st *condat) 
441  | {
442  |   /* set the reason-to-close flag on this connection */
443  |   condat->rtc |= SK_INTERRUPT;
444  |   
445  |   if( condat->execthis != NULL ) {
446  |     condat->execthis(condat->execargs);
447  |   } 
448  | }
449  | 
450  | /* cancel the thread to be cancelled if defined */
451  | 
452  | 
453  | /*++++++++++++++++++++++++++++++++++++++
454  |   
455  |   void SK_watchkill          cancels the predefined thread if defined.
456  |                              (usually called from the watchdog). 
457  | 			     Also sets the reason-to-close
458  | 			     flag on this connection to SK_INTERRUPT.
459  | 
460  |   sk_conn_st *condat         pointer to the connection data structure.
461  | 
462  |   ++++++++++++++++++++++++++++++++++++++*/
463  | void 
464  | SK_watchkill(sk_conn_st *condat) {
465  | 
466  |   /* set the reason-to-close flag on this connection */
467  |   condat->rtc |= SK_INTERRUPT;
468  | 
469  |   /* cancel thread if defined */
470  |   if( condat->killthis != 0 ) {
471  |     pthread_cancel(condat->killthis);
472  |     /* The only possible error is ESRCH, so we do not care about it*/
473  |   }
474  | }
475  | 
476  | 
477  | /*++++++++++++++++++++++++++++++++++++++
478  |   
479  |   void SK_watchtrigger       Wrapper around SK_watchkill and SK_watchexec.
480  |                              First executes the function, then cancels the
481  | 			     thread.
482  | 
483  |   sk_conn_st *condat         pointer to the connection data structure.
484  | 
485  |   ++++++++++++++++++++++++++++++++++++++*/
486  | void SK_watchtrigger(sk_conn_st *condat) 
487  | {
488  |      SK_watchexec(condat); 
489  |      SK_watchkill(condat);    
490  | }
491  | 
492  | 
493  | /*++++++++++++++++++++++++++++++++++++++
494  |   Initialisation function, should be called exactly once 
495  |   (well, it ignores repeated calls). The actions depend on cancellation
496  |   mode (signal or pthread_cancel).
497  |   ++++++++++++++++++++++++++++++++++++++*/
498  | void  SK_init(void)
499  | {
500  |   /* can be called only once */
501  |   pthread_once( &sk_init_once, sk_real_init);
502  | }