1    | /***************************************
2    |   $Revision: 1.9 $
3    | 
4    |   Socket module - cd_watchdog.c - Socket watchdog - when activated, checks the
5    |                                   socket for new data and discards it. If the
6    | 				  socket is closed, it triggers predefined
7    | 				  functions - executes a function and/or 
8    | 				  cancels a thread.
9    | 
  Status: NOT REVIEWED, TESTED
11   | 
12   |   Design and implementation by Marek Bukowy.
13   | 
14   |   Modification history:
15   |   marek  (August 2000) Created the watchdog part
16   |   marek  (December 2000) Modified watchdog deactivation - 
17   |                          replaced signals by pthread cancellation.
18   |   ******************/ /******************
19   |   Copyright (c) 1999,2000,2001,2002                     RIPE NCC
20   |  
21   |   All Rights Reserved
22   |   
23   |   Permission to use, copy, modify, and distribute this software and its
24   |   documentation for any purpose and without fee is hereby granted,
25   |   provided that the above copyright notice appear in all copies and that
26   |   both that copyright notice and this permission notice appear in
27   |   supporting documentation, and that the name of the author not be
28   |   used in advertising or publicity pertaining to distribution of the
29   |   software without specific, written prior permission.
30   |   
31   |   THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
32   |   ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS; IN NO EVENT SHALL
33   |   AUTHOR BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
34   |   DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
35   |   AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
36   |   OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
37   |   ***************************************/
38   | 
39   | #include "rip.h"
40   | 
41   | /*+ Uncomment this to use watchdog deactivation by signal (may be risky)
42   | 
43   |    #define WATCHDOG_BY_SIGNAL
44   | +*/
45   | 
/* Once-control object passed to pthread_once() by SK_init(), so that
   sk_real_init() is run only on the first SK_init() call. */
static pthread_once_t sk_init_once = PTHREAD_ONCE_INIT; 
47   | 
48   | #ifdef WATCHDOG_BY_SIGNAL
49   | 
50   | /*+ The signal version is complicated to cope with all timing situations.
51   |   It uses a thread specific flag to see if the signal handler was invoked
52   |   in case the signal arrives before select(3) is called in watchdog.
53   | +*/
54   | 
/* Thread-specific flag (signal mode only).  The key is created in
   sk_real_init(); the watchdog thread resets its value to NULL before it
   starts selecting, and func_sigusr() stores a non-NULL value to record
   that the signal handler has been invoked in that thread. */
static pthread_key_t  sk_watch_tsd;
57   | 
/*++++++++++++++++++++++++++++++++++++++
  Initialisation for the SIGNAL cancellation mode: creates the
  thread-specific key sk_watch_tsd (no destructor is registered for it).
  The process dies (dieif) if the key cannot be created.

  Executed through pthread_once() from SK_init().
  ++++++++++++++++++++++++++++++++++++++*/
static void sk_real_init(void)
{
  /* NOTE(review): dieif is a project macro; its argument is left untouched */
  dieif( pthread_key_create( &sk_watch_tsd, NULL) != 0 );
}
66   | 
67   | 
68   | /*++++++++++++++++++++++++++++++++++++++
69   |   sk_watchdog signal handler - sets the thread-specific flag.
70   | 
71   |   int n      signal received. (not used)
72   |   ++++++++++++++++++++++++++++++++++++++*/
73   | static void func_sigusr(int n) {
74   | #if 0
75   |   /* just for debugging - we don't check the value here */
76   |   int *tsd_flag = (int *) pthread_getspecific(sk_watch_tsd);
77   | #endif
78   | 
79   |   /* 2000/12/18 MB:
80   |      DEADLOCK has happened - the watchdog was just getting a mutex
81   |      for the ER rwlock when a signal arrived and the execution of the
82   |      pthread_mutex_lock function was interrupted AFTER the lock was
83   |      grabbed. The this handler was invoked and tried to get that mutex
84   |      again. As a result, everything stopped.
85   | 
86   |      Cures: 
87   |      1. Not invoke this here:
88   |      ER_dbg_va(FAC_SK, ASP_SK_GEN,"func_sigusr(%d) called", n);
89   | 
90   |      2. Not accept any signals during any pthread calls so that this
91   |      does not happen again. Must be reimplemented with pthread_cancel 
92   |      and all the signal stuff must go away. (Done, 2000/12/19).
93   |   */
94   |   /* set a thread-specific flag that the handler was invoked */
95   |   
96   |   pthread_setspecific(sk_watch_tsd, (void *)1 );
97   | }
98   | 
99   | /*++++++++++++++++++++++++++++++++++++++
100  |   watchdog (SIGNAL VERSION) - started as a separate thread. 
101  | 
102  |    Selects on the given socket; discards all input.
103  |    whenever it sees end of file (socket closed), it
104  |    * sets a corresponding flag in the condat structure, 
105  |    * triggers the predefined actions (by SK_watchtrigger).
106  | 
107  |   void *arg    - pointer to the connection data structure
108  |   ++++++++++++++++++++++++++++++++++++++*/
109  | static
110  | void *sk_watchdog(void *arg)
111  | {
112  |   sk_conn_st *condat = (sk_conn_st *) arg;
113  |   int nready;
114  |   int n;
115  |   fd_set rset;
116  |   char buff[STR_S];
117  |   int socket = condat->sock;
118  |   sigset_t sset;
119  |   struct sigaction act;
120  |   
121  |   struct timeval timeout = { 1, 0 }; /* it's a timeout of 1 second */
122  | 
123  |   FD_ZERO(&rset);
124  |   FD_SET(socket, &rset);
125  | 
126  |   sigemptyset(&sset);
127  |   sigaddset(&sset, SIGUSR2);
128  |   
129  |   act.sa_handler = func_sigusr;
130  |   act.sa_flags = 0;
131  |   dieif(sigaction(SIGUSR2, &act, NULL) != 0);
132  | 
133  |   /* XXX in fact, it's unblocked already. Should be blocked on startup */
134  |   dieif(pthread_sigmask(SIG_UNBLOCK, &sset, NULL) != 0);
135  |   
136  |   /* clear the handler's flag */
137  |   pthread_setspecific(sk_watch_tsd, NULL);
138  |   
139  |   /* now ready for signal */
140  |   pthread_mutex_unlock( & condat->watchmutex ); 
141  | 
142  |   /* hey, viva threaded signal handling! There is no way for select
143  |      to unblock a blocked signal, It must be done by "hand" (above).
144  | 
145  |      Consequently, every once in a while, the signal will be delivered
146  |      before the select starts :-/. So, we have to introduce a timeout
147  |      for select and check if the signal was delivered anyway....aARGH!!!
148  | 
149  |      This adds a <timeout interval> to unlucky queries, about 0.1% of all.
150  |   */
151  | 
152  |   while ((nready=select(socket+1, &rset, NULL, NULL, &timeout))!=-1) {
153  |     
154  |     ER_dbg_va(FAC_SK, ASP_SK_WATCH,"select returned %d", nready);
155  | 
156  |     /* don't even try to read if we have been killed */
157  |     if( errno == EINTR || pthread_getspecific(sk_watch_tsd) != NULL ) {
158  |       break;
159  |     }
160  | 
161  |     /* retry if the timeout has triggered */
162  |     if( nready == 0 ) {
163  |       continue;
164  |     }
165  | 
166  |    /* There was some input or client half of connection was closed */
167  |    /* Check for the latter */
168  |     if (( n=read(socket, buff, sizeof(buff))) == 0) {
169  |    /* Connection was closed by client */
170  |    /* Now send a cancellation request to the whois thread. */
171  |    /* mysql thread will be terminated by thread cleanup routine */
172  |      
173  |      /* call the actions: kill and exec (the SK_ functions called
174  | 	check if the action is defined. Will set the RTC flag on condat 
175  |      */
176  |      SK_watchtrigger(condat);
177  | 
178  |      /* quit */
179  |      break;
180  |    }
181  |    /* Otherwise dump input and continue */
182  | 
183  |   }
184  | 
185  |   /* Exit the watchdog thread, passing NULL as we don't expect a join */
186  |   pthread_exit(NULL);
187  | 
188  |   /* oh yes. Shouldn't compilers _recognize_ library functions ? */
189  |   return NULL;
190  | }
191  | 
192  | 
193  | #else /* not WATCHDOG_BY_SIGNAL */
194  | 
195  | 
196  | /*++++++++++++++++++++++++++++++++++++++
197  |   watchdog (CANCEL VERSION) - started as a separate thread. 
198  | 
199  |    Selects on the given socket; discards all input.
200  |    whenever it sees end of file (socket closed), it
201  |    * sets a corresponding flag in the condat structure, 
202  |    * triggers the predefined actions (by SK_watchtrigger).
203  | 
204  |   void *arg    - pointer to the connection data structure
205  |   ++++++++++++++++++++++++++++++++++++++*/
206  | static
207  | void *sk_watchdog(void *arg)
208  | {
209  |   sk_conn_st *condat = (sk_conn_st *) arg;
210  |   int nready;
211  |   int n;
212  |   char buff[STR_S];
213  |   int socket = condat->sock;
214  |   struct timeval timeout = { 1, 0 }; /* it's a timeout of 1 second */
215  |   fd_set rset;
216  |   
217  |   /* this is to allow cancellation of the select(3) call */
218  |   pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL);
219  | 
220  |   /* now ready for the cancellation */
221  |   pthread_mutex_unlock( & condat->watchmutex ); 
222  |   
223  |   FD_ZERO(&rset);
224  |   FD_SET(socket, &rset);  
225  |   do {    
226  |     /* run the select exposed to cancellation */
227  |     pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
228  |     nready=select(socket+1, &rset, NULL, NULL, &timeout);
229  |     pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL);
230  | 
231  |     ER_dbg_va(FAC_SK, ASP_SK_WATCH,"select returned %d", nready);
232  |     /* quit on error */
233  |     if( nready < 0 ) {
234  |       break;
235  |     }
236  | 
237  |     /* retry if the timeout has triggered */
238  |     if( nready == 0 ) {
239  |       continue;
240  |     }
241  |     
242  |     /* There was some input or client half of connection was closed */
243  |     /* Check for the latter */
244  |     if (( n=read(socket, buff, sizeof(buff))) == 0) {
245  |       /* Connection was closed by client */
246  |       /* Now send a cancellation request to the whois thread. */
247  |       /* mysql thread will be terminated by thread cleanup routine */
248  |       
249  |       /* call the actions: kill and exec (the SK_ functions called
250  | 	 check if the action is defined. Will set the RTC flag on condat 
251  |       */
252  |       SK_watchtrigger(condat);
253  |       
254  |       /* quit */
255  |       break;
256  |     }
257  |     /* Otherwise dump input and continue */
258  | 
259  |   } while(nready != -1);
260  |   
261  |   return NULL; /* quit */
262  | }
263  | 
264  | 
/*++++++++++++++++++++++++++++++++++++++
  Initialisation hook for the PTHREAD_CANCEL mode.  This mode needs no
  set-up, so the function does nothing; it exists so that SK_init() has
  something to hand to pthread_once() in either mode.
  ++++++++++++++++++++++++++++++++++++++*/
static void
sk_real_init(void)
{
  /* intentionally empty */
  return;
}
271  | 
272  | #endif  /* WATCHDOG_BY_SIGNAL */
273  | 
274  | 
275  | /*++++++++++++++++++++++++++++++++++++++
276  |    starts sk_watchdog thread unless already started,
277  |    and registers its threadid in the condat structure
278  | 
279  |    dies if watchdog already running
280  | 
281  |    er_ret_t SK_watchstart   Returns SK_OK on success.
282  |    
283  |    sk_conn_st *condat       pointer to the connection data structure
284  | 
285  |    The structure may (and normally, should) contain the predefined actions
286  |    set by SK_watch_set... functions.
287  |   ++++++++++++++++++++++++++++++++++++++*/
288  | er_ret_t
289  | SK_watchstart(sk_conn_st *condat)
290  | {
291  |   dieif( condat->watchdog != 0 );
292  | 
293  |   /* init the mutex in locked state, watchdog will unlock it when 
294  |      it's ready for signal/cancellation */
295  |   pthread_mutex_init( & condat->watchmutex, NULL );
296  |   pthread_mutex_lock( & condat->watchmutex ); 
297  | 
298  | /* 
299  |    Linux doesn't seem to like getting signals in select(), which isn't
300  |    technically allowed by POSIX.  The workaround in this case is simply
301  |    to not create a watchdog for Linux.  This is probably acceptable
302  |    because we will be changing the query path to perform queries in small
303  |    chunks, so if a disconnect occurs it won't consume a lot of database
304  |    resources in any case, even without a watchdog.
305  | 
306  |    SCO has a really small stack, so we don't want to create extra threads.
307  |  */
308  | #if !defined(__linux__) && !defined(SCO)
309  |   /* NOT DETACHED! */
310  |   pthread_create(&condat->watchdog, NULL, sk_watchdog, (void *) condat );
311  | #endif /* __linux__ */
312  | 
313  |   return SK_OK;
314  | }
315  | 
316  | 
317  | /*++++++++++++++++++++++++++++++++++++++
318  |   
319  |   stops running sk_watchdog thread. 
320  |   If it is not running ( == not registered in the connection struct), 
321  |   it does nothing.
322  | 
323  |   er_ret_t SK_watchstop    always succeeds (returns SK_OK)
324  |   
325  |   sk_conn_st *condat       pointer to the connection data structure
326  |   ++++++++++++++++++++++++++++++++++++++*/
327  | er_ret_t
328  | SK_watchstop(sk_conn_st *condat)
329  | {
330  |   void *res;
331  | 
332  |   if(condat->watchdog > 0) {
333  |     int ret;
334  | 
335  |     /* wait until the watchdog is ready for signal */
336  |     pthread_mutex_lock( & condat->watchmutex ); 
337  | 
338  | #ifdef WATCHDOG_BY_SIGNAL
339  |     ret = pthread_kill(condat->watchdog, SIGUSR2);
340  | #else
341  |     ret = pthread_cancel(condat->watchdog);
342  | #endif
343  |     
344  |     ret = pthread_join(condat->watchdog, &res);
345  |     
346  |     pthread_mutex_destroy( & condat->watchmutex ); 
347  |     condat->watchdog = 0;
348  |   }
349  |   return SK_OK;
350  | }
351  | 
352  | 
353  | /*++++++++++++++++++++++++++++++++++++++
354  | 
355  |   void SK_watch_setkill       sets the thread id of the thread to be
356  |                               cancelled by the watchdog watching this socket.
357  | 			      0 (default) means do not cancel anything.
358  | 
359  |   sk_conn_st *condat          pointer to the connection data structure.
360  |   
361  |   pthread_t killthis          thread id of the thread to be cancelled, or 0.
362  |   ++++++++++++++++++++++++++++++++++++++*/
363  | void
364  | SK_watch_setkill(sk_conn_st *condat, pthread_t killthis)
365  | {
366  |   condat->killthis = killthis;
367  | }
368  | 
369  | 
370  | /*++++++++++++++++++++++++++++++++++++++
371  |   
372  |   void SK_watch_setexec       sets the function to be invoked by the watchdog 
373  |                               watching this socket. NULL (default) means do
374  | 			      not invoke anything.
375  |   
376  |   sk_conn_st *condat          pointer to the connection data structure.
377  |   
378  |   void *(*function)(void *)   function to be invoked
379  |   
380  |   void *args                  argument to be passed to the function.
381  | 
382  |   ++++++++++++++++++++++++++++++++++++++*/
383  | void
384  | SK_watch_setexec( sk_conn_st *condat, void *(*function)(void *) , void *args)
385  | {
386  |   condat->execthis = function;
387  |   condat->execargs = args;
388  | }
389  | 
390  | 
391  | /*++++++++++++++++++++++++++++++++++++++
392  |   
393  |   void SK_watch_setclear      clears the function and thread id fields so that
394  |                               nothing gets cancelled or invoked by the 
395  | 			      watchdog.
396  | 
397  |   sk_conn_st *condat          pointer to the connection data structure.
398  | 
399  |   ++++++++++++++++++++++++++++++++++++++*/
400  | void 
401  | SK_watch_setclear(sk_conn_st *condat) 
402  | {
403  |   condat->execthis = NULL;
404  |   condat->execargs = NULL;
405  |   condat->killthis = 0;
406  | }
407  | 
408  | /* call the function to be called if defined */
409  | 
410  | 
411  | /*++++++++++++++++++++++++++++++++++++++
412  | 
413  |   void SK_watchexec          invokes the predefined function if defined.
414  |                              (usually called from the watchdog). 
415  | 			     Also sets the reason-to-close
416  | 			     flag on this connection to SK_INTERRUPT.
417  |   
418  |   sk_conn_st *condat         pointer to the connection data structure.
419  | 
420  |   ++++++++++++++++++++++++++++++++++++++*/
421  | void 
422  | SK_watchexec(sk_conn_st *condat) 
423  | {
424  |   /* set the reason-to-close flag on this connection */
425  |   condat->rtc |= SK_INTERRUPT;
426  |   
427  |   if( condat->execthis != NULL ) {
428  |     condat->execthis(condat->execargs);
429  |   } 
430  | }
431  | 
432  | /* cancel the thread to be cancelled if defined */
433  | 
434  | 
435  | /*++++++++++++++++++++++++++++++++++++++
436  |   
437  |   void SK_watchkill          cancels the predefined thread if defined.
438  |                              (usually called from the watchdog). 
439  | 			     Also sets the reason-to-close
440  | 			     flag on this connection to SK_INTERRUPT.
441  | 
442  |   sk_conn_st *condat         pointer to the connection data structure.
443  | 
444  |   ++++++++++++++++++++++++++++++++++++++*/
445  | void 
446  | SK_watchkill(sk_conn_st *condat) {
447  | 
448  |   /* set the reason-to-close flag on this connection */
449  |   condat->rtc |= SK_INTERRUPT;
450  | 
451  |   /* cancel thread if defined */
452  |   if( condat->killthis != 0 ) {
453  |     pthread_cancel(condat->killthis);
454  |     /* The only possible error is ESRCH, so we do not care about it*/
455  |   }
456  | }
457  | 
458  | 
459  | /*++++++++++++++++++++++++++++++++++++++
460  |   
461  |   void SK_watchtrigger       Wrapper around SK_watchkill and SK_watchexec.
462  |                              First executes the function, then cancels the
463  | 			     thread.
464  | 
465  |   sk_conn_st *condat         pointer to the connection data structure.
466  | 
467  |   ++++++++++++++++++++++++++++++++++++++*/
468  | void SK_watchtrigger(sk_conn_st *condat) 
469  | {
470  |      SK_watchexec(condat); 
471  |      SK_watchkill(condat);    
472  | }
473  | 
474  | 
475  | /*++++++++++++++++++++++++++++++++++++++
476  |   Initialisation function, should be called exactly once 
477  |   (well, it ignores repeated calls). The actions depend on cancellation
478  |   mode (signal or pthread_cancel).
479  |   ++++++++++++++++++++++++++++++++++++++*/
480  | void  SK_init(void)
481  | {
482  |   /* can be called only once */
483  |   pthread_once( &sk_init_once, sk_real_init);
484  | }