modules/sk/cd_watchdog.c

/* [<][>]
[^][v][top][bottom][index][help] */

FUNCTIONS

This source file includes the following functions.
  1. sk_real_init
  2. func_sigusr
  3. sk_watchdog
  4. sk_watchdog
  5. sk_real_init
  6. SK_watchstart
  7. SK_watchstop
  8. SK_watch_setkill
  9. SK_watch_setexec
  10. SK_watch_setclear
  11. SK_watchexec
  12. SK_watchkill
  13. SK_watchtrigger
  14. SK_init

   1 /***************************************
   2   $Revision: 1.8 $
   3 
   4   Socket module - cd_watchdog.c - Socket watchdog - when activated, checks the
   5                                   socket for new data and discards it. If the
   6                                   socket is closed, it triggers predefined
   7                                   functions - executes a function and/or 
   8                                   cancels a thread.
   9 
  10   Status: NOT REVIEWED, TESTED
  11 
  12   Design and implementation by Marek Bukowy.
  13 
  14   Modification history:
  15   marek  (August 2000) Created the watchdog part
  16   marek  (December 2000) Modified watchdog deactivation - 
  17                          replaced signals by pthread cancellation.
  18   ******************/ /******************
  19   Copyright (c) 1999,2000,2001,2002                     RIPE NCC
  20  
  21   All Rights Reserved
  22   
  23   Permission to use, copy, modify, and distribute this software and its
  24   documentation for any purpose and without fee is hereby granted,
  25   provided that the above copyright notice appear in all copies and that
  26   both that copyright notice and this permission notice appear in
  27   supporting documentation, and that the name of the author not be
  28   used in advertising or publicity pertaining to distribution of the
  29   software without specific, written prior permission.
  30   
  31   THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
  32   ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS; IN NO EVENT SHALL
  33   AUTHOR BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
  34   DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
  35   AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  36   OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  37   ***************************************/
  38 
  39 #include "rip.h"
  40 
  41 /*+ Uncomment this to use watchdog deactivation by signal (may be risky)
  42 
  43    #define WATCHDOG_BY_SIGNAL
  44 +*/
  45 
  46 static pthread_once_t sk_init_once = PTHREAD_ONCE_INIT; 
  47 
  48 #ifdef WATCHDOG_BY_SIGNAL
  49 
  50 /*+ The signal version is complicated to cope with all timing situations.
  51   It uses a thread specific flag to see if the signal handler was invoked
  52   in case the signal arrives before select(3) is called in watchdog.
  53 +*/
  54 
  55 /* thread specific flag */
  56 static pthread_key_t  sk_watch_tsd;
  57 
  58 /*++++++++++++++++++++++++++++++++++++++
  59   initialisation for the SIGNAL cancellation mode 
  60   - initialises the thread specific flag.
  61   ++++++++++++++++++++++++++++++++++++++*/
  62 static void sk_real_init(void)
     /* [<][>][^][v][top][bottom][index][help] */
  63 {
  64   dieif( pthread_key_create( &sk_watch_tsd, NULL) != 0 );
  65 }
  66 
  67 
  68 /*++++++++++++++++++++++++++++++++++++++
  69   sk_watchdog signal handler - sets the thread-specific flag.
  70 
  71   int n      signal received. (not used)
  72   ++++++++++++++++++++++++++++++++++++++*/
  73 static void func_sigusr(int n) {
     /* [<][>][^][v][top][bottom][index][help] */
  74 #if 0
  75   /* just for debugging - we don't check the value here */
  76   int *tsd_flag = (int *) pthread_getspecific(sk_watch_tsd);
  77 #endif
  78 
  79   /* 2000/12/18 MB:
  80      DEADLOCK has happened - the watchdog was just getting a mutex
  81      for the ER rwlock when a signal arrived and the execution of the
  82      pthread_mutex_lock function was interrupted AFTER the lock was
  83      grabbed. The this handler was invoked and tried to get that mutex
  84      again. As a result, everything stopped.
  85 
  86      Cures: 
  87      1. Not invoke this here:
  88      ER_dbg_va(FAC_SK, ASP_SK_GEN,"func_sigusr(%d) called", n);
  89 
  90      2. Not accept any signals during any pthread calls so that this
  91      does not happen again. Must be reimplemented with pthread_cancel 
  92      and all the signal stuff must go away. (Done, 2000/12/19).
  93   */
  94   /* set a thread-specific flag that the handler was invoked */
  95   
  96   pthread_setspecific(sk_watch_tsd, (void *)1 );
  97 }
  98 
  99 /*++++++++++++++++++++++++++++++++++++++
 100   watchdog (SIGNAL VERSION) - started as a separate thread. 
 101 
 102    Selects on the given socket; discards all input.
 103    whenever it sees end of file (socket closed), it
 104    * sets a corresponding flag in the condat structure, 
 105    * triggers the predefined actions (by SK_watchtrigger).
 106 
 107   void *arg    - pointer to the connection data structure
 108   ++++++++++++++++++++++++++++++++++++++*/
 109 static
 110 void *sk_watchdog(void *arg)
     /* [<][>][^][v][top][bottom][index][help] */
 111 {
 112   sk_conn_st *condat = (sk_conn_st *) arg;
 113   int nready;
 114   int n;
 115   fd_set rset;
 116   char buff[STR_S];
 117   int socket = condat->sock;
 118   sigset_t sset;
 119   struct sigaction act;
 120   
 121   struct timeval timeout = { 1, 0 }; /* it's a timeout of 1 second */
 122 
 123   FD_ZERO(&rset);
 124   FD_SET(socket, &rset);
 125 
 126   sigemptyset(&sset);
 127   sigaddset(&sset, SIGUSR2);
 128   
 129   act.sa_handler = func_sigusr;
 130   act.sa_flags = 0;
 131   dieif(sigaction(SIGUSR2, &act, NULL) != 0);
 132 
 133   /* XXX in fact, it's unblocked already. Should be blocked on startup */
 134   dieif(pthread_sigmask(SIG_UNBLOCK, &sset, NULL) != 0);
 135   
 136   /* clear the handler's flag */
 137   pthread_setspecific(sk_watch_tsd, NULL);
 138   
 139   /* now ready for signal */
 140   pthread_mutex_unlock( & condat->watchmutex ); 
 141 
 142   /* hey, viva threaded signal handling! There is no way for select
 143      to unblock a blocked signal, It must be done by "hand" (above).
 144 
 145      Consequently, every once in a while, the signal will be delivered
 146      before the select starts :-/. So, we have to introduce a timeout
 147      for select and check if the signal was delivered anyway....aARGH!!!
 148 
 149      This adds a <timeout interval> to unlucky queries, about 0.1% of all.
 150   */
 151 
 152   while ((nready=select(socket+1, &rset, NULL, NULL, &timeout))!=-1) {
 153     
 154     ER_dbg_va(FAC_SK, ASP_SK_WATCH,"select returned %d", nready);
 155 
 156     /* don't even try to read if we have been killed */
 157     if( errno == EINTR || pthread_getspecific(sk_watch_tsd) != NULL ) {
 158       break;
 159     }
 160 
 161     /* retry if the timeout has triggered */
 162     if( nready == 0 ) {
 163       continue;
 164     }
 165 
 166    /* There was some input or client half of connection was closed */
 167    /* Check for the latter */
 168     if (( n=read(socket, buff, sizeof(buff))) == 0) {
 169    /* Connection was closed by client */
 170    /* Now send a cancellation request to the whois thread. */
 171    /* mysql thread will be terminated by thread cleanup routine */
 172      
 173      /* call the actions: kill and exec (the SK_ functions called
 174         check if the action is defined. Will set the RTC flag on condat 
 175      */
 176      SK_watchtrigger(condat);
 177 
 178      /* quit */
 179      break;
 180    }
 181    /* Otherwise dump input and continue */
 182 
 183   }
 184 
 185   /* Exit the watchdog thread, passing NULL as we don't expect a join */
 186   pthread_exit(NULL);
 187 
 188   /* oh yes. Shouldn't compilers _recognize_ library functions ? */
 189   return NULL;
 190 }
 191 
 192 
 193 #else /* not WATCHDOG_BY_SIGNAL */
 194 
 195 
 196 /*++++++++++++++++++++++++++++++++++++++
 197   watchdog (CANCEL VERSION) - started as a separate thread. 
 198 
 199    Selects on the given socket; discards all input.
 200    whenever it sees end of file (socket closed), it
 201    * sets a corresponding flag in the condat structure, 
 202    * triggers the predefined actions (by SK_watchtrigger).
 203 
 204   void *arg    - pointer to the connection data structure
 205   ++++++++++++++++++++++++++++++++++++++*/
 206 static
 207 void *sk_watchdog(void *arg)
     /* [<][>][^][v][top][bottom][index][help] */
 208 {
 209   sk_conn_st *condat = (sk_conn_st *) arg;
 210   int nready;
 211   int n;
 212   char buff[STR_S];
 213   int socket = condat->sock;
 214   struct timeval timeout = { 1, 0 }; /* it's a timeout of 1 second */
 215   fd_set rset;
 216   
 217   /* this is to allow cancellation of the select(3) call */
 218   pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL);
 219 
 220   /* now ready for the cancellation */
 221   pthread_mutex_unlock( & condat->watchmutex ); 
 222   
 223   FD_ZERO(&rset);
 224   FD_SET(socket, &rset);  
 225   do {    
 226     /* run the select exposed to cancellation */
 227     pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
 228     nready=select(socket+1, &rset, NULL, NULL, &timeout);
 229     pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL);
 230 
 231     ER_dbg_va(FAC_SK, ASP_SK_WATCH,"select returned %d", nready);
 232     /* quit on error */
 233     if( nready < 0 ) {
 234       break;
 235     }
 236 
 237     /* retry if the timeout has triggered */
 238     if( nready == 0 ) {
 239       continue;
 240     }
 241     
 242     /* There was some input or client half of connection was closed */
 243     /* Check for the latter */
 244     if (( n=read(socket, buff, sizeof(buff))) == 0) {
 245       /* Connection was closed by client */
 246       /* Now send a cancellation request to the whois thread. */
 247       /* mysql thread will be terminated by thread cleanup routine */
 248       
 249       /* call the actions: kill and exec (the SK_ functions called
 250          check if the action is defined. Will set the RTC flag on condat 
 251       */
 252       SK_watchtrigger(condat);
 253       
 254       /* quit */
 255       break;
 256     }
 257     /* Otherwise dump input and continue */
 258 
 259   } while(nready != -1);
 260   
 261   return NULL; /* quit */
 262 }
 263 
 264 
 265 /*++++++++++++++++++++++++++++++++++++++
 266   initialisation for the PTHREAD_CANCEL mode is not needed.
 267   ++++++++++++++++++++++++++++++++++++++*/
 268 static void sk_real_init(void) {
     /* [<][>][^][v][top][bottom][index][help] */
 269   /* EMPTY */
 270 }
 271 
 272 #endif  /* WATCHDOG_BY_SIGNAL */
 273 
 274 
 275 /*++++++++++++++++++++++++++++++++++++++
 276    starts sk_watchdog thread unless already started,
 277    and registers its threadid in the condat structure
 278 
 279    dies if watchdog already running
 280 
 281    er_ret_t SK_watchstart   Returns SK_OK on success.
 282    
 283    sk_conn_st *condat       pointer to the connection data structure
 284 
 285    The structure may (and normally, should) contain the predefined actions
 286    set by SK_watch_set... functions.
 287   ++++++++++++++++++++++++++++++++++++++*/
 288 er_ret_t
 289 SK_watchstart(sk_conn_st *condat)
     /* [<][>][^][v][top][bottom][index][help] */
 290 {
 291   pthread_attr_t attr;
 292   size_t ssize;
 293 
 294   dieif( condat->watchdog != 0 );
 295 
 296   dieif(pthread_attr_init(&attr) != 0);
 297 
 298 #if defined(HAVE_PTHREAD_ATTR_GETSTACKSIZE) && \
 299     defined(HAVE_PTHREAD_ATTR_SETSTACKSIZE)
 300   /*********
 301     For SCO, we need to increase the stack size, because the default is
 302     exceedingly small.  This also works on FreeBSD.  In Solaris, the
 303     stack size is 0, which is interpreted as the default, meaning 1
 304     Mbyte for 32-bit processes or 2 Mbyte for 64-bit processes.
 305     However, trying to *set* the stack size to 0 results in an error.
 306     Therefore, we don't want to set the size to 0.  Probably not a good
 307     idea in any event.  :) Linux doesn't support this function (as of
 308     the 2.4.2 kernel).
 309 
 310     Note: see also modules/th/thread.c
 311    *********/
 312   dieif(pthread_attr_getstacksize(&attr, &ssize) != 0);
 313   if (ssize > 0) {
 314       dieif(pthread_attr_setstacksize(&attr, ssize * 4) != 0);
 315   }
 316 #endif
 317   
 318   /* init the mutex in locked state, watchdog will unlock it when 
 319      it's ready for signal/cancellation */
 320   pthread_mutex_init( & condat->watchmutex, NULL );
 321   pthread_mutex_lock( & condat->watchmutex ); 
 322 
 323 /* 
 324    Linux doesn't seem to like getting signals in select(), which isn't
 325    technically allowed by POSIX.  The workaround in this case is simply
 326    to not create a watchdog for Linux.  This is probably acceptable
 327    because we will be changing the query path to perform queries in small
 328    chunks, so if a disconnect occurs it won't consume a lot of database
 329    resources in any case, even without a watchdog.
 330  */
 331 #ifndef __linux__
 332   /* NOT DETACHED! */
 333   pthread_create(&condat->watchdog, &attr, sk_watchdog, (void *) condat );
 334 #endif /* __linux__ */
 335 
 336   dieif(pthread_attr_destroy(&attr) != 0);
 337   
 338   return SK_OK;
 339 }
 340 
 341 
 342 /*++++++++++++++++++++++++++++++++++++++
 343   
 344   stops running sk_watchdog thread. 
 345   If it is not running ( == not registered in the connection struct), 
 346   it does nothing.
 347 
 348   er_ret_t SK_watchstop    always succeeds (returns SK_OK)
 349   
 350   sk_conn_st *condat       pointer to the connection data structure
 351   ++++++++++++++++++++++++++++++++++++++*/
 352 er_ret_t
 353 SK_watchstop(sk_conn_st *condat)
     /* [<][>][^][v][top][bottom][index][help] */
 354 {
 355   void *res;
 356 
 357   if(condat->watchdog > 0) {
 358     int ret;
 359 
 360     /* wait until the watchdog is ready for signal */
 361     pthread_mutex_lock( & condat->watchmutex ); 
 362 
 363 #ifdef WATCHDOG_BY_SIGNAL
 364     ret = pthread_kill(condat->watchdog, SIGUSR2);
 365 #else
 366     ret = pthread_cancel(condat->watchdog);
 367 #endif
 368     
 369     ret = pthread_join(condat->watchdog, &res);
 370     
 371     pthread_mutex_destroy( & condat->watchmutex ); 
 372     condat->watchdog = 0;
 373   }
 374   return SK_OK;
 375 }
 376 
 377 
 378 /*++++++++++++++++++++++++++++++++++++++
 379 
 380   void SK_watch_setkill       sets the thread id of the thread to be
 381                               cancelled by the watchdog watching this socket.
 382                               0 (default) means do not cancel anything.
 383 
 384   sk_conn_st *condat          pointer to the connection data structure.
 385   
 386   pthread_t killthis          thread id of the thread to be cancelled, or 0.
 387   ++++++++++++++++++++++++++++++++++++++*/
 388 void
 389 SK_watch_setkill(sk_conn_st *condat, pthread_t killthis)
     /* [<][>][^][v][top][bottom][index][help] */
 390 {
 391   condat->killthis = killthis;
 392 }
 393 
 394 
 395 /*++++++++++++++++++++++++++++++++++++++
 396   
 397   void SK_watch_setexec       sets the function to be invoked by the watchdog 
 398                               watching this socket. NULL (default) means do
 399                               not invoke anything.
 400   
 401   sk_conn_st *condat          pointer to the connection data structure.
 402   
 403   void *(*function)(void *)   function to be invoked
 404   
 405   void *args                  argument to be passed to the function.
 406 
 407   ++++++++++++++++++++++++++++++++++++++*/
 408 void
 409 SK_watch_setexec( sk_conn_st *condat, void *(*function)(void *) , void *args)
     /* [<][>][^][v][top][bottom][index][help] */
 410 {
 411   condat->execthis = function;
 412   condat->execargs = args;
 413 }
 414 
 415 
 416 /*++++++++++++++++++++++++++++++++++++++
 417   
 418   void SK_watch_setclear      clears the function and thread id fields so that
 419                               nothing gets cancelled or invoked by the 
 420                               watchdog.
 421 
 422   sk_conn_st *condat          pointer to the connection data structure.
 423 
 424   ++++++++++++++++++++++++++++++++++++++*/
 425 void 
 426 SK_watch_setclear(sk_conn_st *condat) 
     /* [<][>][^][v][top][bottom][index][help] */
 427 {
 428   condat->execthis = NULL;
 429   condat->execargs = NULL;
 430   condat->killthis = 0;
 431 }
 432 
 433 /* call the function to be called if defined */
 434 
 435 
 436 /*++++++++++++++++++++++++++++++++++++++
 437 
 438   void SK_watchexec          invokes the predefined function if defined.
 439                              (usually called from the watchdog). 
 440                              Also sets the reason-to-close
 441                              flag on this connection to SK_INTERRUPT.
 442   
 443   sk_conn_st *condat         pointer to the connection data structure.
 444 
 445   ++++++++++++++++++++++++++++++++++++++*/
 446 void 
 447 SK_watchexec(sk_conn_st *condat) 
     /* [<][>][^][v][top][bottom][index][help] */
 448 {
 449   /* set the reason-to-close flag on this connection */
 450   condat->rtc |= SK_INTERRUPT;
 451   
 452   if( condat->execthis != NULL ) {
 453     condat->execthis(condat->execargs);
 454   } 
 455 }
 456 
 457 /* cancel the thread to be cancelled if defined */
 458 
 459 
 460 /*++++++++++++++++++++++++++++++++++++++
 461   
 462   void SK_watchkill          cancels the predefined thread if defined.
 463                              (usually called from the watchdog). 
 464                              Also sets the reason-to-close
 465                              flag on this connection to SK_INTERRUPT.
 466 
 467   sk_conn_st *condat         pointer to the connection data structure.
 468 
 469   ++++++++++++++++++++++++++++++++++++++*/
 470 void 
 471 SK_watchkill(sk_conn_st *condat) {
     /* [<][>][^][v][top][bottom][index][help] */
 472 
 473   /* set the reason-to-close flag on this connection */
 474   condat->rtc |= SK_INTERRUPT;
 475 
 476   /* cancel thread if defined */
 477   if( condat->killthis != 0 ) {
 478     pthread_cancel(condat->killthis);
 479     /* The only possible error is ESRCH, so we do not care about it*/
 480   }
 481 }
 482 
 483 
 484 /*++++++++++++++++++++++++++++++++++++++
 485   
 486   void SK_watchtrigger       Wrapper around SK_watchkill and SK_watchexec.
 487                              First executes the function, then cancels the
 488                              thread.
 489 
 490   sk_conn_st *condat         pointer to the connection data structure.
 491 
 492   ++++++++++++++++++++++++++++++++++++++*/
 493 void SK_watchtrigger(sk_conn_st *condat) 
     /* [<][>][^][v][top][bottom][index][help] */
 494 {
 495      SK_watchexec(condat); 
 496      SK_watchkill(condat);    
 497 }
 498 
 499 
 500 /*++++++++++++++++++++++++++++++++++++++
 501   Initialisation function, should be called exactly once 
 502   (well, it ignores repeated calls). The actions depend on cancellation
 503   mode (signal or pthread_cancel).
 504   ++++++++++++++++++++++++++++++++++++++*/
 505 void  SK_init(void)
     /* [<][>][^][v][top][bottom][index][help] */
 506 {
 507   /* can be called only once */
 508   pthread_once( &sk_init_once, sk_real_init);
 509 }

/* [<][>][^][v][top][bottom][index][help] */