1 | /*************************************** 2 | $Revision: 1.4 $ 3 | 4 | Socket module - cd_watchdog.c - Socket watchdog - when activated, checks the 5 | socket for new data and discards it. If the 6 | socket is closed, it triggers predefined 7 | functions - executes a function and/or 8 | cancels a thread. 9 | 10 | Status: NOT REVUED, TESTED 11 | 12 | Design and implementation by Marek Bukowy. 13 | 14 | Modification history: 15 | marek (August 2000) Created the watchdog part 16 | marek (December 2000) Modified watchdog deactivation - 17 | replaced signals by pthread cancellation. 18 | ******************/ /****************** 19 | Copyright (c) 1999, 2000 RIPE NCC 20 | 21 | All Rights Reserved 22 | 23 | Permission to use, copy, modify, and distribute this software and its 24 | documentation for any purpose and without fee is hereby granted, 25 | provided that the above copyright notice appear in all copies and that 26 | both that copyright notice and this permission notice appear in 27 | supporting documentation, and that the name of the author not be 28 | used in advertising or publicity pertaining to distribution of the 29 | software without specific, written prior permission. 30 | 31 | THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING 32 | ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS; IN NO EVENT SHALL 33 | AUTHOR BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY 34 | DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN 35 | AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 36 | OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 37 | ***************************************/ 38 | 39 | #include "sk.h" 40 | /*+ String sizes +*/ 41 | #define STR_S 63 42 | 43 | /*+ Uncomment this to use watchdog deactivation by signal (may be risky) 44 | 45 | #define WATCHDOG_BY_SIGNAL 46 | +*/ 47 | 48 | static pthread_once_t sk_init_once = { PTHREAD_ONCE_INIT }; 49 | 50 | #ifdef WATCHDOG_BY_SIGNAL 51 | 52 | /*+ The signal version is complicated to cope with all timing situations. 53 | It uses a thread specific flag to see if the signal handler was invoked 54 | in case the signal arrives before select(3) is called in watchdog. 55 | +*/ 56 | 57 | /* thread specific flag */ 58 | static pthread_key_t sk_watch_tsd; 59 | 60 | /*++++++++++++++++++++++++++++++++++++++ 61 | initialisation for the SIGNAL cancellation mode 62 | - initialises the thread specific flag. 63 | ++++++++++++++++++++++++++++++++++++++*/ 64 | static void sk_real_init(void) 65 | { 66 | dieif( pthread_key_create( &sk_watch_tsd, NULL) != 0 ); 67 | } 68 | 69 | 70 | /*++++++++++++++++++++++++++++++++++++++ 71 | sk_watchdog signal handler - sets the thread-specific flag. 72 | 73 | int n signal received. (not used) 74 | ++++++++++++++++++++++++++++++++++++++*/ 75 | static void func_sigusr(int n) { 76 | #if 0 77 | /* just for debugging - we don't check the value here */ 78 | int *tsd_flag = (int *) pthread_getspecific(sk_watch_tsd); 79 | #endif 80 | 81 | /* 2000/12/18 MB: 82 | DEADLOCK has happened - the watchdog was just getting a mutex 83 | for the ER rwlock when a signal arrived and the execution of the 84 | pthread_mutex_lock function was interrupted AFTER the lock was 85 | grabbed. The this handler was invoked and tried to get that mutex 86 | again. As a result, everything stopped. 87 | 88 | Cures: 89 | 1. Not invoke this here: 90 | ER_dbg_va(FAC_SK, ASP_SK_GEN,"func_sigusr(%d) called", n); 91 | 92 | 2. Not accept any signals during any pthread calls so that this 93 | does not happen again. Must be reimplemented with pthread_cancel 94 | and all the signal stuff must go away. (Done, 2000/12/19). 95 | */ 96 | /* set a thread-specific flag that the handler was invoked */ 97 | 98 | pthread_setspecific(sk_watch_tsd, (void *)1 ); 99 | } 100 | 101 | /*++++++++++++++++++++++++++++++++++++++ 102 | watchdog (SIGNAL VERSION) - started as a separate thread. 103 | 104 | Selects on the given socket; discards all input. 105 | whenever it sees end of file (socket closed), it 106 | * sets a corresponding flag in the condat structure, 107 | * triggers the predefined actions (by SK_watchtrigger). 108 | 109 | void *arg - pointer to the connection data structure 110 | ++++++++++++++++++++++++++++++++++++++*/ 111 | static 112 | void *sk_watchdog(void *arg) 113 | { 114 | sk_conn_st *condat = (sk_conn_st *) arg; 115 | int nready; 116 | int n; 117 | fd_set rset; 118 | char buff[STR_S]; 119 | int socket = condat->sock; 120 | sigset_t sset; 121 | struct sigaction act; 122 | 123 | struct timeval timeout = { 1, 0 }; /* it's a timeout of 1 second */ 124 | 125 | FD_ZERO(&rset); 126 | FD_SET(socket, &rset); 127 | 128 | sigemptyset(&sset); 129 | sigaddset(&sset, SIGUSR2); 130 | 131 | act.sa_handler = func_sigusr; 132 | act.sa_flags = 0; 133 | dieif(sigaction(SIGUSR2, &act, NULL) != 0); 134 | 135 | /* XXX in fact, it's unblocked already. Should be blocked on startup */ 136 | dieif(pthread_sigmask(SIG_UNBLOCK, &sset, NULL) != 0); 137 | 138 | /* clear the handler's flag */ 139 | pthread_setspecific(sk_watch_tsd, NULL); 140 | 141 | /* now ready for signal */ 142 | pthread_mutex_unlock( & condat->watchmutex ); 143 | 144 | /* hey, viva threaded signal handling! There is no way for select 145 | to unblock a blocked signal, It must be done by "hand" (above). 146 | 147 | Consequently, every once in a while, the signal will be delivered 148 | before the select starts :-/. So, we have to introduce a timeout 149 | for select and check if the signal was delivered anyway....aARGH!!! 150 | 151 | This adds a <timeout interval> to unlucky queries, about 0.1% of all. 152 | */ 153 | 154 | while ((nready=select(socket+1, &rset, NULL, NULL, &timeout))!=-1) { 155 | 156 | ER_dbg_va(FAC_SK, ASP_SK_WATCH,"select returned %d", nready); 157 | 158 | /* don't even try to read if we have been killed */ 159 | if( errno == EINTR || pthread_getspecific(sk_watch_tsd) != NULL ) { 160 | break; 161 | } 162 | 163 | /* retry if the timeout has triggered */ 164 | if( nready == 0 ) { 165 | continue; 166 | } 167 | 168 | /* There was some input or client half of connection was closed */ 169 | /* Check for the latter */ 170 | if (( n=read(socket, buff, sizeof(buff))) == 0) { 171 | /* Connection was closed by client */ 172 | /* Now send a cancellation request to the whois thread. */ 173 | /* mysql thread will be terminated by thread cleanup routine */ 174 | 175 | /* call the actions: kill and exec (the SK_ functions called 176 | check if the action is defined. Will set the RTC flag on condat 177 | */ 178 | SK_watchtrigger(condat); 179 | 180 | /* quit */ 181 | break; 182 | } 183 | /* Otherwise dump input and continue */ 184 | 185 | } 186 | 187 | /* Exit the watchdog thread, passing NULL as we don't expect a join */ 188 | pthread_exit(NULL); 189 | 190 | /* oh yes. Shouldn't compilers _recognize_ library functions ? */ 191 | return NULL; 192 | } 193 | 194 | 195 | #else /* not WATCHDOG_BY_SIGNAL */ 196 | 197 | 198 | /*++++++++++++++++++++++++++++++++++++++ 199 | watchdog (CANCEL VERSION) - started as a separate thread. 200 | 201 | Selects on the given socket; discards all input. 202 | whenever it sees end of file (socket closed), it 203 | * sets a corresponding flag in the condat structure, 204 | * triggers the predefined actions (by SK_watchtrigger). 205 | 206 | void *arg - pointer to the connection data structure 207 | ++++++++++++++++++++++++++++++++++++++*/ 208 | static 209 | void *sk_watchdog(void *arg) 210 | { 211 | sk_conn_st *condat = (sk_conn_st *) arg; 212 | int nready; 213 | int n; 214 | char buff[STR_S]; 215 | int socket = condat->sock; 216 | struct timeval timeout = { 1, 0 }; /* it's a timeout of 1 second */ 217 | fd_set rset; 218 | 219 | /* this is to allow cancellation of the select(3) call */ 220 | pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL); 221 | 222 | /* now ready for the cancellation */ 223 | pthread_mutex_unlock( & condat->watchmutex ); 224 | 225 | FD_ZERO(&rset); 226 | FD_SET(socket, &rset); 227 | do { 228 | /* run the select exposed to cancellation */ 229 | pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL); 230 | nready=select(socket+1, &rset, NULL, NULL, &timeout); 231 | pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL); 232 | 233 | ER_dbg_va(FAC_SK, ASP_SK_WATCH,"select returned %d", nready); 234 | /* quit on error */ 235 | if( nready < 0 ) { 236 | break; 237 | } 238 | 239 | /* retry if the timeout has triggered */ 240 | if( nready == 0 ) { 241 | continue; 242 | } 243 | 244 | /* There was some input or client half of connection was closed */ 245 | /* Check for the latter */ 246 | if (( n=read(socket, buff, sizeof(buff))) == 0) { 247 | /* Connection was closed by client */ 248 | /* Now send a cancellation request to the whois thread. */ 249 | /* mysql thread will be terminated by thread cleanup routine */ 250 | 251 | /* call the actions: kill and exec (the SK_ functions called 252 | check if the action is defined. Will set the RTC flag on condat 253 | */ 254 | SK_watchtrigger(condat); 255 | 256 | /* quit */ 257 | break; 258 | } 259 | /* Otherwise dump input and continue */ 260 | 261 | } while(nready != -1); 262 | 263 | return NULL; /* quit */ 264 | } 265 | 266 | 267 | /*++++++++++++++++++++++++++++++++++++++ 268 | initialisation for the PTHREAD_CANCEL mode is not needed. 269 | ++++++++++++++++++++++++++++++++++++++*/ 270 | static void sk_real_init(void) { 271 | /* EMPTY */ 272 | } 273 | 274 | #endif /* WATCHDOG_BY_SIGNAL */ 275 | 276 | 277 | /*++++++++++++++++++++++++++++++++++++++ 278 | starts sk_watchdog thread unless already started, 279 | and registers its threadid in the condat structure 280 | 281 | dies if watchdog already running 282 | 283 | er_ret_t SK_watchstart Returns SK_OK on success. 284 | 285 | sk_conn_st *condat pointer to the connection data structure 286 | 287 | The structure may (and normally, should) contain the predefined actions 288 | set by SK_watch_set... functions. 289 | ++++++++++++++++++++++++++++++++++++++*/ 290 | er_ret_t 291 | SK_watchstart(sk_conn_st *condat) 292 | { 293 | pthread_attr_t attr; 294 | size_t ssize; 295 | int ret; 296 | 297 | dieif( condat->watchdog != 0 ); 298 | 299 | dieif(pthread_attr_init(&attr) != 0); 300 | 301 | #if defined(HAVE_PTHREAD_ATTR_GETSTACKSIZE) && \ 302 | defined(HAVE_PTHREAD_ATTR_SETSTACKSIZE) 303 | /********* 304 | For SCO, we need to increase the stack size, because the default is 305 | exceedingly small. This also works on FreeBSD. In Solaris, the 306 | stack size is 0, which is interpreted as the default, meaning 1 307 | Mbyte for 32-bit processes or 2 Mbyte for 64-bit processes. 308 | However, trying to *set* the stack size to 0 results in an error. 309 | Therefore, we don't want to set the size to 0. Probably not a good 310 | idea in any event. :) Linux doesn't support this function (as of 311 | the 2.4.2 kernel). 312 | 313 | Note: see also modules/th/thread.c 314 | *********/ 315 | dieif(pthread_attr_getstacksize(&attr, &ssize) != 0); 316 | if (ssize > 0) { 317 | dieif(pthread_attr_setstacksize(&attr, ssize * 4) != 0); 318 | } 319 | #endif 320 | 321 | /* init the mutex in locked state, watchdog will unlock it when 322 | it's ready for signal/cancellation */ 323 | pthread_mutex_init( & condat->watchmutex, NULL ); 324 | pthread_mutex_lock( & condat->watchmutex ); 325 | 326 | /* NOT DETACHED! */ 327 | pthread_create(&condat->watchdog, &attr, sk_watchdog, (void *) condat ); 328 | 329 | dieif(pthread_attr_destroy(&attr) != 0); 330 | 331 | return SK_OK; 332 | } 333 | 334 | 335 | /*++++++++++++++++++++++++++++++++++++++ 336 | 337 | stops running sk_watchdog thread. 338 | If it is not running ( == not registered in the connection struct), 339 | it does nothing. 340 | 341 | er_ret_t SK_watchstop always succeeds (returns SK_OK) 342 | 343 | sk_conn_st *condat pointer to the connection data structure 344 | ++++++++++++++++++++++++++++++++++++++*/ 345 | er_ret_t 346 | SK_watchstop(sk_conn_st *condat) 347 | { 348 | void *res; 349 | 350 | if(condat->watchdog > 0) { 351 | int ret; 352 | 353 | /* wait until the watchdog is ready for signal */ 354 | pthread_mutex_lock( & condat->watchmutex ); 355 | 356 | #ifdef WATCHDOG_BY_SIGNAL 357 | ret = pthread_kill(condat->watchdog, SIGUSR2); 358 | #else 359 | ret = pthread_cancel(condat->watchdog); 360 | #endif 361 | 362 | ret = pthread_join(condat->watchdog, &res); 363 | 364 | pthread_mutex_destroy( & condat->watchmutex ); 365 | condat->watchdog = 0; 366 | } 367 | return SK_OK; 368 | } 369 | 370 | 371 | /*++++++++++++++++++++++++++++++++++++++ 372 | 373 | void SK_watch_setkill sets the thread id of the thread to be 374 | cancelled by the watchdog watching this socket. 375 | 0 (default) means do not cancel anything. 376 | 377 | sk_conn_st *condat pointer to the connection data structure. 378 | 379 | pthread_t killthis thread id of the thread to be cancelled, or 0. 380 | ++++++++++++++++++++++++++++++++++++++*/ 381 | void 382 | SK_watch_setkill(sk_conn_st *condat, pthread_t killthis) 383 | { 384 | condat->killthis = killthis; 385 | } 386 | 387 | 388 | /*++++++++++++++++++++++++++++++++++++++ 389 | 390 | void SK_watch_setexec sets the function to be invoked by the watchdog 391 | watching this socket. NULL (default) means do 392 | not invoke anything. 393 | 394 | sk_conn_st *condat pointer to the connection data structure. 395 | 396 | void *(*function)(void *) function to be invoked 397 | 398 | void *args argument to be passed to the function. 399 | 400 | ++++++++++++++++++++++++++++++++++++++*/ 401 | void 402 | SK_watch_setexec( sk_conn_st *condat, void *(*function)(void *) , void *args) 403 | { 404 | condat->execthis = function; 405 | condat->execargs = args; 406 | } 407 | 408 | 409 | /*++++++++++++++++++++++++++++++++++++++ 410 | 411 | void SK_watch_setclear clears the function and thread id fields so that 412 | nothing gets cancelled or invoked by the 413 | watchdog. 414 | 415 | sk_conn_st *condat pointer to the connection data structure. 416 | 417 | ++++++++++++++++++++++++++++++++++++++*/ 418 | void 419 | SK_watch_setclear(sk_conn_st *condat) 420 | { 421 | condat->execthis = NULL; 422 | condat->execargs = NULL; 423 | condat->killthis = 0; 424 | } 425 | 426 | /* call the function to be called if defined */ 427 | 428 | 429 | /*++++++++++++++++++++++++++++++++++++++ 430 | 431 | void SK_watchexec invokes the predefined function if defined. 432 | (usually called from the watchdog). 433 | Also sets the reason-to-close 434 | flag on this connection to SK_INTERRUPT. 435 | 436 | sk_conn_st *condat pointer to the connection data structure. 437 | 438 | ++++++++++++++++++++++++++++++++++++++*/ 439 | void 440 | SK_watchexec(sk_conn_st *condat) 441 | { 442 | /* set the reason-to-close flag on this connection */ 443 | condat->rtc |= SK_INTERRUPT; 444 | 445 | if( condat->execthis != NULL ) { 446 | condat->execthis(condat->execargs); 447 | } 448 | } 449 | 450 | /* cancel the thread to be cancelled if defined */ 451 | 452 | 453 | /*++++++++++++++++++++++++++++++++++++++ 454 | 455 | void SK_watchkill cancels the predefined thread if defined. 456 | (usually called from the watchdog). 457 | Also sets the reason-to-close 458 | flag on this connection to SK_INTERRUPT. 459 | 460 | sk_conn_st *condat pointer to the connection data structure. 461 | 462 | ++++++++++++++++++++++++++++++++++++++*/ 463 | void 464 | SK_watchkill(sk_conn_st *condat) { 465 | 466 | /* set the reason-to-close flag on this connection */ 467 | condat->rtc |= SK_INTERRUPT; 468 | 469 | /* cancel thread if defined */ 470 | if( condat->killthis != 0 ) { 471 | pthread_cancel(condat->killthis); 472 | /* The only possible error is ESRCH, so we do not care about it*/ 473 | } 474 | } 475 | 476 | 477 | /*++++++++++++++++++++++++++++++++++++++ 478 | 479 | void SK_watchtrigger Wrapper around SK_watchkill and SK_watchexec. 480 | First executes the function, then cancels the 481 | thread. 482 | 483 | sk_conn_st *condat pointer to the connection data structure. 484 | 485 | ++++++++++++++++++++++++++++++++++++++*/ 486 | void SK_watchtrigger(sk_conn_st *condat) 487 | { 488 | SK_watchexec(condat); 489 | SK_watchkill(condat); 490 | } 491 | 492 | 493 | /*++++++++++++++++++++++++++++++++++++++ 494 | Initialisation function, should be called exactly once 495 | (well, it ignores repeated calls). The actions depend on cancellation 496 | mode (signal or pthread_cancel). 497 | ++++++++++++++++++++++++++++++++++++++*/ 498 | void SK_init(void) 499 | { 500 | /* can be called only once */ 501 | pthread_once( &sk_init_once, sk_real_init); 502 | }