1 | /*************************************** 2 | $Revision: 1.8 $ 3 | 4 | Socket module - cd_watchdog.c - Socket watchdog - when activated, checks the 5 | socket for new data and discards it. If the 6 | socket is closed, it triggers predefined 7 | functions - executes a function and/or 8 | cancels a thread. 9 | 10 | Status: NOT REVUED, TESTED 11 | 12 | Design and implementation by Marek Bukowy. 13 | 14 | Modification history: 15 | marek (August 2000) Created the watchdog part 16 | marek (December 2000) Modified watchdog deactivation - 17 | replaced signals by pthread cancellation. 18 | ******************/ /****************** 19 | Copyright (c) 1999,2000,2001,2002 RIPE NCC 20 | 21 | All Rights Reserved 22 | 23 | Permission to use, copy, modify, and distribute this software and its 24 | documentation for any purpose and without fee is hereby granted, 25 | provided that the above copyright notice appear in all copies and that 26 | both that copyright notice and this permission notice appear in 27 | supporting documentation, and that the name of the author not be 28 | used in advertising or publicity pertaining to distribution of the 29 | software without specific, written prior permission. 30 | 31 | THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING 32 | ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS; IN NO EVENT SHALL 33 | AUTHOR BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY 34 | DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN 35 | AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 36 | OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 37 | ***************************************/ 38 | 39 | #include "rip.h" 40 | 41 | /*+ Uncomment this to use watchdog deactivation by signal (may be risky) 42 | 43 | #define WATCHDOG_BY_SIGNAL 44 | +*/ 45 | 46 | static pthread_once_t sk_init_once = PTHREAD_ONCE_INIT; 47 | 48 | #ifdef WATCHDOG_BY_SIGNAL 49 | 50 | /*+ The signal version is complicated to cope with all timing situations. 51 | It uses a thread specific flag to see if the signal handler was invoked 52 | in case the signal arrives before select(3) is called in watchdog. 53 | +*/ 54 | 55 | /* thread specific flag */ 56 | static pthread_key_t sk_watch_tsd; 57 | 58 | /*++++++++++++++++++++++++++++++++++++++ 59 | initialisation for the SIGNAL cancellation mode 60 | - initialises the thread specific flag. 61 | ++++++++++++++++++++++++++++++++++++++*/ 62 | static void sk_real_init(void) 63 | { 64 | dieif( pthread_key_create( &sk_watch_tsd, NULL) != 0 ); 65 | } 66 | 67 | 68 | /*++++++++++++++++++++++++++++++++++++++ 69 | sk_watchdog signal handler - sets the thread-specific flag. 70 | 71 | int n signal received. (not used) 72 | ++++++++++++++++++++++++++++++++++++++*/ 73 | static void func_sigusr(int n) { 74 | #if 0 75 | /* just for debugging - we don't check the value here */ 76 | int *tsd_flag = (int *) pthread_getspecific(sk_watch_tsd); 77 | #endif 78 | 79 | /* 2000/12/18 MB: 80 | DEADLOCK has happened - the watchdog was just getting a mutex 81 | for the ER rwlock when a signal arrived and the execution of the 82 | pthread_mutex_lock function was interrupted AFTER the lock was 83 | grabbed. The this handler was invoked and tried to get that mutex 84 | again. As a result, everything stopped. 85 | 86 | Cures: 87 | 1. Not invoke this here: 88 | ER_dbg_va(FAC_SK, ASP_SK_GEN,"func_sigusr(%d) called", n); 89 | 90 | 2. Not accept any signals during any pthread calls so that this 91 | does not happen again. Must be reimplemented with pthread_cancel 92 | and all the signal stuff must go away. (Done, 2000/12/19). 93 | */ 94 | /* set a thread-specific flag that the handler was invoked */ 95 | 96 | pthread_setspecific(sk_watch_tsd, (void *)1 ); 97 | } 98 | 99 | /*++++++++++++++++++++++++++++++++++++++ 100 | watchdog (SIGNAL VERSION) - started as a separate thread. 101 | 102 | Selects on the given socket; discards all input. 103 | whenever it sees end of file (socket closed), it 104 | * sets a corresponding flag in the condat structure, 105 | * triggers the predefined actions (by SK_watchtrigger). 106 | 107 | void *arg - pointer to the connection data structure 108 | ++++++++++++++++++++++++++++++++++++++*/ 109 | static 110 | void *sk_watchdog(void *arg) 111 | { 112 | sk_conn_st *condat = (sk_conn_st *) arg; 113 | int nready; 114 | int n; 115 | fd_set rset; 116 | char buff[STR_S]; 117 | int socket = condat->sock; 118 | sigset_t sset; 119 | struct sigaction act; 120 | 121 | struct timeval timeout = { 1, 0 }; /* it's a timeout of 1 second */ 122 | 123 | FD_ZERO(&rset); 124 | FD_SET(socket, &rset); 125 | 126 | sigemptyset(&sset); 127 | sigaddset(&sset, SIGUSR2); 128 | 129 | act.sa_handler = func_sigusr; 130 | act.sa_flags = 0; 131 | dieif(sigaction(SIGUSR2, &act, NULL) != 0); 132 | 133 | /* XXX in fact, it's unblocked already. Should be blocked on startup */ 134 | dieif(pthread_sigmask(SIG_UNBLOCK, &sset, NULL) != 0); 135 | 136 | /* clear the handler's flag */ 137 | pthread_setspecific(sk_watch_tsd, NULL); 138 | 139 | /* now ready for signal */ 140 | pthread_mutex_unlock( & condat->watchmutex ); 141 | 142 | /* hey, viva threaded signal handling! There is no way for select 143 | to unblock a blocked signal, It must be done by "hand" (above). 144 | 145 | Consequently, every once in a while, the signal will be delivered 146 | before the select starts :-/. So, we have to introduce a timeout 147 | for select and check if the signal was delivered anyway....aARGH!!! 148 | 149 | This adds a <timeout interval> to unlucky queries, about 0.1% of all. 150 | */ 151 | 152 | while ((nready=select(socket+1, &rset, NULL, NULL, &timeout))!=-1) { 153 | 154 | ER_dbg_va(FAC_SK, ASP_SK_WATCH,"select returned %d", nready); 155 | 156 | /* don't even try to read if we have been killed */ 157 | if( errno == EINTR || pthread_getspecific(sk_watch_tsd) != NULL ) { 158 | break; 159 | } 160 | 161 | /* retry if the timeout has triggered */ 162 | if( nready == 0 ) { 163 | continue; 164 | } 165 | 166 | /* There was some input or client half of connection was closed */ 167 | /* Check for the latter */ 168 | if (( n=read(socket, buff, sizeof(buff))) == 0) { 169 | /* Connection was closed by client */ 170 | /* Now send a cancellation request to the whois thread. */ 171 | /* mysql thread will be terminated by thread cleanup routine */ 172 | 173 | /* call the actions: kill and exec (the SK_ functions called 174 | check if the action is defined. Will set the RTC flag on condat 175 | */ 176 | SK_watchtrigger(condat); 177 | 178 | /* quit */ 179 | break; 180 | } 181 | /* Otherwise dump input and continue */ 182 | 183 | } 184 | 185 | /* Exit the watchdog thread, passing NULL as we don't expect a join */ 186 | pthread_exit(NULL); 187 | 188 | /* oh yes. Shouldn't compilers _recognize_ library functions ? */ 189 | return NULL; 190 | } 191 | 192 | 193 | #else /* not WATCHDOG_BY_SIGNAL */ 194 | 195 | 196 | /*++++++++++++++++++++++++++++++++++++++ 197 | watchdog (CANCEL VERSION) - started as a separate thread. 198 | 199 | Selects on the given socket; discards all input. 200 | whenever it sees end of file (socket closed), it 201 | * sets a corresponding flag in the condat structure, 202 | * triggers the predefined actions (by SK_watchtrigger). 203 | 204 | void *arg - pointer to the connection data structure 205 | ++++++++++++++++++++++++++++++++++++++*/ 206 | static 207 | void *sk_watchdog(void *arg) 208 | { 209 | sk_conn_st *condat = (sk_conn_st *) arg; 210 | int nready; 211 | int n; 212 | char buff[STR_S]; 213 | int socket = condat->sock; 214 | struct timeval timeout = { 1, 0 }; /* it's a timeout of 1 second */ 215 | fd_set rset; 216 | 217 | /* this is to allow cancellation of the select(3) call */ 218 | pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL); 219 | 220 | /* now ready for the cancellation */ 221 | pthread_mutex_unlock( & condat->watchmutex ); 222 | 223 | FD_ZERO(&rset); 224 | FD_SET(socket, &rset); 225 | do { 226 | /* run the select exposed to cancellation */ 227 | pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL); 228 | nready=select(socket+1, &rset, NULL, NULL, &timeout); 229 | pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL); 230 | 231 | ER_dbg_va(FAC_SK, ASP_SK_WATCH,"select returned %d", nready); 232 | /* quit on error */ 233 | if( nready < 0 ) { 234 | break; 235 | } 236 | 237 | /* retry if the timeout has triggered */ 238 | if( nready == 0 ) { 239 | continue; 240 | } 241 | 242 | /* There was some input or client half of connection was closed */ 243 | /* Check for the latter */ 244 | if (( n=read(socket, buff, sizeof(buff))) == 0) { 245 | /* Connection was closed by client */ 246 | /* Now send a cancellation request to the whois thread. */ 247 | /* mysql thread will be terminated by thread cleanup routine */ 248 | 249 | /* call the actions: kill and exec (the SK_ functions called 250 | check if the action is defined. Will set the RTC flag on condat 251 | */ 252 | SK_watchtrigger(condat); 253 | 254 | /* quit */ 255 | break; 256 | } 257 | /* Otherwise dump input and continue */ 258 | 259 | } while(nready != -1); 260 | 261 | return NULL; /* quit */ 262 | } 263 | 264 | 265 | /*++++++++++++++++++++++++++++++++++++++ 266 | initialisation for the PTHREAD_CANCEL mode is not needed. 267 | ++++++++++++++++++++++++++++++++++++++*/ 268 | static void sk_real_init(void) { 269 | /* EMPTY */ 270 | } 271 | 272 | #endif /* WATCHDOG_BY_SIGNAL */ 273 | 274 | 275 | /*++++++++++++++++++++++++++++++++++++++ 276 | starts sk_watchdog thread unless already started, 277 | and registers its threadid in the condat structure 278 | 279 | dies if watchdog already running 280 | 281 | er_ret_t SK_watchstart Returns SK_OK on success. 282 | 283 | sk_conn_st *condat pointer to the connection data structure 284 | 285 | The structure may (and normally, should) contain the predefined actions 286 | set by SK_watch_set... functions. 287 | ++++++++++++++++++++++++++++++++++++++*/ 288 | er_ret_t 289 | SK_watchstart(sk_conn_st *condat) 290 | { 291 | pthread_attr_t attr; 292 | size_t ssize; 293 | 294 | dieif( condat->watchdog != 0 ); 295 | 296 | dieif(pthread_attr_init(&attr) != 0); 297 | 298 | #if defined(HAVE_PTHREAD_ATTR_GETSTACKSIZE) && \ 299 | defined(HAVE_PTHREAD_ATTR_SETSTACKSIZE) 300 | /********* 301 | For SCO, we need to increase the stack size, because the default is 302 | exceedingly small. This also works on FreeBSD. In Solaris, the 303 | stack size is 0, which is interpreted as the default, meaning 1 304 | Mbyte for 32-bit processes or 2 Mbyte for 64-bit processes. 305 | However, trying to *set* the stack size to 0 results in an error. 306 | Therefore, we don't want to set the size to 0. Probably not a good 307 | idea in any event. :) Linux doesn't support this function (as of 308 | the 2.4.2 kernel). 309 | 310 | Note: see also modules/th/thread.c 311 | *********/ 312 | dieif(pthread_attr_getstacksize(&attr, &ssize) != 0); 313 | if (ssize > 0) { 314 | dieif(pthread_attr_setstacksize(&attr, ssize * 4) != 0); 315 | } 316 | #endif 317 | 318 | /* init the mutex in locked state, watchdog will unlock it when 319 | it's ready for signal/cancellation */ 320 | pthread_mutex_init( & condat->watchmutex, NULL ); 321 | pthread_mutex_lock( & condat->watchmutex ); 322 | 323 | /* 324 | Linux doesn't seem to like getting signals in select(), which isn't 325 | technically allowed by POSIX. The workaround in this case is simply 326 | to not create a watchdog for Linux. This is probably acceptable 327 | because we will be changing the query path to perform queries in small 328 | chunks, so if a disconnect occurs it won't consume a lot of database 329 | resources in any case, even without a watchdog. 330 | */ 331 | #ifndef __linux__ 332 | /* NOT DETACHED! */ 333 | pthread_create(&condat->watchdog, &attr, sk_watchdog, (void *) condat ); 334 | #endif /* __linux__ */ 335 | 336 | dieif(pthread_attr_destroy(&attr) != 0); 337 | 338 | return SK_OK; 339 | } 340 | 341 | 342 | /*++++++++++++++++++++++++++++++++++++++ 343 | 344 | stops running sk_watchdog thread. 345 | If it is not running ( == not registered in the connection struct), 346 | it does nothing. 347 | 348 | er_ret_t SK_watchstop always succeeds (returns SK_OK) 349 | 350 | sk_conn_st *condat pointer to the connection data structure 351 | ++++++++++++++++++++++++++++++++++++++*/ 352 | er_ret_t 353 | SK_watchstop(sk_conn_st *condat) 354 | { 355 | void *res; 356 | 357 | if(condat->watchdog > 0) { 358 | int ret; 359 | 360 | /* wait until the watchdog is ready for signal */ 361 | pthread_mutex_lock( & condat->watchmutex ); 362 | 363 | #ifdef WATCHDOG_BY_SIGNAL 364 | ret = pthread_kill(condat->watchdog, SIGUSR2); 365 | #else 366 | ret = pthread_cancel(condat->watchdog); 367 | #endif 368 | 369 | ret = pthread_join(condat->watchdog, &res); 370 | 371 | pthread_mutex_destroy( & condat->watchmutex ); 372 | condat->watchdog = 0; 373 | } 374 | return SK_OK; 375 | } 376 | 377 | 378 | /*++++++++++++++++++++++++++++++++++++++ 379 | 380 | void SK_watch_setkill sets the thread id of the thread to be 381 | cancelled by the watchdog watching this socket. 382 | 0 (default) means do not cancel anything. 383 | 384 | sk_conn_st *condat pointer to the connection data structure. 385 | 386 | pthread_t killthis thread id of the thread to be cancelled, or 0. 387 | ++++++++++++++++++++++++++++++++++++++*/ 388 | void 389 | SK_watch_setkill(sk_conn_st *condat, pthread_t killthis) 390 | { 391 | condat->killthis = killthis; 392 | } 393 | 394 | 395 | /*++++++++++++++++++++++++++++++++++++++ 396 | 397 | void SK_watch_setexec sets the function to be invoked by the watchdog 398 | watching this socket. NULL (default) means do 399 | not invoke anything. 400 | 401 | sk_conn_st *condat pointer to the connection data structure. 402 | 403 | void *(*function)(void *) function to be invoked 404 | 405 | void *args argument to be passed to the function. 406 | 407 | ++++++++++++++++++++++++++++++++++++++*/ 408 | void 409 | SK_watch_setexec( sk_conn_st *condat, void *(*function)(void *) , void *args) 410 | { 411 | condat->execthis = function; 412 | condat->execargs = args; 413 | } 414 | 415 | 416 | /*++++++++++++++++++++++++++++++++++++++ 417 | 418 | void SK_watch_setclear clears the function and thread id fields so that 419 | nothing gets cancelled or invoked by the 420 | watchdog. 421 | 422 | sk_conn_st *condat pointer to the connection data structure. 423 | 424 | ++++++++++++++++++++++++++++++++++++++*/ 425 | void 426 | SK_watch_setclear(sk_conn_st *condat) 427 | { 428 | condat->execthis = NULL; 429 | condat->execargs = NULL; 430 | condat->killthis = 0; 431 | } 432 | 433 | /* call the function to be called if defined */ 434 | 435 | 436 | /*++++++++++++++++++++++++++++++++++++++ 437 | 438 | void SK_watchexec invokes the predefined function if defined. 439 | (usually called from the watchdog). 440 | Also sets the reason-to-close 441 | flag on this connection to SK_INTERRUPT. 442 | 443 | sk_conn_st *condat pointer to the connection data structure. 444 | 445 | ++++++++++++++++++++++++++++++++++++++*/ 446 | void 447 | SK_watchexec(sk_conn_st *condat) 448 | { 449 | /* set the reason-to-close flag on this connection */ 450 | condat->rtc |= SK_INTERRUPT; 451 | 452 | if( condat->execthis != NULL ) { 453 | condat->execthis(condat->execargs); 454 | } 455 | } 456 | 457 | /* cancel the thread to be cancelled if defined */ 458 | 459 | 460 | /*++++++++++++++++++++++++++++++++++++++ 461 | 462 | void SK_watchkill cancels the predefined thread if defined. 463 | (usually called from the watchdog). 464 | Also sets the reason-to-close 465 | flag on this connection to SK_INTERRUPT. 466 | 467 | sk_conn_st *condat pointer to the connection data structure. 468 | 469 | ++++++++++++++++++++++++++++++++++++++*/ 470 | void 471 | SK_watchkill(sk_conn_st *condat) { 472 | 473 | /* set the reason-to-close flag on this connection */ 474 | condat->rtc |= SK_INTERRUPT; 475 | 476 | /* cancel thread if defined */ 477 | if( condat->killthis != 0 ) { 478 | pthread_cancel(condat->killthis); 479 | /* The only possible error is ESRCH, so we do not care about it*/ 480 | } 481 | } 482 | 483 | 484 | /*++++++++++++++++++++++++++++++++++++++ 485 | 486 | void SK_watchtrigger Wrapper around SK_watchkill and SK_watchexec. 487 | First executes the function, then cancels the 488 | thread. 489 | 490 | sk_conn_st *condat pointer to the connection data structure. 491 | 492 | ++++++++++++++++++++++++++++++++++++++*/ 493 | void SK_watchtrigger(sk_conn_st *condat) 494 | { 495 | SK_watchexec(condat); 496 | SK_watchkill(condat); 497 | } 498 | 499 | 500 | /*++++++++++++++++++++++++++++++++++++++ 501 | Initialisation function, should be called exactly once 502 | (well, it ignores repeated calls). The actions depend on cancellation 503 | mode (signal or pthread_cancel). 504 | ++++++++++++++++++++++++++++++++++++++*/ 505 | void SK_init(void) 506 | { 507 | /* can be called only once */ 508 | pthread_once( &sk_init_once, sk_real_init); 509 | }