1 /* This file is part of the Project Athena Zephyr Notification System.
2 * It contains functions for communication with other servers.
4 * Created by: John T. Kohl
9 * Copyright (c) 1987, 1991 by the Massachusetts Institute of Technology.
10 * For copying and distribution information, see the file
14 #include <zephyr/mit-copyright.h>
16 #include <sys/socket.h>
/*
 * RCS identification string, followed by the hash used for the table of
 * server-to-server packets that have not been acknowledged yet.
 * SRV_NACKTAB_HASHVAL XORs `which' with the zuid_addr.s_addr, tv.tv_sec and
 * tv.tv_usec members of `uid' and reduces the result modulo
 * SRV_NACKTAB_HASHSIZE.  The result is used as a bucket index into
 * srv_nacktab[].  `uid' is expanded three times, so pass an expression
 * without side effects.
 */
21 static const char rcsid_server_c[] = "$Id$";
25 #define SRV_NACKTAB_HASHSIZE 1023
26 #define SRV_NACKTAB_HASHVAL(which, uid) (((which) ^ (uid).zuid_addr.s_addr ^ \
27 (uid).tv.tv_sec ^ (uid).tv.tv_usec) \
28 % SRV_NACKTAB_HASHSIZE)
30 * Server manager. Deal with traffic to and from other servers.
34 * void server_shutdown()
36 * void server_timo(which)
39 * void server_dispatch(notice, auth, who)
42 * struct sockaddr_in *who;
44 * void server_recover(client)
47 * void server_adispatch(notice, auth, who, server)
50 * struct sockaddr_in *who;
53 * void server_forward(notice, auth, who)
56 * struct sockaddr_in *who;
58 * Server *server_which_server(who)
59 * struct sockaddr_in *who;
61 * void server_kill_clt(client);
64 * void server_dump_servers(fp);
67 * void server_reset();
/*
 * Forward declarations of the file-local helpers.  Parameter lists are
 * wrapped in the __P(()) macro, which is defined outside this file.
 * server_register is declared with an empty parameter list.
 * NOTE(review): the continuation lines of the send_msg_list and
 * admin_dispatch declarations are not present in this listing.
 */
70 static void server_flush __P((Server *));
71 static void hello_respond __P((struct sockaddr_in *, int, int));
72 static void srv_responded __P((struct sockaddr_in *));
73 static void send_msg __P((struct sockaddr_in *, char *, int));
74 static void send_msg_list __P((struct sockaddr_in *, char *, char **, int,
76 static void srv_nack_cancel __P((ZNotice_t *, struct sockaddr_in *));
77 static void srv_nack_release __P((Server *));
78 static void srv_nack_renumber __P((int *));
79 static void send_stats __P((struct sockaddr_in *));
80 static void server_queue __P((Server *, int, void *, int,
81 struct sockaddr_in *));
82 static void server_hello __P((Server *, int));
83 static void setup_server __P((Server *, struct in_addr *));
84 static void srv_rexmit __P((void *));
85 static void server_forw_reliable __P((Server *, caddr_t, int, ZNotice_t *));
86 static Code_t admin_dispatch __P((ZNotice_t *, int, struct sockaddr_in *,
88 static Code_t kill_clt __P((ZNotice_t *, Server *));
89 static Code_t extract_addr __P((ZNotice_t *, struct sockaddr_in *));
92 static Code_t server_register();
95 static int get_server_galaxy __P((const char *));
/*
 * Module state.
 *  my_galaxy       name of the local galaxy; filled in by
 *                  get_server_galaxy() unless it is already non-empty.
 *  srv_nacktab     hash buckets of unacknowledged packets, indexed with
 *                  SRV_NACKTAB_HASHVAL.
 *  otherservers    array of known servers; slot 0 is the `limbo' entry.
 *  nservers        number of entries in otherservers, limbo included.
 *  me_server_idx   index of this server's own entry.
 *  ADJUST / DONT_ADJUST   values passed as the `adj' argument of
 *                  hello_respond().
 *  timo_up / timo_tardy / timo_dead   timeouts stored in Server.timeout
 *                  when a server enters the corresponding state.
 */
97 char my_galaxy[MAXHOSTNAMELEN]; /* my galaxy name */
99 static Unacked *srv_nacktab[SRV_NACKTAB_HASHSIZE];
100 Server *otherservers; /* points to an array of the known
102 int nservers; /* number of other servers */
103 int me_server_idx; /* # of my entry in the array */
105 #define ADJUST (1) /* adjust timeout on hello input */
106 #define DONT_ADJUST (0) /* don't adjust timeout */
108 /* parameters controlling the transitions of the FSM's--patchable with adb */
109 long timo_up = TIMO_UP;
110 long timo_tardy = TIMO_TARDY;
111 long timo_dead = TIMO_DEAD;
117 * Initialize the array of servers. The `limbo' server goes in the first
118 * slot (otherservers[0]).
119 * Contact Hesiod to find all the other servers, allocate space for the
120 * structure, initialize them all to SERV_DEAD with expired timeouts.
121 * Set up a list header for server_forward retransmits.
/*
 * Body of the server-table initialisation routine.
 * NOTE(review): the function header is not part of this listing; this is
 * presumably server_init() -- confirm against the full file.
 *
 * Visible steps:
 *  - obtain the local galaxy name with get_server_galaxy(localconf_file)
 *    and log an error if that fails;
 *  - look my_galaxy up in galaxy_list[] and log an error if it is absent;
 *  - size otherservers[] as the configured server count plus one, the
 *    extra slot being the limbo entry at index 0;
 *  - set limbo up with address 0, then reset its timer and clear its
 *    timer, queue and dumping fields;
 *  - set up one entry per configured server; the entry whose address
 *    equals my_addr is marked SERV_UP and gets the same timer, queue and
 *    dumping treatment as limbo;
 *  - log an error if me_server_idx is -1 once the loop is done.
 */
128 struct in_addr *serv_addr, limbo_addr;
130 /* we don't need to mask SIGFPE here since when we are called,
131 the signal handler isn't set up yet. */
133 if (get_server_galaxy(localconf_file)) {
134 syslog(LOG_ERR, "Could not read galaxy from %s", localconf_file);
138 for (j=0; j<ngalaxies; j++)
139 if (strcmp(galaxy_list[j].galaxy_config.galaxy, my_galaxy) == 0)
142 if (j == ngalaxies) {
143 syslog(LOG_ERR, "Local galaxy (%s) not found in galaxy list",
153 /* increment servers to make room for 'limbo' */
154 nservers = galaxy_list[j].galaxy_config.nservers+1;
156 otherservers = (Server *) malloc(nservers * sizeof(Server));
160 limbo_addr.s_addr = 0;
161 setup_server(otherservers, &limbo_addr);
162 timer_reset(otherservers[0].timer);
163 otherservers[0].timer = NULL;
164 otherservers[0].queue = NULL;
165 otherservers[0].dumping = 0;
167 for (i = 1; i < nservers; i++) {
168 serv_addr = &galaxy_list[j].galaxy_config.server_list[i-1].addr;
170 setup_server(&otherservers[i], serv_addr);
172 if (serv_addr->s_addr == my_addr.s_addr) {
174 otherservers[i].state = SERV_UP;
175 timer_reset(otherservers[i].timer);
176 otherservers[i].timer = NULL;
177 otherservers[i].queue = NULL;
178 otherservers[i].dumping = 0;
180 zdbug((LOG_DEBUG,"found myself"));
185 if (me_server_idx == -1) {
186 syslog(LOG_ERR, "I'm a renegade server!");
192 * server_reset: re-initializes otherservers array by refreshing from Hesiod
195 * If any server is no longer named in the new list, and that server is in
196 * state SERV_DEAD, it is dropped from the server list.
197 * All other currently-known servers are retained.
198 * Any additional servers not previously known are added to the table.
200 * WARNING: Don't call this routine if any of the ancestor procedures have a
201 * handle on a particular server other than by indexing on otherservers[].
/*
 * Body of server_reset (the name appears in its log messages; the
 * function header is not part of this listing).
 *
 * Visible steps:
 *  - locate my_galaxy in galaxy_list[] (index k), logging an error if it
 *    is missing;
 *  - allocate and zero two flag arrays: ok_list_new, sized from the
 *    configured server count, and ok_list_old, one slot per current entry;
 *  - reset the timers of the current entries (limbo is skipped, and the
 *    loop tests for our own index), because the entries may move;
 *  - cross-check the configured addresses against otherservers[], with
 *    limbo always counted as OK;
 *  - if num_ok is below nservers, count the entries to keep (those on the
 *    new list or not SERV_DEAD); if that is fewer than nservers, copy them
 *    into a new array, call server_flush() on the dropped ones, renumber
 *    the unacked packets with srv_nack_renumber() and switch otherservers
 *    to the new array;
 *  - realloc otherservers for the newly configured servers, look for our
 *    own entry by comparing against my_addr, and setup_server() each
 *    address whose ok_list_new flag is clear;
 *  - finally start a zero-delay server_timo timer for every entry other
 *    than limbo and ourselves that has no timer.
 * NOTE(review): num_servers is used below but its assignment is not
 * visible in this listing; presumably it is the configured server count.
 */
207 struct in_addr *serv_addr;
210 int *ok_list_new, *ok_list_old;
214 zdbug((LOG_DEBUG, "server_reset"));
218 syslog(LOG_INFO, "server_reset while alone, punt");
223 /* Find out what servers are supposed to be known. */
224 for (k=0; k<ngalaxies; k++)
225 if (strcmp(galaxy_list[k].galaxy_config.galaxy, my_galaxy) == 0)
228 if (k == ngalaxies) {
229 syslog(LOG_ERR, "server_reset: Local galaxy (%s) not found in galaxy list",
234 ok_list_new = (int *) malloc(galaxy_list[k].galaxy_config.nservers * sizeof(int));
236 syslog(LOG_ERR, "server_reset no mem new");
239 ok_list_old = (int *) malloc(nservers * sizeof(int));
241 syslog(LOG_ERR, "server_reset no mem old");
246 memset(ok_list_old, 0, nservers * sizeof(int));
247 memset(ok_list_new, 0, num_servers * sizeof(int));
249 /* reset timers--pointers will move */
250 for (j = 1; j < nservers; j++) { /* skip limbo */
251 if (j == me_server_idx)
253 timer_reset(otherservers[j].timer);
254 otherservers[j].timer = NULL;
257 /* check off entries on new list which are on old list.
258 check off entries on old list which are on new list. */
260 /* count limbo as "OK" */
262 ok_list_old[0] = 1; /* limbo is OK */
264 for (i=0; i<galaxy_list[k].galaxy_config.nservers; i++) {
265 serv_addr = &galaxy_list[k].galaxy_config.server_list[i].addr;
267 for (j = 1; j < nservers; j++) { /* j = 1 since we skip limbo */
268 if (otherservers[j].addr.sin_addr.s_addr == serv_addr->s_addr) {
269 /* if server is on both lists, mark */
273 break; /* for j loop */
278 /* remove any dead servers on old list not on new list. */
279 if (num_ok < nservers) {
282 new_num = 1; /* limbo */
283 /* count number of servers to keep */
284 for (j = 1; j < nservers; j++) {
285 /* since we are never SERV_DEAD, the following
286 test prevents removing ourself from the list */
287 if (ok_list_old[j] || (otherservers[j].state != SERV_DEAD)) {
288 syslog(LOG_INFO, "keeping server %s",
289 otherservers[j].addr_str);
293 if (new_num < nservers) {
294 servers = (Server *) malloc(new_num * sizeof(Server));
296 syslog(LOG_CRIT, "server_reset server malloc");
300 servers[0] = otherservers[0]; /* copy limbo */
302 srv = (int *) malloc(nservers * sizeof(int));
303 memset(srv, 0, nservers * sizeof(int));
305 /* copy the kept servers */
306 for (j = 1; j < nservers; j++) { /* skip limbo */
307 if (ok_list_old[j] ||
308 otherservers[j].state != SERV_DEAD) {
309 servers[i] = otherservers[j];
313 syslog(LOG_INFO, "flushing server %s",
314 otherservers[j].addr_str);
315 server_flush(&otherservers[j]);
320 srv_nack_renumber(srv);
324 otherservers = servers;
329 /* add any new servers on new list not on old list. */
331 for (i = 0; i < num_servers; i++) {
336 /* new_num is number of extras. */
338 otherservers = (Server *) realloc(otherservers, nservers * sizeof(Server));
340 syslog(LOG_CRIT, "server_reset realloc");
345 for (j = 1; j < nservers - new_num; j++) {
346 if (otherservers[j].addr.sin_addr.s_addr == my_addr.s_addr) {
351 if (!me_server_idx) {
352 syslog(LOG_CRIT, "can't find myself");
356 /* fill in otherservers with the new servers */
357 for (i = 0; i < num_servers; i++) {
358 if (!ok_list_new[i]) {
359 setup_server(&otherservers[nservers - (new_num--)],
360 &galaxy_list[k].galaxy_config.server_list[i].addr);
361 syslog(LOG_INFO, "adding server %s",
362 inet_ntoa(galaxy_list[k].galaxy_config.server_list[i].addr));
366 /* reset timers, to go off now.
367 We can't get a time-left indication (bleagh!)
368 so we expire them all now. This will generally
369 be non-destructive. We assume that when this code is
370 entered via a SIGHUP trigger that a system wizard
371 is watching the goings-on to make sure things straighten
374 for (i = 1; i < nservers; i++) { /* skip limbo */
375 if (i != me_server_idx && !otherservers[i].timer) {
376 otherservers[i].timer =
377 timer_set_rel(0L, server_timo, &otherservers[i]);
379 zdbug((LOG_DEBUG, "reset timer for %s",
380 otherservers[i].addr_str));
388 zdbug((LOG_DEBUG, "server_reset: %d servers now", nservers));
392 /* note: these must match the order given in zserver.h */
409 * A server timeout has expired. If enough hello's have been unanswered,
410 * change state and act accordingly. Send a "hello" and reset the timer,
411 * incrementing the number of hello's sent.
413 * See the FSM in the Zephyr document for a better picture of what's
/*
 * Body of the per-server timer callback.
 * NOTE(review): the function header is not part of this listing; the
 * callback re-armed at the end is server_timo, so this is presumably that
 * function.  `arg' is the Server whose timer expired.
 *
 * State handling visible here:
 *  - SERV_DEAD: left dead;
 *  - SERV_UP: becomes SERV_TARDY, the hello count is cleared and the
 *    timeout becomes timo_tardy;
 *  - when num_hello_sent has reached a limit that depends on whether the
 *    state is SERV_TARDY: the server becomes SERV_DEAD, the hello count is
 *    cleared, the timeout becomes timo_dead and its unacked packets are
 *    dropped with srv_nack_release();
 *  - an unrecognised state is logged.
 * Afterwards a hello is sent with server_hello() and the timer is
 * re-armed with the current timeout.
 * NOTE(review): the "Bad server state" message prints a pointer with %x.
 */
421 Server *which = (Server *) arg;
425 zdbug((LOG_DEBUG,"srv_timo: %s", which->addr_str));
427 /* change state and reset if appropriate */
428 switch(which->state) {
429 case SERV_DEAD: /* leave him dead */
433 case SERV_UP: /* he's now tardy */
434 which->state = SERV_TARDY;
435 which->num_hello_sent = 0;
436 which->timeout = timo_tardy;
441 if (which->num_hello_sent >= ((which->state == SERV_TARDY) ?
444 /* he hasn't answered, assume DEAD */
445 which->state = SERV_DEAD;
446 which->num_hello_sent = 0;
447 which->timeout = timo_dead;
448 srv_nack_release(which);
453 syslog(LOG_ERR,"Bad server state, server 0x%x\n",which);
456 /* now he's either TARDY, STARTING, or DEAD
457 We send a "hello," which increments the counter */
459 zdbug((LOG_DEBUG, "srv %s is %s", which->addr_str,
460 srv_states[which->state]));
462 server_hello(which, auth);
463 /* reschedule the timer */
464 which->timer = timer_set_rel(which->timeout, server_timo, which);
468 * Dispatch a notice from some other server
/*
 * server_dispatch(notice, auth, who): handle a notice that arrived from
 * another server at address `who'.
 *
 * A SERVACK is handed to srv_nack_cancel().  For anything else a
 * sockaddr for the original sender is built from notice->z_sender_addr
 * and notice->z_port, the sending server is looked up with
 * server_which_server(who), and the notice is routed as follows:
 *  - realm_dispatch() when realm_which_realm() recognises the origin;
 *  - otherwise by class: admin, control, ulogin or ulocate dispatcher;
 *  - any other class is logged and treated as ZERR_NONE.
 * The admin branch returns the result of admin_dispatch() directly.
 * On the other branches the notice is acknowledged with ack() unless the
 * handler asked for ZSRV_REQUEUE, and the class string is released.
 */
473 server_dispatch(notice, auth, who)
476 struct sockaddr_in *who;
479 struct sockaddr_in newwho;
481 String *notice_class;
484 zdbug((LOG_DEBUG, "server_dispatch"));
487 if (notice->z_kind == SERVACK) {
488 srv_nack_cancel(notice, who);
492 /* set up a who for the real origin */
493 memset(&newwho, 0, sizeof(newwho));
494 newwho.sin_family = AF_INET;
495 newwho.sin_addr.s_addr = notice->z_sender_addr.s_addr;
496 newwho.sin_port = notice->z_port;
498 server = server_which_server(who);
500 /* we can dispatch to routines safely here, since they will
501 return ZSRV_REQUEUE if appropriate. We bounce this back
502 to the caller, and the caller will re-queue the message
503 for us to process later. */
505 notice_class = make_string(notice->z_class, 1);
507 if (realm_which_realm(&newwho))
508 status = realm_dispatch(notice, auth, &newwho, server);
509 else if (class_is_admin(notice_class)) {
510 /* admins don't get acked, else we get a packet loop */
511 /* will return requeue if bdump request and dumping */
513 return admin_dispatch(notice, auth, who, server);
514 } else if (class_is_control(notice_class)) {
515 status = control_dispatch(notice, auth, &newwho, server);
517 } else if (class_is_ulogin(notice_class)) {
518 status = ulogin_dispatch(notice, auth, &newwho, server);
520 } else if (class_is_ulocate(notice_class)) {
521 status = ulocate_dispatch(notice, auth, &newwho, server);
524 /* shouldn't come from another server */
525 syslog(LOG_WARNING, "srv_disp: pkt cls %s", notice->z_class);
526 status = ZERR_NONE; /* XXX */
528 if (status != ZSRV_REQUEUE)
529 ack(notice, who); /* acknowledge it if processed */
530 free_string(notice_class);
536 * Register a new server (one not in our list). This MUST be authenticated.
/*
 * server_register(notice, auth, who): add the sender as a new entry at
 * the end of otherservers[].
 *
 * Visible steps:
 *  - a sender whose port differs from srv_addr.sin_port is rejected
 *    (debug message "srv_wrong port");
 *  - a new array with room for nservers + 1 entries is allocated and the
 *    existing entries are copied into it;
 *  - because the entries moved, every timer except limbo's and our own is
 *    reset and re-created at its previous expiry time (timer_when followed
 *    by timer_set_abs);
 *  - the new slot is initialised with setup_server(), then put in state
 *    SERV_STARTING with timeout timo_tardy, no update queue and dumping
 *    cleared.
 */
541 server_register(notice, auth, who)
544 struct sockaddr_in *who;
550 if (who->sin_port != srv_addr.sin_port) {
552 zdbug((LOG_DEBUG, "srv_wrong port %d", ntohs(who->sin_port)));
556 /* Not yet... talk to ken about authenticators */
560 zdbug((LOG_DEBUG, "srv_unauth"));
565 /* OK, go ahead and set him up. */
566 temp = (Server *) malloc((nservers + 1) * sizeof(Server));
568 syslog(LOG_CRIT, "srv_reg malloc");
572 memcpy(temp, otherservers, nservers * sizeof(Server));
575 /* don't reschedule limbo's timer, so start i=1 */
576 for (i = 1; i < nservers; i++) {
577 if (i == me_server_idx) /* don't reset myself */
579 /* reschedule the timers--we moved otherservers */
580 timerval = timer_when(otherservers[i].timer);
581 timer_reset(otherservers[i].timer);
582 otherservers[i].timer = timer_set_abs(timerval, server_timo,
585 setup_server(&otherservers[nservers], &who->sin_addr);
586 otherservers[nservers].state = SERV_STARTING;
587 otherservers[nservers].timeout = timo_tardy;
588 otherservers[nservers].update_queue = NULL;
589 otherservers[nservers].dumping = 0;
593 zdbug((LOG_DEBUG, "srv %s is %s", otherservers[nservers].addr_str,
594 srv_states[otherservers[nservers].state]));
602 * Tell the other servers that this client died.
/*
 * server_kill_clt(client): tell the other servers that `client' is gone.
 *
 * The message body is a two element list: the dotted-quad address of the
 * client and its port (host byte order, decimal).  An ACKED notice of
 * class ZEPHYR_ADMIN_CLASS with opcode ADMIN_KILL_CLT is formatted once
 * per destination and handed to server_forw_reliable(), so each copy is
 * tracked until it is acknowledged.  Limbo, our own entry and SERV_DEAD
 * servers are tested for in the loop and are not sent to.
 *
 * Fix: the memset argument had been corrupted to a NOT SIGN followed by
 * "ice"; it is restored to the address of the local notice.
 */
606 server_kill_clt(client)
610 char buf[512], *lyst[2];
612 ZNotice_t *pnotice; /* speed hack */
617 lyst[0] = inet_ntoa(client->addr.sin_addr),
618 sprintf(buf, "%d", ntohs(client->addr.sin_port));
622 zdbug((LOG_DEBUG, "server kill clt %s/%s", lyst[0], lyst[1]));
627 memset (&notice, 0, sizeof(notice));
629 pnotice->z_kind = ACKED;
631 pnotice->z_port = srv_addr.sin_port;
632 pnotice->z_class = ZEPHYR_ADMIN_CLASS;
633 pnotice->z_class_inst = "";
634 pnotice->z_opcode = ADMIN_KILL_CLT;
635 pnotice->z_sender = myname; /* myname is the hostname */
636 pnotice->z_recipient = "";
637 pnotice->z_default_format = "";
638 pnotice->z_num_other_fields = 0;
643 /* don't tell limbo to flush, start at 1*/
644 for (i = 1; i < nservers; i++) {
645 if (i == me_server_idx) /* don't xmit to myself */
647 if (otherservers[i].state == SERV_DEAD)
650 retval = ZFormatNoticeList(pnotice, lyst, 2, &pack, &packlen,
651 auth ? ZAUTH : ZNOAUTH);
652 if (retval != ZERR_NONE) {
653 syslog(LOG_WARNING, "kill_clt format: %s", error_message(retval));
656 server_forw_reliable(&otherservers[i], pack, packlen, pnotice);
661 * A client has died. remove it
/*
 * kill_clt(notice, server): act on an ADMIN_KILL_CLT request received
 * from `server'.
 *
 * The client address is parsed from the notice body by extract_addr(),
 * which also stores the port in notice->z_port.  The client is then
 * looked up with client_find().  A parse failure and an unknown client
 * both return ZERR_NONE (the unknown client is also logged).  A known
 * client is deregistered with client_deregister(client, 1), which per the
 * comment below removes its locations as well.
 */
665 kill_clt(notice, server)
669 struct sockaddr_in who;
673 zdbug((LOG_DEBUG, "kill_clt"));
675 if (extract_addr(notice, &who) != ZERR_NONE)
676 return ZERR_NONE; /* XXX */
677 client = client_find(&who.sin_addr, notice->z_port);
679 syslog(LOG_NOTICE, "kill_clt: no such client (%s/%d) from %s",
680 inet_ntoa(who.sin_addr), ntohs(who.sin_port),
682 return ZERR_NONE; /* XXX */
686 syslog(LOG_DEBUG, "kill_clt clt_dereg %s/%d from %s",
687 inet_ntoa(who.sin_addr), ntohs(who.sin_port), server->addr_str);
691 /* remove the locations, too */
692 client_deregister(client, 1);
697 * extract a sockaddr_in from a message body
/*
 * extract_addr(notice, who): parse a client address out of the body of
 * `notice' into `who'.
 *
 * The body is read as two consecutive NUL-terminated strings: a
 * dotted-quad address (converted with inet_addr) and a decimal port
 * (converted with atoi, then htons).  The port is stored both in
 * who->sin_port and in notice->z_port, so the notice is modified.
 * An empty body is logged as "bad addr pkt"; a body that ends before the
 * second string starts is logged as "short addr pkt".
 * NOTE(review): the first strlen() relies on the body containing a NUL;
 * no length bound is applied to that scan in the visible code.
 */
701 extract_addr(notice, who)
703 struct sockaddr_in *who;
705 char *cp = notice->z_message;
707 if (!notice->z_message_len) {
708 syslog(LOG_WARNING, "bad addr pkt");
711 who->sin_addr.s_addr = inet_addr(notice->z_message);
713 cp += strlen(cp) + 1;
714 if (cp >= notice->z_message + notice->z_message_len) {
715 syslog(LOG_WARNING, "short addr pkt");
718 who->sin_port = notice->z_port = htons((u_short) atoi(cp));
719 who->sin_family = AF_INET;
721 zdbug((LOG_DEBUG,"ext %s/%d", inet_ntoa(who->sin_addr),
722 ntohs(who->sin_port)));
728 * Flush all data associated with the server which
/*
 * Visible part of server_flush (name taken from the log message; the
 * function header is not part of this listing): log the server being
 * flushed and drop every unacknowledged packet addressed to it with
 * srv_nack_release().
 */
737 syslog(LOG_DEBUG, "server_flush %s", which->addr_str);
739 srv_nack_release(which);
743 * send a hello to which, updating the count of hello's sent
744 * Authenticate if auth is set.
/*
 * server_hello(which, auth): send an ADMIN_HELLO to server `which' via
 * send_msg(), passing `auth' through, and count it in
 * which->num_hello_sent.  The counter is incremented unconditionally.
 */
748 server_hello(which, auth)
752 send_msg(&which->addr, ADMIN_HELLO, auth);
753 which->num_hello_sent++;
757 * Handle an ADMIN message from a server
/*
 * admin_dispatch(notice, auth, who, server): handle an admin-class notice
 * from a known server, selected by notice->z_opcode.
 *
 *  ADMIN_HELLO      answered through hello_respond(who, ADJUST, auth).
 *  ADMIN_IMHERE     handled in lines not present in this listing.
 *  ADMIN_SHUTDOWN   the sender's unacked packets are released and it is
 *                   marked SERV_DEAD with timeout timo_dead.
 *  ADMIN_BDUMP      tested against bdumping / bdump_concurrent first;
 *                   bdump_get() is the call that follows.
 *  ADMIN_KILL_CLT   passed to kill_clt(); its status is kept.
 *  anything else    logged as an unknown opcode.
 * status starts as ZERR_NONE.
 */
762 admin_dispatch(notice, auth, who, server)
765 struct sockaddr_in *who;
768 char *opcode = notice->z_opcode;
769 Code_t status = ZERR_NONE;
772 zdbug((LOG_DEBUG, "ADMIN received"));
775 if (strcmp(opcode, ADMIN_HELLO) == 0) {
776 hello_respond(who, ADJUST, auth);
777 } else if (strcmp(opcode, ADMIN_IMHERE) == 0) {
779 } else if (strcmp(opcode, ADMIN_SHUTDOWN) == 0) {
781 zdbug((LOG_DEBUG, "server shutdown"));
784 srv_nack_release(server);
785 server->state = SERV_DEAD;
786 server->timeout = timo_dead;
787 /* don't worry about the timer, it will
788 be set appropriately on the next send */
790 zdbug((LOG_DEBUG, "srv %s is %s", server->addr_str,
791 srv_states[server->state]));
794 } else if (strcmp(opcode, ADMIN_BDUMP) == 0) {
795 /* Ignore a brain dump request if this is a brain dump packet
796 * or a packet being processed concurrently during a brain
798 if (bdumping || bdump_concurrent)
800 bdump_get(notice, auth, who, server);
801 } else if (strcmp(opcode, ADMIN_KILL_CLT) == 0) {
802 status = kill_clt(notice, server);
803 if (status == ZERR_NONE)
806 syslog(LOG_WARNING, "ADMIN unknown opcode %s",opcode);
813 * Handle an ADMIN message from some random client.
814 * For now, assume it's a registration-type message from some other
815 * previously unknown server
/*
 * server_adispatch(notice, auth, who, server): handle an admin-class
 * notice from a sender that is not (yet) a known server.
 *
 * The opcode is first compared with ADMIN_STATUS; the action for that
 * case is in lines not present in this listing.  Further down,
 * server_register() is attempted: failure is logged as "new server
 * failed", success is logged and the sender gets a hello reply through
 * hello_respond(who, DONT_ADJUST, auth).  The last message ("server
 * attempt from") logs the sender address.
 * NOTE(review): the conditions that connect these branches are not
 * visible here.
 */
820 server_adispatch(notice, auth, who, server)
823 struct sockaddr_in *who;
827 /* this had better be a HELLO message--start of acquisition
828 protocol, OR a status req packet */
830 if (strcmp(notice->z_opcode, ADMIN_STATUS) == 0) {
837 syslog(LOG_INFO, "disp: new server?");
838 if (server_register(notice, auth, who) != ZERR_NONE) {
839 syslog(LOG_INFO, "new server failed");
841 syslog(LOG_INFO, "new server %s, %d", inet_ntoa(who->sin_addr),
842 ntohs(who->sin_port));
843 hello_respond(who, DONT_ADJUST, auth);
846 syslog(LOG_INFO, "srv_adisp: server attempt from %s",
847 inet_ntoa(who->sin_addr));
/*
 * Body of the status reply routine.
 * NOTE(review): the function header is not part of this listing;
 * presumably this is send_stats(who), declared above.
 *
 * A list of strings is built and sent to `who' as an ADMIN_STATUS notice
 * through send_msg_list() with auth = 0.  The list holds NUM_FIXED leading
 * entries (the version from get_version(), a packet count and an uptime
 * line are formatted below), then one "address/state" line per server
 * with " (DUMPING)" appended while dumping (limbo is skipped), then one
 * "name(address)/state" line per realm.
 * The cleanup loop starts at index 1 because entry 0 is the static
 * version string.
 */
855 struct sockaddr_in *who;
861 char *vers, *pkts, *upt;
865 #define NUM_FIXED 3 /* 3 fixed fields, plus server info */
866 /* well, not really...but for
867 backward compatibility, we gotta
869 vers = get_version();
871 sprintf(buf, "%d pkts", npackets);
873 sprintf(buf, "%d seconds operational",NOW - uptime);
876 extrafields += nrealms;
877 responses = (char **) malloc((NUM_FIXED + nservers + extrafields) *
883 num_resp = NUM_FIXED;
884 /* start at 1 and ignore limbo */
885 for (i = 1; i < nservers ; i++) {
886 sprintf(buf, "%s/%s%s", otherservers[i].addr_str,
887 srv_states[(int) otherservers[i].state],
888 otherservers[i].dumping ? " (DUMPING)" : "");
889 responses[num_resp++] = strsave(buf);
891 for (realm = otherrealms, i = 0; i < nrealms ; i++, realm++) {
892 sprintf(buf, "%s(%s)/%s", realm->name,
893 inet_ntoa((realm->addrs[realm->idx]).sin_addr),
894 rlm_states[(int) realm->state]);
895 responses[num_resp++] = strsave(buf);
898 send_msg_list(who, ADMIN_STATUS, responses, num_resp, 0);
900 /* Start at one; don't try to free static version string */
901 for (i = 1; i < num_resp; i++)
907 * Get the local galaxy name.
/*
 * get_server_galaxy(file): fill in my_galaxy from configuration file
 * `file'.
 *
 * Nothing is read when my_galaxy is already non-empty.  Otherwise the
 * file is opened for reading and lines are fetched with fgets(); a line
 * starting with "galaxy=" (case-insensitive, 7 characters) is the one of
 * interest.  Its trailing newline is located with strchr() and the text
 * after the prefix is copied into my_galaxy.
 * The caller treats a non-zero return as failure.
 * NOTE(review): the copy uses strcpy(); whether the line buffer can
 * exceed MAXHOSTNAMELEN is not visible here.
 */
910 int get_server_galaxy(const char *file)
916 /* if it's already set (presumably by -g), just return. */
917 if (my_galaxy[0] != '\0')
920 fp = fopen(file, "r");
925 if (!fgets(line, sizeof(line), fp)) {
929 if (strncasecmp(line, "galaxy=", 7) == 0)
934 /* remove final newline */
935 nl = strchr(line, '\n');
942 strcpy(my_galaxy, line+7);
948 * initialize the server structure for address addr, and set a timer
949 * to go off immediately to send hello's to other servers.
/*
 * setup_server(server, addr): initialise *server for the host at *addr.
 *
 * The entry starts in state SERV_DEAD with timeout timo_dead and no
 * hellos sent.  Its socket address uses AF_INET, our own server port
 * (srv_addr.sin_port) and *addr; addr_str receives the dotted-quad text.
 * A zero-delay timer calling server_timo(server) is started, and the
 * pending queue is emptied.
 * Callers that do not want the timer (limbo, our own entry) reset it
 * themselves afterwards.
 */
953 setup_server(server, addr)
955 struct in_addr *addr;
957 server->state = SERV_DEAD;
958 server->timeout = timo_dead;
959 server->num_hello_sent = 0;
960 server->addr.sin_family = AF_INET;
961 /* he listens to the same port we do */
962 server->addr.sin_port = srv_addr.sin_port;
963 server->addr.sin_addr = *addr;
964 strcpy(server->addr_str, inet_ntoa(*addr));
965 server->timer = timer_set_rel(0L, server_timo, server);
966 server->queue = NULL;
971 * Someone sent us a hello message, respond to them.
/*
 * hello_respond(who, adj, auth): answer a hello received from `who'.
 *
 * An ADMIN_IMHERE is sent back with send_msg().  The sender is then
 * looked up with server_which_server(); for the state handled in the
 * switch below (per the comment, a peer we believed to be dead) its
 * timer is reset and re-armed with zero delay so that server_timo runs
 * immediately.
 * NOTE(review): how `adj' (ADJUST / DONT_ADJUST) is used is not visible
 * in this listing.
 */
975 hello_respond(who, adj, auth)
976 struct sockaddr_in *who;
983 zdbug((LOG_DEBUG, "hello from %s", inet_ntoa(who->sin_addr)));
986 send_msg(who, ADMIN_IMHERE, auth);
990 /* If we think he's down, schedule an immediate HELLO. */
992 which = server_which_server(who);
996 switch (which->state) {
998 /* he said hello, we thought he was dead.
999 reschedule his hello for now. */
1000 timer_reset(which->timer);
1001 which->timer = timer_set_rel(0L, server_timo, which);
1012 * return the server descriptor for server at who
/*
 * server_which_server(who): find the otherservers[] entry for `who'.
 *
 * The port is compared with srv_addr.sin_port first.  The table is then
 * scanned from index 1 (limbo is never matched), comparing IPv4
 * addresses only.
 * Callers in this file treat the result as a pointer that may be null.
 */
1016 server_which_server(who)
1017 struct sockaddr_in *who;
1022 if (who->sin_port != srv_addr.sin_port)
1025 /* don't check limbo */
1026 for (server = &otherservers[1], i = 1; i < nservers; i++, server++) {
1027 if (server->addr.sin_addr.s_addr == who->sin_addr.s_addr)
1034 * We received a response to a hello packet or an ack. Adjust server state
/*
 * Body of srv_responded (name taken from the debug message; the function
 * header is not part of this listing): a server at `who' answered a hello
 * or sent an ack.
 *
 * The sender is looked up with server_which_server(); the "hello input
 * from non-server" error is logged in this routine.  The switch on the
 * current state shows these transitions:
 *  - a peer believed dead becomes SERV_STARTING with timeout timo_tardy
 *    and its timer is re-armed with zero delay;
 *  - in the branch mentioning the braindump, bdump_socket is tested and
 *    the state is set to SERV_UP;
 *  - the hello count is cleared, the timeout becomes timo_up and the
 *    timer is re-armed with that timeout.
 * NOTE(review): the case labels and the fall-through between them are
 * not visible here.
 */
1039 struct sockaddr_in *who;
1041 Server *which = server_which_server(who);
1044 zdbug((LOG_DEBUG, "srv_responded %s", inet_ntoa(who->sin_addr)));
1048 syslog(LOG_ERR, "hello input from non-server?!");
1052 switch (which->state) {
1054 /* he responded, we thought he was dead. mark as starting
1056 which->state = SERV_STARTING;
1057 which->timeout = timo_tardy;
1058 timer_reset(which->timer);
1059 which->timer = timer_set_rel(0L, server_timo, which);
1062 /* here we negotiate and set up a braindump */
1063 if (bdump_socket < 0)
1068 which->state = SERV_UP;
1072 /* reset the timer and counts */
1073 which->num_hello_sent = 0;
1074 which->timeout = timo_up;
1075 timer_reset(which->timer);
1076 which->timer = timer_set_rel(which->timeout, server_timo, which);
1080 zdbug((LOG_DEBUG, "srv %s is %s", which->addr_str,
1081 srv_states[which->state]));
1086 * Send each of the other servers a shutdown message.
/*
 * Visible part of the shutdown broadcast (NOTE(review): the function
 * header is not part of this listing; presumably server_shutdown()).
 * Every entry from index 1 up, which as written includes our own entry,
 * is sent an ADMIN_SHUTDOWN with auth = 1.
 */
1094 /* don't tell limbo to go away, start at 1*/
1095 for (i = 1; i < nservers; i++)
1096 send_msg(&otherservers[i].addr, ADMIN_SHUTDOWN, 1);
1100 * send a message to who with admin class and opcode and clinst as specified.
1101 * auth is set if we want to send authenticated
/*
 * send_msg(who, opcode, auth): send an empty admin notice to `who'.
 *
 * The notice is ACKED, class ZEPHYR_ADMIN_CLASS, with the given opcode,
 * our hostname as sender and no message body.  It is formatted with
 * ZFormatNotice (ZAUTH when auth is non-zero, else ZNOAUTH), the
 * destination is set with ZSetDestAddr and the packet is sent with
 * ZSendPacket without waiting for an ack.  Each failing step is logged
 * with a "snd_msg ..." warning.
 *
 * Fix: the memset argument had been corrupted to a NOT SIGN followed by
 * "ice"; it is restored to the address of the local notice.
 */
1105 send_msg(who, opcode, auth)
1106 struct sockaddr_in *who;
1111 ZNotice_t *pnotice; /* speed hack */
1118 memset (&notice, 0, sizeof(notice));
1120 pnotice->z_kind = ACKED;
1122 pnotice->z_port = srv_addr.sin_port;
1123 pnotice->z_class = ZEPHYR_ADMIN_CLASS;
1124 pnotice->z_class_inst = "";
1125 pnotice->z_opcode = opcode;
1126 pnotice->z_sender = myname; /* myname is the hostname */
1127 pnotice->z_recipient = "";
1128 pnotice->z_default_format = "";
1129 pnotice->z_message = NULL;
1130 pnotice->z_message_len = 0;
1131 pnotice->z_num_other_fields = 0;
1133 /* XXX for now, we don't do authentication */
1136 retval = ZFormatNotice(pnotice, &pack, &packlen, auth ? ZAUTH : ZNOAUTH);
1137 if (retval != ZERR_NONE) {
1138 syslog(LOG_WARNING, "snd_msg format: %s", error_message(retval));
1141 retval = ZSetDestAddr(who);
1142 if (retval != ZERR_NONE) {
1143 syslog(LOG_WARNING, "snd_msg set addr: %s", error_message(retval));
1147 /* don't wait for ack */
1148 retval = ZSendPacket(pack, packlen, 0);
1149 if (retval != ZERR_NONE)
1150 syslog(LOG_WARNING, "snd_msg xmit: %s", error_message(retval));
1155 * send a notice with a message to who with admin class and opcode and
1156 * message body as specified.
1157 * auth is set if we want to send authenticated
1158 * server_idx is -1 if we are sending to a client, or the server index
1159 * if we are sending to a server.
/*
 * send_msg_list(who, opcode, lyst, num, auth): send an admin notice whose
 * body is the `num' strings in `lyst'.
 *
 * The notice is UNSAFE, class ZEPHYR_ADMIN_CLASS, with the given opcode
 * and our hostname as sender.  It is formatted with ZFormatNoticeList
 * (ZAUTH when auth is non-zero, else ZNOAUTH), the destination is set
 * with ZSetDestAddr and the packet is handed to xmit_frag().  Format and
 * address failures are logged with "snd_msg_lst ..." warnings.
 *
 * Fix: three arguments had been corrupted to a NOT SIGN followed by
 * "ice" (memset, ZFormatNoticeList and xmit_frag); each is restored to
 * the address of the local notice.
 */
1163 send_msg_list(who, opcode, lyst, num, auth)
1164 struct sockaddr_in *who;
1176 memset (&notice, 0, sizeof(notice));
1178 notice.z_kind = UNSAFE;
1179 notice.z_port = srv_addr.sin_port;
1180 notice.z_class = ZEPHYR_ADMIN_CLASS;
1181 notice.z_class_inst = "";
1182 notice.z_opcode = opcode;
1183 notice.z_sender = myname; /* myname is the hostname */
1184 notice.z_recipient = "";
1185 notice.z_default_format = "";
1186 notice.z_message = NULL;
1187 notice.z_message_len = 0;
1188 notice.z_num_other_fields = 0;
1190 /* XXX for now, we don't do authentication */
1193 retval = ZFormatNoticeList(&notice, lyst, num, &pack, &packlen,
1194 auth ? ZAUTH : ZNOAUTH);
1195 if (retval != ZERR_NONE) {
1196 syslog(LOG_WARNING, "snd_msg_lst format: %s", error_message(retval));
1199 retval = ZSetDestAddr(who);
1200 if (retval != ZERR_NONE) {
1201 syslog(LOG_WARNING, "snd_msg_lst set addr: %s", error_message(retval));
1205 xmit_frag(&notice, pack, packlen, 0);
1210 * Forward the notice to the other servers
/*
 * server_forward(notice, auth, who): pass `notice' on to the other
 * servers.
 *
 * The loop starts at index 1 (limbo is skipped) and tests for our own
 * entry and for a server that is SERV_DEAD and not being dumped to.
 * For each destination a ZPacket_t buffer is allocated and filled by
 * ZFormatSmallRawNotice.  If a brain dump to that server is in progress
 * the packet is queued with server_queue(); otherwise it is sent through
 * server_forw_reliable(), which tracks it until acknowledged.
 * Allocation and format failures are logged.
 */
1214 server_forward(notice, auth, who)
1217 struct sockaddr_in *who;
1225 zdbug((LOG_DEBUG, "srv_forw"));
1227 /* don't send to limbo */
1228 for (i = 1; i < nservers; i++) {
1229 if (i == me_server_idx) /* don't xmit to myself */
1231 if (otherservers[i].state == SERV_DEAD &&
1232 otherservers[i].dumping == 0) {
1233 /* if we are dumping to him, we want to
1234 queue it, even if he's dead */
1238 pack = malloc(sizeof(ZPacket_t));
1240 syslog(LOG_CRIT, "srv_fwd malloc");
1243 retval = ZFormatSmallRawNotice(notice, pack, &packlen);
1244 if (retval != ZERR_NONE) {
1245 syslog(LOG_WARNING, "srv_fwd format: %s", error_message(retval));
1248 if (otherservers[i].dumping) {
1249 server_queue(&otherservers[i], packlen, pack, auth, who);
1252 server_forw_reliable(&otherservers[i], pack, packlen, notice);
/*
 * server_forw_reliable(server, pack, packlen, notice): send an already
 * formatted packet to `server' and remember it until it is acknowledged.
 *
 * After ZSetDestAddr and ZSendPacket, an Unacked record is allocated that
 * keeps the packet pointer and size, the destination as an index into
 * otherservers[] (server - otherservers), the notice uid and a timer that
 * calls srv_rexmit after rexmit_times[0].  The record is inserted into
 * the srv_nacktab bucket selected by SRV_NACKTAB_HASHVAL.
 * The record stores `pack' itself, not a copy.
 */
1257 server_forw_reliable(server, pack, packlen, notice)
1267 retval = ZSetDestAddr(&server->addr);
1268 if (retval != ZERR_NONE) {
1269 syslog(LOG_WARNING, "srv_fwd_rel set addr: %s", error_message(retval));
1273 retval = ZSendPacket(pack, packlen, 0);
1274 if (retval != ZERR_NONE) {
1275 syslog(LOG_WARNING, "srv_fwd xmit: %s", error_message(retval));
1279 /* now we've sent it, mark it as not ack'ed */
1281 nacked = (Unacked *) malloc(sizeof(Unacked));
1283 /* no space: just punt */
1284 syslog(LOG_ERR, "srv_forw_rel nack malloc");
1289 nacked->client = NULL;
1290 nacked->rexmits = 0;
1291 nacked->packet = pack;
1292 nacked->dest.srv_idx = server - otherservers;
1293 nacked->packsz = packlen;
1294 nacked->uid = notice->z_uid;
1295 nacked->timer = timer_set_rel(rexmit_times[0], srv_rexmit, nacked);
1296 hashval = SRV_NACKTAB_HASHVAL(nacked->dest.srv_idx, nacked->uid);
1297 LIST_INSERT(&srv_nacktab[hashval], nacked);
1301 * send the queued message for the server.
/*
 * server_send_queue(server): drain the queue of packets held for
 * `server'.
 *
 * Each Pending entry is taken off with server_dequeue() and parsed with
 * ZParseNotice; a parse failure is logged together with the stored origin
 * address.  The server_forw_reliable() call below forwards the stored
 * packet.  Per the comment below, the packet buffer is released later by
 * the ack handling.
 *
 * Fix: the last ZParseNotice argument had been corrupted to a NOT SIGN
 * followed by "ice"; it is restored to the address of the local notice.
 */
1305 server_send_queue(server)
1312 while (server->queue) {
1313 pending = server_dequeue(server);
1314 status = ZParseNotice(pending->packet, pending->len, &notice);
1315 if (status != ZERR_NONE) {
1316 syslog(LOG_ERR, "ssq bad notice parse (%s): %s",
1317 inet_ntoa(pending->who.sin_addr), error_message(status));
1319 server_forw_reliable(server, pending->packet, pending->len,
1322 /* ACK handling routines will free the packet */
1328 * a server has acknowledged a message we sent to him; remove it from
1329 * server unacked queue
/*
 * srv_nack_cancel(notice, who): a server acknowledged one of our packets;
 * drop the matching Unacked record.
 *
 * The sender is resolved with server_which_server(); an unknown sender is
 * logged as "non-server ack?".  The bucket is chosen from the server
 * index and the uid of the ack.  A record matches when both its
 * destination index and its uid (ZCompareUID) agree; its timer is reset,
 * its packet freed and it is unlinked with LIST_DELETE.  When nothing
 * matches, "srv_nack not found" is emitted at debug level.
 *
 * Fix: the second ZCompareUID argument had been corrupted to a NOT SIGN
 * followed by "ice->z_uid"; it is restored to the address of
 * notice->z_uid.
 */
1333 srv_nack_cancel(notice, who)
1335 struct sockaddr_in *who;
1337 Server *server = server_which_server(who);
1342 syslog(LOG_ERR, "non-server ack?");
1345 hashval = SRV_NACKTAB_HASHVAL(server - otherservers, notice->z_uid);
1346 for (nacked = srv_nacktab[hashval]; nacked; nacked = nacked->next) {
1347 if (nacked->dest.srv_idx == server - otherservers
1348 && ZCompareUID(&nacked->uid, &notice->z_uid)) {
1349 timer_reset(nacked->timer);
1350 free(nacked->packet);
1351 LIST_DELETE(nacked);
1357 zdbug((LOG_DEBUG, "srv_nack not found"));
1362 * retransmit a message to another server
/*
 * Body of srv_rexmit (name taken from the debug message and from the
 * timer registration; the function header is not part of this listing).
 * `arg' is the Unacked record whose retransmit timer fired.
 *
 * If the destination server is SERV_DEAD the record is unlinked, its
 * packet freed and srv_nack_release() is called for that server.
 * Otherwise the destination is set and the stored packet is sent again;
 * failures of either step are logged.  The timer is then re-armed from
 * rexmit_times[], where an entry of -1 is tested for as the end of the
 * schedule.
 */
1369 Unacked *packet = (Unacked *) arg;
1371 /* retransmit the packet */
1374 zdbug((LOG_DEBUG,"srv_rexmit to %s/%d",
1375 otherservers[packet->dest.srv_idx].addr_str,
1376 ntohs(otherservers[packet->dest.srv_idx].addr.sin_port)));
1378 if (otherservers[packet->dest.srv_idx].state == SERV_DEAD) {
1380 zdbug((LOG_DEBUG, "cancelling send to dead server"));
1382 LIST_DELETE(packet);
1383 free(packet->packet);
1384 srv_nack_release(&otherservers[packet->dest.srv_idx]);
1388 retval = ZSetDestAddr(&otherservers[packet->dest.srv_idx].addr);
1389 if (retval != ZERR_NONE) {
1390 syslog(LOG_WARNING, "srv_rexmit set addr: %s", error_message(retval));
1392 retval = ZSendPacket(packet->packet, packet->packsz, 0);
1393 if (retval != ZERR_NONE)
1394 syslog(LOG_WARNING, "srv_rexmit xmit: %s",
1395 error_message(retval));
1398 /* reset the timer */
1399 if (rexmit_times[packet->rexmits + 1] != -1)
1401 packet->timer = timer_set_rel(rexmit_times[packet->rexmits], srv_rexmit,
1406 * Clean up the not-yet-acked queue and release anything destined
/*
 * srv_nack_release(server): drop every unacknowledged packet destined for
 * `server'.
 *
 * All SRV_NACKTAB_HASHSIZE buckets are walked.  The successor pointer is
 * saved before a record is examined, because a matching record (one whose
 * destination index equals server - otherservers) is unlinked during the
 * walk: its timer is reset, it is removed with LIST_DELETE and its packet
 * is freed.
 */
1411 srv_nack_release(server)
1415 Unacked *nacked, *next;
1417 for (i = 0; i < SRV_NACKTAB_HASHSIZE; i++) {
1418 for (nacked = srv_nacktab[i]; nacked; nacked = next) {
1419 next = nacked->next;
1420 if (nacked->dest.srv_idx == server - otherservers) {
1421 timer_reset(nacked->timer);
1422 LIST_DELETE(nacked);
1423 free(nacked->packet);
1431 * Adjust indices of not-yet-acked packets sent to other servers to
1432 * continue to refer to the correct server.
/*
 * srv_nack_renumber(new_idx): rewrite the destination index of every
 * unacknowledged packet after otherservers[] has been compacted.
 *
 * new_idx maps an old server index to its new index.  Every record in
 * every bucket gets dest.srv_idx replaced by new_idx[old index]; the
 * "srv_nack_renumber error" message reports an old/new pair that is
 * rejected.
 * NOTE(review): the hash value depends on the server index, but records
 * are not moved between buckets in the visible code -- confirm whether
 * later lookups can still find renumbered records.
 */
1436 srv_nack_renumber (new_idx)
1439 /* XXX release any private queue for this server */
1443 /* search the not-yet-acked list for anything destined to 'from', and
1444 change the index to 'to'. */
1445 for (i = 0; i < SRV_NACKTAB_HASHSIZE; i++) {
1446 for (nacked = srv_nacktab[i]; nacked; nacked = nacked->next) {
1447 idx = new_idx[nacked->dest.srv_idx];
1449 syslog(LOG_ERR, "srv_nack_renumber error: [%d]=%d",
1450 nacked->dest.srv_idx, idx);
1453 nacked->dest.srv_idx = idx;
1459 * Queue this notice to be transmitted to the server when it is ready.
/*
 * server_queue(server, len, pack, auth, who): append a packet to the
 * hold queue of `server'.
 *
 * A Pending record is allocated (failure is logged as "update_queue
 * malloc").  It stores the packet pointer (not a copy), the auth flag and
 * a copy of *who.  The record is linked after server->queue_last, or
 * becomes both head and tail of the queue.
 */
1462 server_queue(server, len, pack, auth, who)
1467 struct sockaddr_in *who;
1471 pending = (Pending *) malloc(sizeof(Pending));
1473 syslog(LOG_CRIT, "update_queue malloc");
1476 pending->packet = pack;
1478 pending->auth = auth;
1479 pending->who = *who;
1480 pending->next = NULL;
1482 /* put it on the end of the list */
1484 server->queue_last->next = pending;
1486 server->queue = server->queue_last = pending;
1490 * Pull a notice off the hold queue.
/*
 * server_dequeue(server): detach the first Pending record from the hold
 * queue of `server' by advancing server->queue to the next record.
 * server_send_queue() uses the result as the detached record.
 */
1494 server_dequeue(server)
1501 pending = server->queue;
1502 server->queue = pending->next;
1507 * free storage used by a pending queue entry.
/*
 * server_pending_free(pending): release the packet buffer owned by a
 * Pending record.
 */
1511 server_pending_free(pending)
1514 free(pending->packet);
1520 * Queue something to be handled later by this server.
/*
 * server_self_queue(notice, auth, who): queue `notice' on our own server
 * entry so that it can be handled later.
 *
 * The notice is turned into a packet with ZFormatRawNotice (failure is
 * logged at LOG_CRIT) and appended to the queue of me_server through
 * server_queue().
 */
1524 server_self_queue(notice, auth, who)
1527 struct sockaddr_in * who;
1533 retval = ZFormatRawNotice(notice, &pack, &packlen);
1534 if (retval != ZERR_NONE) {
1535 syslog(LOG_CRIT, "srv_self_queue format: %s", error_message(retval));
1538 server_queue(me_server, packlen, pack, auth, who);
1542 * dump info about servers onto the fp.
1543 * assumed to be called with SIGFPE blocked
1544 * (true if called from signal handler)
/*
 * server_dump_servers(fp): write one line per otherservers[] entry to
 * `fp' in the form "index:address/state", with " (DUMPING)" appended
 * while a dump to that server is in progress.  The loop starts at index
 * 0, so the limbo entry is included.
 */
1547 server_dump_servers(fp)
1552 for (i = 0; i < nservers ; i++) {
1553 fprintf(fp, "%d:%s/%s%s\n", i, otherservers[i].addr_str,
1554 srv_states[otherservers[i].state],
1555 otherservers[i].dumping ? " (DUMPING)" : "");