1 /* This file is part of the Project Athena Zephyr Notification System.
2 * It contains functions for communication with other servers.
4 * Created by: John T. Kohl
6 * $Source: /afs/dev.mit.edu/source/repository/athena/lib/zephyr/server/server.c,v $
7 * $Author: kcr@ATHENA.MIT.EDU $
9 * Copyright (c) 1987, 1991 by the Massachusetts Institute of Technology.
10 * For copying and distribution information, see the file
14 #include <zephyr/mit-copyright.h>
16 #include <sys/socket.h>
20 static const char rcsid_server_c[] = "$Id: server.c 2630 2011-02-02 05:26:26Z kcr@ATHENA.MIT.EDU $";
25 SRV_NACKTAB_HASHSIZE = 1023
/*
 * Hash a (server index, notice UID) pair to a bucket of srv_nacktab[].
 * The visible terms XOR the UID's address and timestamp fields together and
 * reduce the result modulo SRV_NACKTAB_HASHSIZE.
 * NOTE(review): the start of the expression is not visible in this excerpt;
 * presumably it also mixes in `which` -- confirm.
 */
28 srv_nacktab_hashval(int which, ZUnique_Id_t uid) {
30 uid.zuid_addr.s_addr ^ uid.tv.tv_sec ^ uid.tv.tv_usec)
31 % SRV_NACKTAB_HASHSIZE;
35 * Server manager. Deal with traffic to and from other servers.
39 * void server_shutdown()
41 * void server_timo(which)
44 * void server_dispatch(notice, auth, who)
47 * struct sockaddr_in *who;
49 * void server_recover(client)
52 * void server_adispatch(notice, auth, who, server)
55 * struct sockaddr_in *who;
58 * void server_forward(notice, auth, who)
61 * struct sockaddr_in *who;
63 * Server *server_which_server(who)
64 * struct sockaddr_in *who;
66 * void server_kill_clt(client);
69 * void server_dump_servers(fp);
72 * void server_reset();
75 static void server_flush(Server *);
76 static void hello_respond(struct sockaddr_in *, int, int);
77 static void srv_responded(struct sockaddr_in *);
78 static void send_msg(struct sockaddr_in *, char *, int);
79 static void send_msg_list(struct sockaddr_in *, char *, char **, int,
81 static void srv_nack_cancel(ZNotice_t *, struct sockaddr_in *);
82 static void srv_nack_release(Server *);
83 static void srv_nack_renumber (int *);
84 static void send_stats(struct sockaddr_in *);
85 static void server_queue(Server *, int, void *, int,
86 struct sockaddr_in *);
87 static void server_hello(Server *, int);
88 static void setup_server(Server *, struct in_addr *);
89 static void srv_rexmit(void *);
90 static void server_forw_reliable(Server *, void *, int, ZNotice_t *);
91 static Code_t admin_dispatch(ZNotice_t *, int, struct sockaddr_in *,
93 static Code_t kill_clt(ZNotice_t *, Server *);
94 static Code_t extract_addr(ZNotice_t *, struct sockaddr_in *);
96 static struct in_addr *get_server_addrs(int *number);
97 static char **get_server_list(char *file);
98 static char **get_single_server(void);
99 static void free_server_list(char **list);
101 static Unacked *srv_nacktab[SRV_NACKTAB_HASHSIZE];
102 Server *otherservers; /* points to an array of the known
104 int nservers; /* number of other servers */
105 int me_server_idx; /* # of my entry in the array */
107 #define ADJUST (1) /* adjust timeout on hello input */
108 #define DONT_ADJUST (0) /* don't adjust timeout */
110 /* parameters controlling the transitions of the FSM's--patchable with adb */
111 long timo_up = TIMO_UP;
112 long timo_tardy = TIMO_TARDY;
113 long timo_dead = TIMO_DEAD;
115 /* counters to measure old protocol use */
117 int old_compat_count_uloc = 0;
118 int old_compat_count_ulocate = 0;
119 int old_compat_count_subscr = 0;
120 #endif /* OLD_COMPAT */
122 int new_compat_count_uloc = 0;
123 int new_compat_count_subscr = 0;
124 #endif /* NEW_COMPAT */
130 * Initialize the array of servers. The `limbo' server goes in the first
131 * slot (otherservers[0]).
132 * Contact Hesiod to find all the other servers, allocate space for the
133 * structure, initialize them all to SERV_DEAD with expired timeouts.
134 * Set up a list header for server_forward retransmits.
141 struct in_addr *serv_addr, *server_addrs, limbo_addr;
143 /* we don't need to mask SIGFPE here since when we are called,
144 the signal handler isn't set up yet. */
146 /* talk to hesiod here, set nservers */
147 server_addrs = get_server_addrs(&nservers);
149 syslog(LOG_ERR, "No servers?!?");
158 /* increment servers to make room for 'limbo' */
161 otherservers = (Server *) malloc(nservers * sizeof(Server));
165 limbo_addr.s_addr = 0;
166 setup_server(otherservers, &limbo_addr);
167 timer_reset(otherservers[0].timer);
168 otherservers[0].timer = NULL;
169 otherservers[0].queue = NULL;
170 otherservers[0].dumping = 0;
172 for (serv_addr = server_addrs, i = 1; i < nservers; serv_addr++, i++) {
173 setup_server(&otherservers[i], serv_addr);
175 if (serv_addr->s_addr == my_addr.s_addr) {
177 otherservers[i].state = SERV_UP;
178 timer_reset(otherservers[i].timer);
179 otherservers[i].timer = NULL;
180 otherservers[i].queue = NULL;
181 otherservers[i].dumping = 0;
185 /* free up the addresses */
188 if (me_server_idx == -1) {
189 syslog(LOG_WARNING, "I'm a renegade server!");
190 otherservers = (Server *) realloc(otherservers,
191 ++nservers * sizeof(Server));
193 syslog(LOG_CRIT, "renegade realloc");
196 setup_server(&otherservers[nservers - 1], &my_addr);
198 otherservers[nservers - 1].state = SERV_UP;
200 /* I don't send hello's to myself--cancel the timer */
201 timer_reset(otherservers[nservers - 1].timer);
202 otherservers[nservers - 1].timer = NULL;
204 /* cancel and reschedule all the timers--pointers need
206 /* don't reschedule limbo's timer, so start i=1 */
207 for (i = 1; i < nservers - 1; i++) {
208 timer_reset(otherservers[i].timer);
209 /* all the HELLO's are due now */
210 otherservers[i].timer = timer_set_rel(0L, server_timo,
213 me_server_idx = nservers - 1;
219 * server_reset: re-initializes otherservers array by refreshing from Hesiod
222 * If any server is no longer named in the new list, and that server is in
223 * state SERV_DEAD, it is dropped from the server list.
224 * All other currently-known servers are retained.
225 * Any additional servers not previously known are added to the table.
227 * WARNING: Don't call this routine if any of the ancestor procedures have a
228 * handle on a particular server other than by indexing on otherservers[].
234 struct in_addr *server_addrs;
235 struct in_addr *serv_addr;
238 int *ok_list_new, *ok_list_old;
243 syslog(LOG_INFO, "server_reset while alone, punt");
248 /* Find out what servers are supposed to be known. */
249 server_addrs = get_server_addrs(&num_servers);
251 syslog(LOG_ERR, "server_reset no servers. nothing done.");
254 ok_list_new = (int *) malloc(num_servers * sizeof(int));
256 syslog(LOG_ERR, "server_reset no mem new");
259 ok_list_old = (int *) malloc(nservers * sizeof(int));
261 syslog(LOG_ERR, "server_reset no mem old");
266 memset(ok_list_old, 0, nservers * sizeof(int));
267 memset(ok_list_new, 0, num_servers * sizeof(int));
269 /* reset timers--pointers will move */
270 for (j = 1; j < nservers; j++) { /* skip limbo */
271 if (j == me_server_idx)
273 timer_reset(otherservers[j].timer);
274 otherservers[j].timer = NULL;
277 /* check off entries on new list which are on old list.
278 check off entries on old list which are on new list. */
280 /* count limbo as "OK" */
282 ok_list_old[0] = 1; /* limbo is OK */
284 for (serv_addr = server_addrs, i = 0; i < num_servers; serv_addr++, i++) {
285 for (j = 1; j < nservers; j++) { /* j = 1 since we skip limbo */
286 if (otherservers[j].addr.sin_addr.s_addr == serv_addr->s_addr) {
287 /* if server is on both lists, mark */
291 break; /* for j loop */
296 /* remove any dead servers on old list not on new list. */
297 if (num_ok < nservers) {
300 new_num = 1; /* limbo */
301 /* count number of servers to keep */
302 for (j = 1; j < nservers; j++) {
303 /* since we are never SERV_DEAD, the following
304 test prevents removing ourself from the list */
305 if (ok_list_old[j] || (otherservers[j].state != SERV_DEAD)) {
306 syslog(LOG_INFO, "keeping server %s",
307 otherservers[j].addr_str);
311 if (new_num < nservers) {
312 servers = (Server *) malloc(new_num * sizeof(Server));
314 syslog(LOG_CRIT, "server_reset server malloc");
318 servers[0] = otherservers[0]; /* copy limbo */
320 srv = (int *) malloc(nservers * sizeof(int));
321 memset(srv, 0, nservers * sizeof(int));
323 /* copy the kept servers */
324 for (j = 1; j < nservers; j++) { /* skip limbo */
325 if (ok_list_old[j] ||
326 otherservers[j].state != SERV_DEAD) {
327 servers[i] = otherservers[j];
331 syslog(LOG_INFO, "flushing server %s",
332 otherservers[j].addr_str);
333 server_flush(&otherservers[j]);
338 srv_nack_renumber(srv);
342 otherservers = servers;
347 /* add any new servers on new list not on old list. */
349 for (i = 0; i < num_servers; i++) {
354 /* new_num is number of extras. */
356 otherservers = (Server *) realloc(otherservers, nservers * sizeof(Server));
358 syslog(LOG_CRIT, "server_reset realloc");
363 for (j = 1; j < nservers - new_num; j++) {
364 if (otherservers[j].addr.sin_addr.s_addr == my_addr.s_addr) {
369 if (!me_server_idx) {
370 syslog(LOG_CRIT, "can't find myself");
374 /* fill in otherservers with the new servers */
375 for (i = 0; i < num_servers; i++) {
376 if (!ok_list_new[i]) {
377 setup_server(&otherservers[nservers - (new_num--)],
379 syslog(LOG_INFO, "adding server %s", inet_ntoa(server_addrs[i]));
384 /* reset timers, to go off now.
385 We can't get a time-left indication (bleagh!)
386 so we expire them all now. This will generally
387 be non-destructive. We assume that when this code is
388 entered via a SIGHUP trigger that a system wizard
389 is watching the goings-on to make sure things straighten
392 for (i = 1; i < nservers; i++) { /* skip limbo */
393 if (i != me_server_idx && !otherservers[i].timer) {
394 otherservers[i].timer =
395 timer_set_rel(0L, server_timo, &otherservers[i]);
403 /* note: these must match the order given in zserver.h */
420 * A server timeout has expired. If enough hello's have been unanswered,
421 * change state and act accordingly. Send a "hello" and reset the timer,
422 * incrementing the number of hello's sent.
424 * See the FSM in the Zephyr document for a better picture of what's
/*
 * Timer callback for a peer server's hello timer.
 *
 * arg is the Server whose timer fired (passed as void * by the timer code).
 * From the visible cases: a SERV_UP server is demoted to SERV_TARDY with
 * the timo_tardy interval; once num_hello_sent reaches a state-dependent
 * limit (the limit expression is not visible in this excerpt) the server is
 * marked SERV_DEAD, switched to the timo_dead interval, and its
 * unacknowledged packets are released.  In every case a hello is then sent
 * and the timer is re-armed with which->timeout.
 */
429 server_timo(void *arg)
431 Server *which = (Server *) arg;
434 /* change state and reset if appropriate */
435 switch(which->state) {
436 case SERV_DEAD: /* leave him dead */
440 case SERV_UP: /* he's now tardy */
441 which->state = SERV_TARDY;
442 which->num_hello_sent = 0;
443 which->timeout = timo_tardy;
448 if (which->num_hello_sent >= ((which->state == SERV_TARDY) ?
451 /* he hasn't answered, assume DEAD */
452 which->state = SERV_DEAD;
453 which->num_hello_sent = 0;
454 which->timeout = timo_dead;
455 srv_nack_release(which);
/* print the pointer with %p: casting it to int truncates it on LP64 */
460 syslog(LOG_ERR,"Bad server state, server %p\n", (void *)which);
463 /* now he's either TARDY, STARTING, or DEAD
464 We send a "hello," which increments the counter */
465 server_hello(which, auth);
466 /* reschedule the timer */
467 which->timer = timer_set_rel(which->timeout, server_timo, which);
471 * Dispatch a notice from some other server
/*
 * Dispatch a notice received from another server.
 *
 * notice - the parsed notice
 * who    - address the packet arrived from (the sending server)
 *
 * SERVACK notices cancel the matching entry in the unacked table.  Other
 * notices are routed by origin/class: realm traffic, admin, control, ulogin
 * or ulocate.  Admin notices return admin_dispatch()'s status directly and
 * are never acked.  For the remaining classes an ack is sent back to `who`
 * unless the handler returned ZSRV_REQUEUE.
 */
476 server_dispatch(ZNotice_t *notice,
478 struct sockaddr_in *who)
481 struct sockaddr_in newwho;
483 String *notice_class;
486 if (notice->z_kind == SERVACK) {
487 srv_nack_cancel(notice, who);
491 /* set up a who for the real origin */
492 notice_extract_address(notice, &newwho);
/* `server` is the otherservers[] entry for the forwarding peer */
494 server = server_which_server(who);
496 /* we can dispatch to routines safely here, since they will
497 return ZSRV_REQUEUE if appropriate. We bounce this back
498 to the caller, and the caller will re-queue the message
499 for us to process later. */
501 notice_class = make_string(notice->z_class, 1);
503 if (realm_which_realm(&newwho))
504 status = realm_dispatch(notice, auth, &newwho, server);
505 else if (class_is_admin(notice_class)) {
506 /* admins don't get acked, else we get a packet loop */
507 /* will return requeue if bdump request and dumping */
509 return admin_dispatch(notice, auth, who, server);
510 } else if (class_is_control(notice_class)) {
511 status = control_dispatch(notice, auth, &newwho, server);
513 } else if (class_is_ulogin(notice_class)) {
514 status = ulogin_dispatch(notice, auth, &newwho, server);
516 } else if (class_is_ulocate(notice_class)) {
517 status = ulocate_dispatch(notice, auth, &newwho, server);
520 /* shouldn't come from another server */
521 syslog(LOG_WARNING, "srv_disp: pkt cls %s", notice->z_class);
522 status = ZERR_NONE; /* XXX */
524 if (status != ZSRV_REQUEUE)
525 ack(notice, who); /* acknowledge it if processed */
526 free_string(notice_class);
531 * Tell the other servers that this client died.
/*
 * Tell the other servers that `client` has died.
 *
 * Builds an ACKED admin-class notice with opcode ADMIN_KILL_CLT whose body
 * is a two-element list: lyst[0] is the client's dotted-quad address and
 * buf holds its port in decimal.  The notice is formatted once per
 * destination and handed to server_forw_reliable() for each entry of
 * otherservers[] starting at index 1 (limbo is never told); the visible
 * checks single out our own entry and SERV_DEAD servers.
 */
535 server_kill_clt(Client *client)
538 char buf[512], *lyst[2];
540 ZNotice_t *pnotice; /* speed hack */
545 lyst[0] = inet_ntoa(client->addr.sin_addr),
546 sprintf(buf, "%d", ntohs(client->addr.sin_port));
/* was the mis-encoded "¬ice"; the argument is the address of notice */
551 memset (&notice, 0, sizeof(notice));
553 pnotice->z_kind = ACKED;
555 pnotice->z_port = srv_addr.sin_port;
556 pnotice->z_class = ZEPHYR_ADMIN_CLASS;
557 pnotice->z_class_inst = "";
558 pnotice->z_opcode = ADMIN_KILL_CLT;
559 pnotice->z_sender = myname; /* myname is the hostname */
560 pnotice->z_recipient = "";
561 pnotice->z_default_format = "";
562 pnotice->z_num_other_fields = 0;
567 /* don't tell limbo to flush, start at 1*/
568 for (i = 1; i < nservers; i++) {
569 if (i == me_server_idx) /* don't xmit to myself */
571 if (otherservers[i].state == SERV_DEAD)
574 retval = ZFormatNoticeList(pnotice, lyst, 2, &pack, &packlen,
575 auth ? ZAUTH : ZNOAUTH);
576 if (retval != ZERR_NONE) {
577 syslog(LOG_WARNING, "kill_clt format: %s", error_message(retval));
580 server_forw_reliable(&otherservers[i], pack, packlen, pnotice);
585 * A client has died. remove it
/*
 * Handle an ADMIN_KILL_CLT notice from another server: parse the client
 * address out of the notice body, look the client up and deregister it
 * (the second argument 1 also removes its locations, per the comment
 * below).  Both visible early exits -- unparsable address and unknown
 * client -- return ZERR_NONE rather than an error.
 */
589 kill_clt(ZNotice_t *notice,
592 struct sockaddr_in who;
595 if (extract_addr(notice, &who) != ZERR_NONE)
596 return ZERR_NONE; /* XXX */
/* extract_addr() also stored the parsed port in notice->z_port */
597 client = client_find(&who.sin_addr, notice->z_port);
599 syslog(LOG_NOTICE, "kill_clt: no such client (%s/%d) from %s",
600 inet_ntoa(who.sin_addr), ntohs(who.sin_port),
602 return ZERR_NONE; /* XXX */
606 syslog(LOG_DEBUG, "kill_clt clt_dereg %s/%d from %s",
607 inet_ntoa(who.sin_addr), ntohs(who.sin_port), server->addr_str);
610 /* remove the locations, too */
611 client_deregister(client, 1);
616 * extract a sockaddr_in from a message body
/*
 * Extract a sockaddr_in from a notice body.
 *
 * The body is read as two consecutive NUL-terminated strings: a dotted-quad
 * address followed by a decimal port.  Fills in who->sin_addr, who->sin_port
 * (network byte order) and who->sin_family, and also overwrites
 * notice->z_port with the parsed port.
 *
 * NOTE(review): inet_addr() and strlen() are applied to z_message before
 * any check that the first string is terminated inside z_message_len; the
 * only visible guards are the empty-body test and the bounds test after the
 * first string has already been scanned -- confirm the caller guarantees
 * termination.
 */
620 extract_addr(ZNotice_t *notice,
621 struct sockaddr_in *who)
623 char *cp = notice->z_message;
625 if (!notice->z_message_len) {
626 syslog(LOG_WARNING, "bad addr pkt");
629 who->sin_addr.s_addr = inet_addr(notice->z_message);
/* step over the address string and its terminating NUL */
631 cp += strlen(cp) + 1;
632 if (cp >= notice->z_message + notice->z_message_len) {
633 syslog(LOG_WARNING, "short addr pkt");
636 who->sin_port = notice->z_port = htons((u_short) atoi(cp));
637 who->sin_family = AF_INET;
642 * Flush all data associated with the server which
/*
 * Flush the data held for server `which`; the visible body releases its
 * not-yet-acknowledged packets via srv_nack_release().
 */
646 server_flush(Server *which)
648 srv_nack_release(which);
652 * send a hello to which, updating the count of hello's sent
653 * Authenticate if auth is set.
/*
 * Send an ADMIN_HELLO to `which` (auth is passed through to send_msg())
 * and count it in which->num_hello_sent.
 */
657 server_hello(Server *which,
660 send_msg(&which->addr, ADMIN_HELLO, auth);
661 which->num_hello_sent++;
665 * Handle an ADMIN message from a server
/*
 * Handle an admin-class notice from another server, keyed on z_opcode:
 *   ADMIN_HELLO    - answer via hello_respond() with ADJUST
 *   ADMIN_IMHERE   - (handling not visible in this excerpt)
 *   ADMIN_SHUTDOWN - release the sender's unacked packets and mark it
 *                    SERV_DEAD with the timo_dead interval
 *   ADMIN_BDUMP    - start a brain dump with bdump_get(), guarded by the
 *                    bdumping / bdump_concurrent test
 *   ADMIN_KILL_CLT - deregister the named client via kill_clt()
 * Any other opcode is logged as unknown.  status starts as ZERR_NONE.
 */
670 admin_dispatch(ZNotice_t *notice,
672 struct sockaddr_in *who,
675 char *opcode = notice->z_opcode;
676 Code_t status = ZERR_NONE;
678 if (strcmp(opcode, ADMIN_HELLO) == 0) {
679 hello_respond(who, ADJUST, auth);
680 } else if (strcmp(opcode, ADMIN_IMHERE) == 0) {
682 } else if (strcmp(opcode, ADMIN_SHUTDOWN) == 0) {
684 srv_nack_release(server);
685 server->state = SERV_DEAD;
686 server->timeout = timo_dead;
687 /* don't worry about the timer, it will
688 be set appropriately on the next send */
690 } else if (strcmp(opcode, ADMIN_BDUMP) == 0) {
691 /* Ignore a brain dump request if this is a brain dump packet
692 * or a packet being processed concurrently during a brain
694 if (bdumping || bdump_concurrent)
696 bdump_get(notice, auth, who, server);
697 } else if (strcmp(opcode, ADMIN_KILL_CLT) == 0) {
698 status = kill_clt(notice, server);
699 if (status == ZERR_NONE)
702 syslog(LOG_WARNING, "ADMIN unknown opcode %s",opcode);
709 * Handle an ADMIN message from some random client.
710 * For now, assume it's a registration-type message from some other
711 * previously unknown server
/*
 * Handle an admin notice whose sender is not a known server.  The visible
 * code tests for an ADMIN_STATUS opcode (a status request) and, on another
 * path, logs the sender's address as a "server attempt".
 * NOTE(review): the bodies of both branches are not visible here.
 */
716 server_adispatch(ZNotice_t *notice,
718 struct sockaddr_in *who,
722 /* this had better be a HELLO message--start of acquisition
723 protocol, OR a status req packet */
725 if (strcmp(notice->z_opcode, ADMIN_STATUS) == 0) {
731 syslog(LOG_INFO, "srv_adisp: server attempt from %s",
732 inet_ntoa(who->sin_addr));
/*
 * Send a status report to `who` as an ADMIN_STATUS list.
 *
 * The list holds NUM_FIXED leading fields (version string, packet count,
 * uptime in seconds), one "addr/state" entry per server after limbo, one
 * entry per non-zero compatibility counter, and one entry per realm.
 * responses[] is sized NUM_FIXED + nservers + extrafields.  Entries from
 * index 1 onward are freed after sending; index 0 is the static version
 * string (see the comment at the free loop).
 *
 * NOTE(review): every field is formatted into buf with sprintf(); buf's
 * declaration and size are not visible in this excerpt -- confirm it is
 * large enough for the realm-name entry.
 */
738 send_stats(struct sockaddr_in *who)
744 char *vers, *pkts, *upt;
748 #define NUM_FIXED 3 /* 3 fixed fields, plus server info */
749 /* well, not really...but for
750 backward compatibility, we gotta
752 vers = get_version();
754 sprintf(buf, "%lu pkts", npackets);
756 sprintf(buf, "%ld seconds operational",NOW - uptime);
760 if (old_compat_count_uloc)
762 if (old_compat_count_ulocate)
764 if (old_compat_count_subscr)
766 #endif /* OLD_COMPAT */
768 if (new_compat_count_uloc)
770 if (new_compat_count_subscr)
772 #endif /* NEW_COMPAT */
773 extrafields += nrealms;
774 responses = (char **) malloc((NUM_FIXED + nservers + extrafields) *
780 num_resp = NUM_FIXED;
781 /* start at 1 and ignore limbo */
782 for (i = 1; i < nservers ; i++) {
783 sprintf(buf, "%s/%s%s", otherservers[i].addr_str,
784 srv_states[(int) otherservers[i].state],
785 otherservers[i].dumping ? " (DUMPING)" : "");
786 responses[num_resp++] = strsave(buf);
789 if (old_compat_count_uloc) {
790 sprintf(buf, "%d old old location requests", old_compat_count_uloc);
791 responses[num_resp++] = strsave(buf);
793 if (old_compat_count_ulocate) {
794 sprintf(buf, "%d old old loc lookup requests",
795 old_compat_count_ulocate);
796 responses[num_resp++] = strsave(buf);
798 if (old_compat_count_subscr) {
799 sprintf(buf, "%d old old subscr requests", old_compat_count_subscr);
800 responses[num_resp++] = strsave(buf);
802 #endif /* OLD_COMPAT */
804 if (new_compat_count_uloc) {
805 sprintf(buf, "%d new old location requests", new_compat_count_uloc);
806 responses[num_resp++] = strsave(buf);
808 if (new_compat_count_subscr) {
809 sprintf(buf, "%d new old subscr requests", new_compat_count_subscr);
810 responses[num_resp++] = strsave(buf);
812 #endif /* NEW_COMPAT */
/* one "name(current address)/state" entry per known realm */
813 for (realm = otherrealms, i = 0; i < nrealms ; i++, realm++) {
814 sprintf(buf, "%s(%s)/%s", realm->name,
815 inet_ntoa((realm->addrs[realm->idx]).sin_addr),
816 rlm_states[(int) realm->state]);
817 responses[num_resp++] = strsave(buf);
820 send_msg_list(who, ADMIN_STATUS, responses, num_resp, 0);
822 /* Start at one; don't try to free static version string */
823 for (i = 1; i < num_resp; i++)
829 * Get a list of server addresses.
831 * This list is retrieved from Hesiod.
833 * This list is read from a file.
835 * Return a pointer to an array of allocated storage. This storage is
836 * freed by the caller.
/*
 * Build an allocated array of in_addr for the servers we should peer with.
 *
 * Host names come from one of three sources seen below: the file named by
 * list_file (get_server_list), Hesiod ("zephyr"/"sloc"), or this host alone
 * (get_single_server).  The conditions choosing between them are not
 * visible in this excerpt.  Each name is resolved with gethostbyname();
 * names that fail to resolve are logged and, judging by the visible code,
 * do not contribute an address.  Lists recorded in server_hosts_free are
 * released with free_server_list() before returning.
 *
 * number - out parameter; presumably receives the address count -- the
 *          store is not visible here, confirm.
 */
839 static struct in_addr *
840 get_server_addrs(int *number)
843 char **server_hosts = NULL;
844 char **server_hosts_free = NULL;
846 struct in_addr *addrs;
847 struct in_addr *addr;
850 server_hosts = get_server_list(list_file);
851 server_hosts_free = server_hosts;
854 server_hosts = hes_resolve("zephyr","sloc");
857 server_hosts = get_single_server();
858 server_hosts_free = server_hosts;
/* count the names to size the address array */
864 for (cpp = server_hosts; *cpp; cpp++)
867 addrs = (struct in_addr *) malloc(i * sizeof(struct in_addr));
869 /* Convert to in_addr's */
870 for (cpp = server_hosts, addr = addrs, i = 0; *cpp; cpp++) {
871 hp = gethostbyname(*cpp);
873 memcpy(addr, hp->h_addr, sizeof(struct in_addr));
876 syslog(LOG_WARNING, "hostname failed, %s", *cpp);
880 if (server_hosts_free)
881 free_server_list(server_hosts_free);
885 static int nhosts = 0;
888 * read "file" to get a list of names of hosts to peer with.
889 * The file should contain a list of host names, one per line.
/*
 * Read `file` and return a NULL-terminated, heap-allocated list of the host
 * names it contains, one per line.  Each name is copied with strsave().
 * The pointer array starts at nhosts slots and is grown when the next
 * entry plus the terminating NULL would no longer fit.
 */
893 get_server_list(char *file)
896 char buf[NS_MAXDNAME];
901 fp = fopen(file, "r");
904 /* start with 16, realloc if necessary */
906 ret_list = (char **) malloc(nhosts * sizeof(char *));
910 while (fgets(buf, sizeof(buf), fp)) {
911 /* nuke the newline, being careful not to overrun
912 the buffer searching for it with strlen() */
913 buf[sizeof(buf) - 1] = '\0';
914 newline = strchr(buf, '\n');
918 if (nused + 1 >= nhosts) {
919 /* get more pointer space if necessary */
920 /* +1 to leave room for null pointer */
/* realloc() takes a byte count: scale the doubled slot count by the
   element size, otherwise the array shrinks and later stores overflow */
921 ret_list = (char **) realloc(ret_list, nhosts * 2 * sizeof(char *));
924 ret_list[nused++] = strsave(buf);
931 ret_list[nused] = NULL;
/*
 * Build a NULL-terminated host list whose only entry is this machine's
 * name as reported by gethostname(), copied with strsave().  The array is
 * allocated with nhosts pointer slots.
 * NOTE(review): the value assigned to nhosts before the malloc and the
 * gethostname() failure path are not visible in this excerpt.
 */
936 get_single_server(void)
938 char buf[NS_MAXDNAME];
942 ret_list = (char **) malloc(nhosts * sizeof(char *));
945 if (gethostname(buf, sizeof(buf)) < 0) {
949 ret_list[nused++] = strsave(buf);
950 ret_list[nused] = NULL;
955 * free storage allocated by get_server_list
/*
 * Release a host list.  nhosts == 0 is treated as "nothing was allocated";
 * otherwise the list is walked up to its NULL terminator.  orig_list keeps
 * the head of the array while `list` advances.
 * NOTE(review): the free() calls themselves are not visible here.
 */
958 free_server_list(char **list)
960 char **orig_list = list;
962 if (!nhosts) /* nothing allocated */
964 for (; *list; list++)
971 * initialize the server structure for address addr, and set a timer
972 * to go off immediately to send hello's to other servers.
/*
 * Initialise *server for the peer at `addr`: state SERV_DEAD with the
 * timo_dead interval, no hellos sent yet, an AF_INET address on the same
 * port this server uses, the printable address cached in addr_str, an
 * empty pending queue, and a timer armed with zero delay so server_timo()
 * runs for it as soon as timers are serviced.
 *
 * NOTE(review): addr_str is filled with strcpy(); inet_ntoa() yields at
 * most 15 characters plus NUL, but the size of addr_str is not visible
 * here -- confirm it is at least 16 bytes.
 */
976 setup_server(Server *server,
977 struct in_addr *addr)
979 server->state = SERV_DEAD;
980 server->timeout = timo_dead;
981 server->num_hello_sent = 0;
982 server->addr.sin_family = AF_INET;
983 /* he listens to the same port we do */
984 server->addr.sin_port = srv_addr.sin_port;
985 server->addr.sin_addr = *addr;
986 strcpy(server->addr_str, inet_ntoa(*addr));
987 server->timer = timer_set_rel(0L, server_timo, server);
988 server->queue = NULL;
993 * Someone sent us a hello message, respond to them.
/*
 * Answer a HELLO from `who` with an ADMIN_IMHERE.  Afterwards the sender is
 * looked up in the server table and, for the state handled below (a peer
 * we believed dead, per the comment), its hello timer is cancelled and
 * re-armed with zero delay so we probe it immediately.
 */
997 hello_respond(struct sockaddr_in *who,
1003 send_msg(who, ADMIN_IMHERE, auth);
1007 /* If we think he's down, schedule an immediate HELLO. */
1009 which = server_which_server(who);
1013 switch (which->state) {
1015 /* he said hello, we thought he was dead.
1016 reschedule his hello for now. */
1017 timer_reset(which->timer);
1018 which->timer = timer_set_rel(0L, server_timo, which);
1029 * return the server descriptor for server at who
/*
 * Find the otherservers[] entry for the sender `who`.  A sender whose port
 * differs from our own server port is handled separately (result not
 * visible here); otherwise entries 1..nservers-1 are compared by IPv4
 * address, limbo at index 0 being skipped.
 */
1033 server_which_server(struct sockaddr_in *who)
1038 if (who->sin_port != srv_addr.sin_port)
1041 /* don't check limbo */
1042 for (server = &otherservers[1], i = 1; i < nservers; i++, server++) {
1043 if (server->addr.sin_addr.s_addr == who->sin_addr.s_addr)
1050 * We received a response to a hello packet or an ack. Adjust server state
/*
 * A server answered one of our hellos (or acked us): update its state.
 * A sender that is not in the server table is logged and ignored.  From
 * the visible branches: a peer we thought dead moves to SERV_STARTING with
 * the timo_tardy interval and an immediate timer; another branch tests
 * bdump_socket (brain-dump negotiation, per the comment) and sets
 * SERV_UP; and a branch resets the hello count, selects timo_up and
 * re-arms the timer for that interval.
 * NOTE(review): the case labels are not visible in this excerpt.
 */
1054 srv_responded(struct sockaddr_in *who)
1056 Server *which = server_which_server(who);
1059 syslog(LOG_ERR, "hello input from non-server?!");
1063 switch (which->state) {
1065 /* he responded, we thought he was dead. mark as starting
1067 which->state = SERV_STARTING;
1068 which->timeout = timo_tardy;
1069 timer_reset(which->timer);
1070 which->timer = timer_set_rel(0L, server_timo, which);
1073 /* here we negotiate and set up a braindump */
1074 if (bdump_socket < 0)
1079 which->state = SERV_UP;
1083 /* reset the timer and counts */
1084 which->num_hello_sent = 0;
1085 which->timeout = timo_up;
1086 timer_reset(which->timer);
1087 which->timer = timer_set_rel(which->timeout, server_timo, which);
1093 * Send each of the other servers a shutdown message.
/*
 * Announce our shutdown: send an ADMIN_SHUTDOWN (with auth set to 1) to
 * every otherservers[] entry from index 1 upward, i.e. everyone but limbo.
 */
1097 server_shutdown(void)
1101 /* don't tell limbo to go away, start at 1*/
1102 for (i = 1; i < nservers; i++)
1103 send_msg(&otherservers[i].addr, ADMIN_SHUTDOWN, 1);
1107 * send a message to who with admin class and opcode and clinst as specified.
1108 * auth is set if we want to send authenticated
/*
 * Send a body-less admin-class notice with the given opcode to `who`.
 *
 * The notice is ACKED kind, sent from our server port with myname as the
 * sender, formatted with ZAUTH or ZNOAUTH according to auth, and
 * transmitted with ZSendPacket() without waiting for an ack.  Formatting,
 * addressing and transmit failures are logged with syslog().
 */
1112 send_msg(struct sockaddr_in *who,
1117 ZNotice_t *pnotice; /* speed hack */
/* was the mis-encoded "¬ice"; the argument is the address of notice */
1124 memset (&notice, 0, sizeof(notice));
1126 pnotice->z_kind = ACKED;
1128 pnotice->z_port = srv_addr.sin_port;
1129 pnotice->z_class = ZEPHYR_ADMIN_CLASS;
1130 pnotice->z_class_inst = "";
1131 pnotice->z_opcode = opcode;
1132 pnotice->z_sender = myname; /* myname is the hostname */
1133 pnotice->z_recipient = "";
1134 pnotice->z_default_format = "";
1135 pnotice->z_message = NULL;
1136 pnotice->z_message_len = 0;
1137 pnotice->z_num_other_fields = 0;
1139 /* XXX for now, we don't do authentication */
1142 retval = ZFormatNotice(pnotice, &pack, &packlen, auth ? ZAUTH : ZNOAUTH);
1143 if (retval != ZERR_NONE) {
1144 syslog(LOG_WARNING, "snd_msg format: %s", error_message(retval));
1147 retval = ZSetDestAddr(who);
1148 if (retval != ZERR_NONE) {
1149 syslog(LOG_WARNING, "snd_msg set addr: %s", error_message(retval));
1153 /* don't wait for ack */
1154 retval = ZSendPacket(pack, packlen, 0);
1155 if (retval != ZERR_NONE)
1156 syslog(LOG_WARNING, "snd_msg xmit: %s", error_message(retval));
1161 * send a notice with a message to who with admin class and opcode and
1162 * message body as specified.
1163 * auth is set if we want to send authenticated
1164 * server_idx is -1 if we are sending to a client, or the server index
1165 * if we are sending to a server.
/*
 * Send an admin-class notice with the given opcode to `who`, with the
 * `num` strings in lyst as its message body.
 *
 * The notice is UNSAFE kind, sent from our server port with myname as the
 * sender, formatted with ZFormatNoticeList() using ZAUTH or ZNOAUTH
 * according to auth, and handed to xmit_frag() for transmission.
 * Formatting and addressing failures are logged with syslog().
 */
1169 send_msg_list(struct sockaddr_in *who,
/* the three "&notice" below were the mis-encoded "¬ice" */
1180 memset (&notice, 0, sizeof(notice));
1182 notice.z_kind = UNSAFE;
1183 notice.z_port = srv_addr.sin_port;
1184 notice.z_class = ZEPHYR_ADMIN_CLASS;
1185 notice.z_class_inst = "";
1186 notice.z_opcode = opcode;
1187 notice.z_sender = myname; /* myname is the hostname */
1188 notice.z_recipient = "";
1189 notice.z_default_format = "";
1190 notice.z_message = NULL;
1191 notice.z_message_len = 0;
1192 notice.z_num_other_fields = 0;
1194 /* XXX for now, we don't do authentication */
1197 retval = ZFormatNoticeList(&notice, lyst, num, &pack, &packlen,
1198 auth ? ZAUTH : ZNOAUTH);
1199 if (retval != ZERR_NONE) {
1200 syslog(LOG_WARNING, "snd_msg_lst format: %s", error_message(retval));
1203 retval = ZSetDestAddr(who);
1204 if (retval != ZERR_NONE) {
1205 syslog(LOG_WARNING, "snd_msg_lst set addr: %s", error_message(retval));
1209 xmit_frag(&notice, pack, packlen, 0);
1214 * Forward the notice to the other servers
/*
 * Forward `notice` to the other servers.
 *
 * For each otherservers[] entry from index 1 (limbo excluded), with our
 * own entry and servers that are both SERV_DEAD and not being dumped to
 * singled out by the visible checks, a fresh ZPacket_t-sized buffer is
 * allocated and the notice is formatted into it with
 * ZNewFormatSmallRawNotice().  If a brain dump to that server is in
 * progress the packet is queued with server_queue(); the other visible
 * call hands it to server_forw_reliable().
 */
1218 server_forward(ZNotice_t *notice,
1220 struct sockaddr_in *who)
1227 /* don't send to limbo */
1228 for (i = 1; i < nservers; i++) {
1229 if (i == me_server_idx) /* don't xmit to myself */
1231 if (otherservers[i].state == SERV_DEAD &&
1232 otherservers[i].dumping == 0) {
1233 /* if we are dumping to him, we want to
1234 queue it, even if he's dead */
1238 pack = malloc(sizeof(ZPacket_t));
1240 syslog(LOG_CRIT, "srv_fwd malloc");
1243 retval = ZNewFormatSmallRawNotice(notice, pack, &packlen);
1244 if (retval != ZERR_NONE) {
1245 syslog(LOG_WARNING, "srv_fwd format: %s", error_message(retval));
1249 if (otherservers[i].dumping) {
1250 server_queue(&otherservers[i], packlen, pack, auth, who);
1253 server_forw_reliable(&otherservers[i], pack, packlen, notice);
/*
 * Send an already-formatted packet to `server` and remember it until the
 * server acknowledges it.
 *
 * After ZSetDestAddr()/ZSendPacket(), an Unacked record is allocated that
 * takes over `pack`, stores the destination as an index into
 * otherservers[], copies the notice UID used to match the later SERVACK,
 * arms a retransmit timer with the first rexmit_times[] interval
 * (srv_rexmit is the callback), and is inserted into the srv_nacktab hash
 * bucket chosen by srv_nacktab_hashval().
 * NOTE(review): what happens to `pack` on the visible error paths is not
 * shown in this excerpt.
 */
1258 server_forw_reliable(Server *server,
1267 retval = ZSetDestAddr(&server->addr);
1268 if (retval != ZERR_NONE) {
1269 syslog(LOG_WARNING, "srv_fwd_rel set addr: %s", error_message(retval));
1273 retval = ZSendPacket(pack, packlen, 0);
1274 if (retval != ZERR_NONE) {
1275 syslog(LOG_WARNING, "srv_fwd xmit: %s", error_message(retval));
1279 /* now we've sent it, mark it as not ack'ed */
1281 nacked = (Unacked *) malloc(sizeof(Unacked));
1283 /* no space: just punt */
1284 syslog(LOG_ERR, "srv_forw_rel nack malloc");
1289 nacked->client = NULL;
1290 nacked->rexmits = 0;
1291 nacked->packet = pack;
/* store an index, not a pointer: otherservers[] can be reallocated */
1292 nacked->dest.srv_idx = server - otherservers;
1293 nacked->packsz = packlen;
1294 nacked->uid = notice->z_uid;
1295 nacked->timer = timer_set_rel(rexmit_times[0], srv_rexmit, nacked);
1296 hashval = srv_nacktab_hashval(nacked->dest.srv_idx, nacked->uid);
1297 Unacked_insert(&srv_nacktab[hashval], nacked);
1301 * send the queued message for the server.
/*
 * Drain the pending queue of `server`: each queued packet is dequeued,
 * re-parsed into a notice with ZParseNotice(), and sent with
 * server_forw_reliable().  A packet that fails to parse is logged with the
 * originator's address.  Per the comment below, successfully forwarded
 * packets are freed later by the ack handling code.
 */
1305 server_send_queue(Server *server)
1311 while (server->queue) {
1312 pending = server_dequeue(server);
/* was the mis-encoded "¬ice"; ZParseNotice fills in the local notice */
1313 status = ZParseNotice(pending->packet, pending->len, &notice);
1314 if (status != ZERR_NONE) {
1315 syslog(LOG_ERR, "ssq bad notice parse (%s): %s",
1316 inet_ntoa(pending->who.sin_addr), error_message(status));
1318 server_forw_reliable(server, pending->packet, pending->len,
1321 /* ACK handling routines will free the packet */
1327 * a server has acknowledged a message we sent to him; remove it from
1328 * server unacked queue
/*
 * A server has acknowledged a packet we forwarded to it: find the matching
 * Unacked record and drop it.
 *
 * The sender must be a known server, otherwise "non-server ack?" is
 * logged.  The record is looked up in the srv_nacktab bucket for (server
 * index, notice UID) and must match on both; on a match its retransmit
 * timer is cancelled, the saved packet freed and the record unlinked.
 */
1332 srv_nack_cancel(ZNotice_t *notice,
1333 struct sockaddr_in *who)
1335 Server *server = server_which_server(who);
1340 syslog(LOG_ERR, "non-server ack?");
1343 hashval = srv_nacktab_hashval(server - otherservers, notice->z_uid);
1344 for (nacked = srv_nacktab[hashval]; nacked; nacked = nacked->next) {
1345 if (nacked->dest.srv_idx == server - otherservers
/* was the mis-encoded "¬ice->z_uid" */
1346 && ZCompareUID(&nacked->uid, &notice->z_uid)) {
1347 timer_reset(nacked->timer);
1348 free(nacked->packet);
1349 Unacked_delete(nacked);
1357 * retransmit a message to another server
/*
 * Retransmit timer callback for a packet forwarded to another server.
 *
 * arg is the Unacked record.  If the destination server is now SERV_DEAD
 * the record is unlinked, its packet freed, and everything else pending
 * for that server released.  Otherwise the packet is re-sent to the
 * server's address and the timer re-armed from rexmit_times[], indexed by
 * packet->rexmits; the -1 test on the next slot guards the end of that
 * table.
 * NOTE(review): the statement controlled by the -1 test (presumably the
 * increment of packet->rexmits) is not visible in this excerpt.
 */
1361 srv_rexmit(void *arg)
1363 Unacked *packet = (Unacked *) arg;
1365 /* retransmit the packet */
1367 if (otherservers[packet->dest.srv_idx].state == SERV_DEAD) {
1368 Unacked_delete(packet);
1369 free(packet->packet);
1370 srv_nack_release(&otherservers[packet->dest.srv_idx]);
1374 retval = ZSetDestAddr(&otherservers[packet->dest.srv_idx].addr);
1375 if (retval != ZERR_NONE) {
1376 syslog(LOG_WARNING, "srv_rexmit set addr: %s", error_message(retval));
1378 retval = ZSendPacket(packet->packet, packet->packsz, 0);
1379 if (retval != ZERR_NONE)
1380 syslog(LOG_WARNING, "srv_rexmit xmit: %s",
1381 error_message(retval));
1384 /* reset the timer */
1385 if (rexmit_times[packet->rexmits + 1] != -1)
1387 packet->timer = timer_set_rel(rexmit_times[packet->rexmits], srv_rexmit,
1392 * Clean up the not-yet-acked queue and release anything destined
/*
 * Drop every not-yet-acknowledged packet destined for `server`.
 *
 * Scans all SRV_NACKTAB_HASHSIZE buckets; for each record whose
 * dest.srv_idx is this server's index in otherservers[], the retransmit
 * timer is cancelled, the record unlinked and its packet freed.
 */
1397 srv_nack_release(Server *server)
1400 Unacked *nacked, *next;
1402 for (i = 0; i < SRV_NACKTAB_HASHSIZE; i++) {
1403 for (nacked = srv_nacktab[i]; nacked; nacked = next) {
/* fetch the successor first: nacked may be unlinked below */
1404 next = nacked->next;
1405 if (nacked->dest.srv_idx == server - otherservers) {
1406 timer_reset(nacked->timer);
1407 Unacked_delete(nacked);
1408 free(nacked->packet);
1416 * Adjust indices of not-yet-acked packets sent to other servers to
1417 * continue to refer to the correct server.
/*
 * Re-index the not-yet-acked table after otherservers[] has been
 * compacted.  new_idx maps an old server index to its new index; every
 * Unacked record in every bucket has dest.srv_idx translated through it.
 * NOTE(review): the condition guarding the "srv_nack_renumber error" log
 * is not visible in this excerpt -- presumably an invalid mapping.
 */
1421 srv_nack_renumber (int *new_idx)
1423 /* XXX release any private queue for this server */
1427 /* walk every not-yet-acked entry and remap its server index through
1428 new_idx[] (old index -> new index). */
1429 for (i = 0; i < SRV_NACKTAB_HASHSIZE; i++) {
1430 for (nacked = srv_nacktab[i]; nacked; nacked = nacked->next) {
1431 idx = new_idx[nacked->dest.srv_idx];
1433 syslog(LOG_ERR, "srv_nack_renumber error: [%d]=%d",
1434 nacked->dest.srv_idx, idx);
1437 nacked->dest.srv_idx = idx;
1443 * Queue this notice to be transmitted to the server when it is ready.
/*
 * Append a formatted packet to `server`'s pending queue for later
 * transmission.  A Pending record is allocated that takes `pack`, the auth
 * value and a copy of *who; it is linked after queue_last, or becomes both
 * head and tail when the queue is empty.
 */
1446 server_queue(Server *server,
1450 struct sockaddr_in *who)
1454 pending = (Pending *) malloc(sizeof(Pending));
1456 syslog(LOG_CRIT, "update_queue malloc");
1459 pending->packet = pack;
1461 pending->auth = auth;
1462 pending->who = *who;
1463 pending->next = NULL;
1465 /* put it on the end of the list */
1467 server->queue_last->next = pending;
1469 server->queue = server->queue_last = pending;
1473 * Pull a notice off the hold queue.
/*
 * Unlink the first Pending record from `server`'s hold queue by advancing
 * server->queue to the next entry.
 * NOTE(review): the empty-queue check and the return statement are not
 * visible in this excerpt.
 */
1477 server_dequeue(Server *server)
1483 pending = server->queue;
1484 server->queue = pending->next;
1489 * free storage used by a pending queue entry.
/*
 * Release a pending-queue entry; the visible statement frees the packet
 * buffer it owns.
 */
1493 server_pending_free(Pending *pending)
1495 free(pending->packet);
1501 * Queue something to be handled later by this server.
/*
 * Queue `notice` on this server's own entry (me_server) for later
 * handling.  The notice is first serialised with ZFormatRawNotice(); a
 * formatting failure is logged at LOG_CRIT.
 */
1505 server_self_queue(ZNotice_t* notice,
1507 struct sockaddr_in * who)
1513 retval = ZFormatRawNotice(notice, &pack, &packlen);
1514 if (retval != ZERR_NONE) {
1515 syslog(LOG_CRIT, "srv_self_queue format: %s", error_message(retval));
1518 server_queue(me_server, packlen, pack, auth, who);
1522 * dump info about servers onto the fp.
1523 * assumed to be called with SIGFPE blocked
1524 * (true if called from signal handler)
/*
 * Write one line per otherservers[] entry to fp, limbo (index 0) included,
 * in the form "index:address/state", with " (DUMPING)" appended while a
 * brain dump to that server is in progress.
 */
1527 server_dump_servers(FILE *fp)
1531 for (i = 0; i < nservers ; i++) {
1532 fprintf(fp, "%d:%s/%s%s\n", i, otherservers[i].addr_str,
1533 srv_states[otherservers[i].state],
1534 otherservers[i].dumping ? " (DUMPING)" : "");