2 * Unix networking abstraction.
11 #include <sys/types.h>
12 #include <sys/socket.h>
13 #include <sys/ioctl.h>
14 #include <arpa/inet.h>
15 #include <netinet/in.h>
16 #include <netinet/tcp.h>
20 #define DEFINE_PLUG_METHOD_MACROS
26 # define X11_UNIX_PATH "/tmp/.X11-unix/X"
29 #define ipv4_is_loopback(addr) (inet_netof(addr) == IN_LOOPBACKNET)
/*
 * Per-socket state. NOTE(review): the enclosing struct header and several
 * members are not visible in this listing; presumably this is
 * struct Socket_tag, which the constructors in this file allocate and
 * initialise field by field -- confirm.
 *
 * sk_getxdmdata() reads `fn' through a pointer it has not yet identified
 * as one of these structures, which is one place that depends on `fn'
 * staying the first member.
 */
32 struct socket_function_table *fn;
33 /* the above variable absolutely *must* be the first in this structure */
41 int frozen; /* this causes readability notifications to be ignored */
42 int frozen_readable; /* this means we missed at least one readability
43 * notification while we were frozen */
44 int localhost_only; /* for listening sockets */
47 int oobpending; /* is there OOB data available to read? */
/* Holds the errno value recorded by try_send(); consumed by
 * net_pending_errors(), which treats any nonzero value as an error. */
49 int pending_error; /* in case send() returns error */
54 * We used to typedef struct Socket_tag *Socket.
56 * Since we have made the networking abstraction slightly more
57 * abstract, Socket no longer means a tcp socket (it could mean
58 * an ssl socket). So now we must use Actual_Socket when we know
59 * we are talking about a tcp socket.
61 typedef struct Socket_tag *Actual_Socket;
66 * Which address family this address belongs to. AF_INET for
67 * IPv4; AF_INET6 for IPv6; AF_UNSPEC indicates that name
68 * resolution has not been done and a simple host name is held
69 * in this SockAddr structure.
/*
 * Address payload. `ai' is the list filled in by getaddrinfo(); `address'
 * is an IPv4 address kept in host byte order (it is stored with ntohl()
 * and converted back with htonl() wherever it is used). NOTE(review):
 * these two look like compile-time alternatives whose preprocessor guards
 * are not visible in this listing -- confirm.
 */
73 struct addrinfo *ai; /* Address IPv6 style. */
75 unsigned long address; /* Address IPv4 style. */
/* Besides unresolved host names, this buffer also carries the socket
 * path for AF_UNIX addresses (see platform_get_x11_unix_address()). */
77 char hostname[512]; /* Store an unresolved host name. */
/* Tree of all sockets known to this module; it is searched by file
 * descriptor in net_select_result() and walked by index elsewhere. */
80 static tree234 *sktree;
/* Forward declaration: re-registers a socket's select interest. */
82 static void uxsel_tell(Actual_Socket s);
/*
 * Ordering callback passed to newtree234() for the socket tree. Both
 * arguments are Actual_Socket pointers; the keys compared are their `s'
 * members. NOTE(review): the comparison statements themselves are not
 * visible in this listing.
 */
84 static int cmpfortree(void *av, void *bv)
86 Actual_Socket a = (Actual_Socket) av, b = (Actual_Socket) bv;
87 int as = a->s, bs = b->s;
/*
 * Search callback used with find234(): `av' points at a bare int key
 * and `bv' is an Actual_Socket whose `s' member is matched against it.
 * NOTE(review): the comparison statements are not visible in this listing.
 */
95 static int cmpforsearch(void *av, void *bv)
97 Actual_Socket b = (Actual_Socket) bv;
98 int as = *(int *)av, bs = b->s;
108 sktree = newtree234(cmpfortree);
/*
 * Visit every socket currently held in sktree, by index, until
 * index234() returns NULL. NOTE(review): the loop body is elided in this
 * listing; presumably it shuts each socket down -- confirm.
 */
111 void sk_cleanup(void)
117 for (i = 0; (s = index234(sktree, i)) != NULL; i++) {
/*
 * Translate an errno-style error code into a human-readable message.
 *
 * This is a thin wrapper around strerror(). The returned string belongs
 * to the C library: callers must not modify or free it, and a later
 * strerror() call may overwrite it.
 */
const char *error_string(int error)
{
    return strerror(error);
}
128 SockAddr sk_namelookup(const char *host, char **canonicalname)
130 SockAddr ret = snew(struct SockAddr_tag);
132 struct addrinfo hints;
136 struct hostent *h = NULL;
140 /* Clear the structure and default to IPv4. */
141 memset(ret, 0, sizeof(struct SockAddr_tag));
142 ret->family = 0; /* We set this one when we have resolved the host. */
147 hints.ai_flags = AI_CANONNAME;
148 hints.ai_family = AF_UNSPEC;
149 hints.ai_socktype = 0;
150 hints.ai_protocol = 0;
151 hints.ai_addrlen = 0;
152 hints.ai_addr = NULL;
153 hints.ai_canonname = NULL;
154 hints.ai_next = NULL;
155 err = getaddrinfo(host, NULL, NULL, &ret->ai);
157 ret->error = gai_strerror(err);
160 ret->family = ret->ai->ai_family;
162 if (ret->ai->ai_canonname != NULL)
163 strncat(realhost, ret->ai->ai_canonname, sizeof(realhost) - 1);
165 strncat(realhost, host, sizeof(realhost) - 1);
167 if ((a = inet_addr(host)) == (unsigned long) INADDR_NONE) {
169 * Otherwise use the IPv4-only gethostbyname... (NOTE:
170 * we don't use gethostbyname as a fallback!)
172 if (ret->family == 0) {
173 /*debug(("Resolving \"%s\" with gethostbyname() (IPv4 only)...\n", host)); */
174 if ( (h = gethostbyname(host)) )
175 ret->family = AF_INET;
177 if (ret->family == 0) {
178 ret->error = (h_errno == HOST_NOT_FOUND ||
179 h_errno == NO_DATA ||
180 h_errno == NO_ADDRESS ? "Host does not exist" :
181 h_errno == TRY_AGAIN ?
182 "Temporary name service failure" :
183 "gethostbyname: unknown error");
186 memcpy(&a, h->h_addr, sizeof(a));
187 /* This way we are always sure the h->h_name is valid :) */
188 strncpy(realhost, h->h_name, sizeof(realhost));
191 * This must be a numeric IPv4 address because it caused a
192 * success return from inet_addr.
194 ret->family = AF_INET;
195 strncpy(realhost, host, sizeof(realhost));
197 ret->address = ntohl(a);
199 realhost[lenof(realhost)-1] = '\0';
200 *canonicalname = snewn(1+strlen(realhost), char);
201 strcpy(*canonicalname, realhost);
205 SockAddr sk_nonamelookup(const char *host)
207 SockAddr ret = snew(struct SockAddr_tag);
209 ret->family = AF_UNSPEC;
210 strncpy(ret->hostname, host, lenof(ret->hostname));
211 ret->hostname[lenof(ret->hostname)-1] = '\0';
/*
 * Write a printable form of `addr' into buf, which holds buflen bytes.
 *
 *  - AF_UNSPEC: the stored, unresolved host name is copied out.
 *  - Otherwise either getnameinfo(NI_NUMERICHOST) is applied to the
 *    resolver result, with "<unknown>" substituted on failure, or the
 *    stored IPv4 address is formatted with inet_ntoa().
 *
 * NOTE(review): the last two paths look like compile-time alternatives
 * whose preprocessor guards are not visible in this listing. The
 * buf[buflen-1] writes assume buflen >= 1.
 */
215 void sk_getaddr(SockAddr addr, char *buf, int buflen)
218 if (addr->family == AF_UNSPEC) {
219 strncpy(buf, addr->hostname, buflen);
220 buf[buflen-1] = '\0';
223 if (getnameinfo(addr->ai->ai_addr, addr->ai->ai_addrlen, buf, buflen,
224 NULL, 0, NI_NUMERICHOST) != 0) {
226 strncat(buf, "<unknown>", buflen - 1);
230 assert(addr->family == AF_INET);
/* addr->address is kept in host order; convert back for inet_ntoa(). */
231 a.s_addr = htonl(addr->address);
232 strncpy(buf, inet_ntoa(a), buflen);
233 buf[buflen-1] = '\0';
/*
 * Report whether a host name textually denotes the local machine.
 *
 * Only the exact, case-sensitive string "localhost" is recognised; no
 * name resolution is performed. Returns 1 on a match and 0 otherwise,
 * including when `name' is NULL.
 */
int sk_hostname_is_local(char *name)
{
    if (name == NULL)
        return 0;
    return strcmp(name, "localhost") == 0;
}
/*
 * Return nonzero when `addr' is a loopback address.
 *
 * An unresolved (AF_UNSPEC) address is reported as not local. IPv4
 * addresses are tested with ipv4_is_loopback() and IPv6 addresses with
 * IN6_IS_ADDR_LOOPBACK().
 *
 * NOTE(review): the addr->ai path and the addr->address path look like
 * compile-time alternatives; their preprocessor guards are not visible
 * in this listing.
 */
243 int sk_address_is_local(SockAddr addr)
246 if (addr->family == AF_UNSPEC)
247 return 0; /* we don't know; assume not */
250 if (addr->family == AF_INET)
251 return ipv4_is_loopback(
252 ((struct sockaddr_in *)addr->ai->ai_addr)->sin_addr);
253 else if (addr->family == AF_INET6)
254 return IN6_IS_ADDR_LOOPBACK(
255 &((struct sockaddr_in6 *)addr->ai->ai_addr)->sin6_addr);
260 assert(addr->family == AF_INET);
261 a.s_addr = htonl(addr->address);
262 return ipv4_is_loopback(a);
/*
 * Map the address family of `addr' onto an ADDRTYPE_* code: AF_INET
 * gives ADDRTYPE_IPV4 and AF_INET6 gives ADDRTYPE_IPV6. NOTE(review):
 * the final fallback value of the conditional chain is not visible in
 * this listing.
 */
267 int sk_addrtype(SockAddr addr)
269 return (addr->family == AF_INET ? ADDRTYPE_IPV4 :
271 addr->family == AF_INET6 ? ADDRTYPE_IPV6 :
/*
 * Copy the raw binary address held in `addr' into buf.
 *
 * The number of bytes written is sizeof(struct in_addr) for AF_INET and
 * sizeof(struct in6_addr) for AF_INET6 on the resolver-result path, or
 * 4 bytes in network byte order on the plain IPv4 path. No length is
 * passed in, so the caller must supply a buffer large enough for the
 * family in question.
 */
276 void sk_addrcopy(SockAddr addr, char *buf)
280 if (addr->family == AF_INET)
281 memcpy(buf, &((struct sockaddr_in *)addr->ai->ai_addr)->sin_addr,
282 sizeof(struct in_addr));
283 else if (addr->family == AF_INET6)
284 memcpy(buf, &((struct sockaddr_in6 *)addr->ai->ai_addr)->sin6_addr,
285 sizeof(struct in6_addr));
291 assert(addr->family == AF_INET);
/* Stored in host order; convert so the copied bytes are network order. */
292 a.s_addr = htonl(addr->address);
293 memcpy(buf, (char*) &a.s_addr, 4);
/*
 * Release the resources attached to `addr'. The visible code hands a
 * non-NULL resolver result list back to freeaddrinfo(), so `ai' must be
 * either NULL or a valid list in every SockAddr that reaches here.
 * NOTE(review): the release of `addr' itself is not visible in this
 * listing.
 */
297 void sk_addr_free(SockAddr addr)
301 if (addr->ai != NULL)
302 freeaddrinfo(addr->ai);
/*
 * Plug accessor for a TCP socket. Only the downcast of `sock' is visible
 * in this listing. NOTE(review): presumably this returns the current
 * plug and installs `p' when one is given -- confirm.
 */
307 static Plug sk_tcp_plug(Socket sock, Plug p)
309 Actual_Socket s = (Actual_Socket) sock;
/*
 * Flush request for a TCP socket. As the note below explains, this is
 * deliberately a no-op: queued data is pushed to the socket eagerly.
 */
316 static void sk_tcp_flush(Socket s)
319 * We send data to the socket as soon as we can anyway,
320 * so we don't need to do anything here. :-)
/*
 * Forward declarations of the TCP implementations of the socket
 * operations, needed because the function table below refers to them
 * before they are defined.
 */
324 static void sk_tcp_close(Socket s);
325 static int sk_tcp_write(Socket s, const char *data, int len);
326 static int sk_tcp_write_oob(Socket s, const char *data, int len);
327 static void sk_tcp_set_private_ptr(Socket s, void *ptr);
328 static void *sk_tcp_get_private_ptr(Socket s);
329 static void sk_tcp_set_frozen(Socket s, int is_frozen);
330 static const char *sk_tcp_socket_error(Socket s);
/*
 * Function table installed in the `fn' member of every socket this file
 * creates. sk_getxdmdata() also compares against its address to
 * recognise such sockets. NOTE(review): only two of its entries are
 * visible in this listing.
 */
332 static struct socket_function_table tcp_fn_table = {
338 sk_tcp_set_private_ptr,
339 sk_tcp_get_private_ptr,
/*
 * Wrap an already-open OS socket descriptor in a new Socket structure.
 *
 * The new socket starts out writable, with no out-of-band data being
 * sent or pending and no pending error. On failure an errno-derived
 * message is stored in ret->error. NOTE(review): the condition guarding
 * that failure path, the return statements and the use of `plug' are
 * not visible in this listing.
 */
344 Socket sk_register(OSSocket sockfd, Plug plug)
349 * Create Socket structure.
351 ret = snew(struct Socket_tag);
352 ret->fn = &tcp_fn_table;
355 bufchain_init(&ret->output_data);
356 ret->writable = 1; /* to start with */
357 ret->sending_oob = 0;
359 ret->frozen_readable = 0;
360 ret->localhost_only = 0; /* unused, but best init anyway */
361 ret->pending_error = 0;
362 ret->oobpending = FALSE;
368 ret->error = error_string(errno);
/*
 * Open an outgoing stream connection to `addr' on `port'.
 *
 * Visible steps, in order:
 *   1. allocate and initialise a Socket structure (not connected, not
 *      writable);
 *   2. create a SOCK_STREAM socket of addr's family, which must already
 *      be resolved (asserted != AF_UNSPEC);
 *   3. apply SO_OOBINLINE, TCP_NODELAY and SO_KEEPALIVE;
 *   4. for non-AF_UNIX families, bind a local port, trying again while
 *      bind() fails with EADDRINUSE;
 *   5. build the remote socket address for the family;
 *   6. switch the descriptor to non-blocking mode and call connect(),
 *      where EINPROGRESS is not treated as a failure.
 *
 * Failures store an errno-derived message in ret->error.
 *
 * NOTE(review): many lines are elided in this listing, including the
 * conditions on the setsockopt() calls, the choice between the two
 * localport initialisers (presumably driven by `privport'), the case
 * labels of the switch, and the return statements -- confirm against
 * the full source.
 */
380 Socket sk_new(SockAddr addr, int port, int privport, int oobinline,
381 int nodelay, int keepalive, Plug plug)
385 struct sockaddr_in6 a6;
387 struct sockaddr_in a;
388 struct sockaddr_un au;
389 const struct sockaddr *sa;
396 * Create Socket structure.
398 ret = snew(struct Socket_tag);
399 ret->fn = &tcp_fn_table;
402 bufchain_init(&ret->output_data);
403 ret->connected = 0; /* to start with */
404 ret->writable = 0; /* to start with */
405 ret->sending_oob = 0;
407 ret->frozen_readable = 0;
408 ret->localhost_only = 0; /* unused, but best init anyway */
409 ret->pending_error = 0;
410 ret->oobpending = FALSE;
416 assert(addr->family != AF_UNSPEC);
417 s = socket(addr->family, SOCK_STREAM, 0);
421 ret->error = error_string(errno);
425 ret->oobinline = oobinline;
/* The return values of the setsockopt() calls below are not checked. */
428 setsockopt(s, SOL_SOCKET, SO_OOBINLINE, (void *) &b, sizeof(b));
433 setsockopt(s, IPPROTO_TCP, TCP_NODELAY, (void *) &b, sizeof(b));
438 setsockopt(s, SOL_SOCKET, SO_KEEPALIVE, (void *) &b, sizeof(b));
442 * Bind to local address.
445 localport = 1023; /* count from 1023 downwards */
447 localport = 0; /* just use port 0 (ie kernel picks) */
449 /* BSD IP stacks need sockaddr_in zeroed before filling in */
450 memset(&a,'\0',sizeof(struct sockaddr_in));
452 memset(&a6,'\0',sizeof(struct sockaddr_in6));
455 /* We don't try to bind to a local address for UNIX domain sockets. (Why
456 * do we bother doing the bind when localport == 0 anyway?) */
457 if(addr->family != AF_UNIX) {
458 /* Loop round trying to bind */
463 if (addr->family == AF_INET6) {
464 /* XXX use getaddrinfo to get a local address? */
465 a6.sin6_family = AF_INET6;
466 a6.sin6_addr = in6addr_any;
467 a6.sin6_port = htons(localport);
468 retcode = bind(s, (struct sockaddr *) &a6, sizeof(a6));
472 assert(addr->family == AF_INET);
473 a.sin_family = AF_INET;
474 a.sin_addr.s_addr = htonl(INADDR_ANY);
475 a.sin_port = htons(localport);
476 retcode = bind(s, (struct sockaddr *) &a, sizeof(a));
483 if (err != EADDRINUSE) /* failed, for a bad reason */
488 break; /* we're only looping once */
491 break; /* we might have got to the end */
495 ret->error = error_string(err);
501 * Connect to remote address.
503 switch(addr->family) {
/*
 * NOTE(review): both port assignments below go through a
 * struct sockaddr_in cast. If the second one belongs to the AF_INET6
 * case (its label is not visible here), it depends on sin_port and
 * sin6_port sharing an offset; sockaddr_in6/sin6_port would state the
 * intent directly -- confirm.
 */
506 /* XXX would be better to have got getaddrinfo() to fill in the port. */
507 ((struct sockaddr_in *)addr->ai->ai_addr)->sin_port =
509 sa = (const struct sockaddr *)addr->ai->ai_addr;
510 salen = addr->ai->ai_addrlen;
513 ((struct sockaddr_in *)addr->ai->ai_addr)->sin_port =
515 sa = (const struct sockaddr *)addr->ai->ai_addr;
516 salen = addr->ai->ai_addrlen;
520 a.sin_family = AF_INET;
521 a.sin_addr.s_addr = htonl(addr->address);
522 a.sin_port = htons((short) port);
523 sa = (const struct sockaddr *)&a;
/*
 * AF_UNIX: the socket path is taken from addr->hostname. The length
 * check is only an assert(), so in a build with NDEBUG defined the
 * strcpy() into sun_path is unchecked.
 */
528 assert(port == 0); /* to catch confused people */
529 assert(strlen(addr->hostname) < sizeof au.sun_path);
530 memset(&au, 0, sizeof au);
531 au.sun_family = AF_UNIX;
532 strcpy(au.sun_path, addr->hostname);
533 sa = (const struct sockaddr *)&au;
538 assert(0 && "unknown address family");
/* Go non-blocking before connect() so that the call cannot stall. */
541 fl = fcntl(s, F_GETFL);
543 fcntl(s, F_SETFL, fl | O_NONBLOCK);
545 if ((connect(s, sa, salen)) < 0) {
546 if ( errno != EINPROGRESS ) {
547 ret->error = error_string(errno);
552 * If we _don't_ get EWOULDBLOCK, the connect has completed
553 * and we should set the socket as connected and writable.
/*
 * Create a listening stream socket on `port'.
 *
 * Visible steps: allocate and initialise the Socket structure (recording
 * local_host_only), create the socket, set SO_REUSEADDR, bind, then
 * listen() with a backlog of SOMAXCONN. The bind address comes from
 * `srcaddr' when it parses as a numeric address (via getaddrinfo() with
 * AI_NUMERICHOST, or via inet_addr()), and otherwise from one of the
 * standard loopback / any-address constants. An explicit IPv4 `srcaddr'
 * overrides ret->localhost_only according to whether it is a loopback
 * address. Failures store an errno-derived message in ret->error.
 *
 * NOTE(review): many lines are elided in this listing, including the
 * conditions selecting loopback versus any-address (presumably
 * local_host_only), the declaration and size of `portstr', and the
 * return statements. Points to confirm against the full source:
 *   - the only visible socket() call uses AF_INET although an IPv6 bind
 *     path (a6 / in6addr_*) is present;
 *   - no freeaddrinfo() for `ai' is visible after the getaddrinfo()
 *     call succeeds;
 *   - sprintf() into `portstr' is unbounded.
 */
567 Socket sk_newlistener(char *srcaddr, int port, Plug plug, int local_host_only)
572 struct sockaddr_in6 a6;
574 struct addrinfo hints, *ai;
577 struct sockaddr_in a;
584 * Create Socket structure.
586 ret = snew(struct Socket_tag);
587 ret->fn = &tcp_fn_table;
590 bufchain_init(&ret->output_data);
591 ret->writable = 0; /* to start with */
592 ret->sending_oob = 0;
594 ret->frozen_readable = 0;
595 ret->localhost_only = local_host_only;
596 ret->pending_error = 0;
597 ret->oobpending = FALSE;
603 s = socket(AF_INET, SOCK_STREAM, 0);
607 ret->error = error_string(errno);
613 setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (const char *)&on, sizeof(on));
615 /* BSD IP stacks need sockaddr_in zeroed before filling in */
616 memset(&a,'\0',sizeof(struct sockaddr_in));
619 memset(&a6,'\0',sizeof(struct sockaddr_in6));
/* Every member of hints is assigned individually below. */
621 hints.ai_flags = AI_NUMERICHOST;
622 hints.ai_family = AF_UNSPEC;
623 hints.ai_socktype = 0;
624 hints.ai_protocol = 0;
625 hints.ai_addrlen = 0;
626 hints.ai_addr = NULL;
627 hints.ai_canonname = NULL;
628 hints.ai_next = NULL;
629 sprintf(portstr, "%d", port);
630 if (srcaddr != NULL && getaddrinfo(srcaddr, portstr, &hints, &ai) == 0)
631 retcode = bind(s, ai->ai_addr, ai->ai_addrlen);
636 * FIXME: Need two listening sockets, in principle, one for v4
640 a6.sin6_addr = in6addr_loopback;
642 a6.sin6_addr = in6addr_any;
643 a6.sin6_port = htons(port);
649 a.sin_family = AF_INET;
652 * Bind to source address. First try an explicitly
656 a.sin_addr.s_addr = inet_addr(srcaddr);
657 if (a.sin_addr.s_addr != INADDR_NONE) {
658 /* Override localhost_only with specified listen addr. */
659 ret->localhost_only = ipv4_is_loopback(a.sin_addr);
665 * ... and failing that, go with one of the standard ones.
669 a.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
671 a.sin_addr.s_addr = htonl(INADDR_ANY);
674 a.sin_port = htons((short)port);
675 retcode = bind(s, (struct sockaddr *) &a, sizeof(a));
685 ret->error = error_string(err);
690 if (listen(s, SOMAXCONN) < 0) {
692 ret->error = error_string(errno);
/*
 * Close operation for a TCP socket. Only the downcast of `sock' is
 * visible in this listing. NOTE(review): presumably the body unregisters
 * the socket and frees it -- confirm.
 */
702 static void sk_tcp_close(Socket sock)
704 Actual_Socket s = (Actual_Socket) sock;
/*
 * Report the local endpoint of `sock' through *ip and *port.
 *
 * `sock' arrives as an untyped pointer, so the function first checks
 * that its function-table pointer is &tcp_fn_table and returns 0
 * (failure) if it is not. That check is only meaningful because `fn' is
 * the first member of the socket structure. The local name is then
 * fetched with getsockname() and decoded according to its address
 * family; for a Unix-domain socket the comment below gives the
 * convention used (IP 0xFFFFFFFF, port = current pid).
 *
 * NOTE(review): the case labels, the getsockname() failure return, the
 * pid assignment and the success return are not visible in this listing.
 */
712 int sk_getxdmdata(void *sock, unsigned long *ip, int *port)
714 Actual_Socket s = (Actual_Socket) sock;
715 struct sockaddr_in addr;
719 * We must check that this socket really _is_ an Actual_Socket.
721 if (s->fn != &tcp_fn_table)
722 return 0; /* failure */
724 addrlen = sizeof(addr);
725 if (getsockname(s->s, (struct sockaddr *)&addr, &addrlen) < 0)
727 switch(addr.sin_family) {
/* Both values are handed back in host byte order. */
729 *ip = ntohl(addr.sin_addr.s_addr);
730 *port = ntohs(addr.sin_port);
734 * For a Unix socket, we return 0xFFFFFFFF for the IP address and
735 * our current pid for the port. Bizarre, but such is life.
737 *ip = ntohl(0xFFFFFFFF);
751 * The function which tries to send on a socket once it's deemed
/*
 * Push as much queued output as the socket will currently accept.
 *
 * The loop runs while out-of-band data is being sent or the output
 * buffer chain is non-empty. Each pass sends either the OOB buffer (with
 * MSG_OOB) or the first contiguous chunk of the buffer chain, then:
 *   - EWOULDBLOCK: nothing more can be sent for the moment;
 *   - a zero-byte send, ECONNABORTED or ECONNRESET: the error is stored
 *     in s->pending_error for later delivery rather than reported
 *     immediately (see the long comment below);
 *   - any other error: logged and then reported through fatalbox();
 *   - otherwise the bytes sent are removed from the OOB buffer or the
 *     buffer chain.
 *
 * NOTE(review): some lines are elided in this listing, including the
 * handling inside the EWOULDBLOCK branch. Also, when send() returns 0,
 * `err' is 0, so pending_error is stored as 0, a value that
 * net_pending_errors() does not treat as an error -- confirm that this
 * is intended.
 */
754 void try_send(Actual_Socket s)
756 while (s->sending_oob || bufchain_size(&s->output_data) > 0) {
762 if (s->sending_oob) {
763 urgentflag = MSG_OOB;
764 len = s->sending_oob;
768 bufchain_prefix(&s->output_data, &data, &len);
770 nsent = send(s->s, data, len, urgentflag);
771 noise_ultralight(nsent);
/* errno is only meaningful when send() actually failed. */
773 err = (nsent < 0 ? errno : 0);
774 if (err == EWOULDBLOCK) {
776 * Perfectly normal: we've sent all we can for the moment.
780 } else if (nsent == 0 ||
781 err == ECONNABORTED || err == ECONNRESET) {
783 * If send() returns CONNABORTED or CONNRESET, we
784 * unfortunately can't just call plug_closing(),
785 * because it's quite likely that we're currently
786 * _in_ a call from the code we'd be calling back
787 * to, so we'd have to make half the SSH code
788 * reentrant. Instead we flag a pending error on
789 * the socket, to be dealt with (by calling
790 * plug_closing()) at some suitable future moment.
792 s->pending_error = err;
795 /* We're inside the Unix frontend here, so we know
796 * that the frontend handle is unnecessary. */
797 logevent(NULL, error_string(err));
798 fatalbox("%s", error_string(err));
801 if (s->sending_oob) {
/* Partial OOB send: slide the unsent tail to the front of the buffer.
 * The regions overlap, hence memmove() rather than memcpy(). */
803 memmove(s->oobdata, s->oobdata+nsent, len-nsent);
804 s->sending_oob = len - nsent;
809 bufchain_consume(&s->output_data, nsent);
/*
 * Queue `len' bytes from `buf' for transmission on a TCP socket.
 *
 * The data is appended to the socket's output buffer chain. Returns the
 * number of bytes still held in that chain when the function returns,
 * i.e. the current backlog. NOTE(review): the send attempt and the
 * select-status update named by the comments below are on lines elided
 * from this listing.
 */
816 static int sk_tcp_write(Socket sock, const char *buf, int len)
818 Actual_Socket s = (Actual_Socket) sock;
821 * Add the data to the buffer list on the socket.
823 bufchain_add(&s->output_data, buf, len);
826 * Now try sending from the start of the buffer list.
832 * Update the select() status to correctly reflect whether or
833 * not we should be selecting for write.
837 return bufchain_size(&s->output_data);
/*
 * Send `len' bytes from `buf' as out-of-band (urgent) data.
 *
 * Any ordinary output still queued is discarded first. The urgent data
 * is copied into the socket's fixed-size oobdata buffer, which it must
 * fit (asserted). Returns the number of urgent bytes still waiting to
 * be sent. NOTE(review): the send attempt and the select-status update
 * named by the comments below are on lines elided from this listing.
 */
840 static int sk_tcp_write_oob(Socket sock, const char *buf, int len)
842 Actual_Socket s = (Actual_Socket) sock;
845 * Replace the buffer list on the socket with the data.
847 bufchain_clear(&s->output_data);
848 assert(len <= sizeof(s->oobdata));
849 memcpy(s->oobdata, buf, len);
850 s->sending_oob = len;
853 * Now try sending from the start of the buffer list.
859 * Update the select() status to correctly reflect whether or
860 * not we should be selecting for write.
864 return s->sending_oob;
/*
 * Callback registered with uxsel_set(): handle a select event on `fd'.
 *
 * The socket is looked up in sktree by descriptor; an unknown descriptor
 * returns 1. `event' is one of:
 *   4  exceptional - read urgent data with MSG_OOB and pass it to the
 *                    plug as type 2, or (on an oobinline socket) just
 *                    note that OOB data is pending;
 *   1  readable    - accept a connection on a listening socket, or
 *                    receive data and pass it to the plug (type 1 before
 *                    the urgent mark, type 0 otherwise). A zero-length
 *                    read or an error goes to plug_closing();
 *   2  writable    - mark the socket connected and writable, flush
 *                    queued output, and report a reduced backlog via
 *                    plug_sent().
 *
 * NOTE(review): many lines are elided in this listing (the switch
 * header, several conditions, the try_send() calls and the final
 * return). Points to confirm against the full source:
 *   - accept() is given an `int' length cast to socklen_t *;
 *   - the localhost_only test decodes the peer as struct sockaddr_in
 *     regardless of the listening socket's family;
 *   - errno is examined after an intervening noise_ultralight() call.
 */
867 static int net_select_result(int fd, int event)
871 char buf[20480]; /* nice big buffer for plenty of speed */
875 /* Find the Socket structure */
876 s = find234(sktree, &fd, cmpforsearch);
878 return 1; /* boggle */
880 noise_ultralight(event);
883 case 4: /* exceptional */
886 * On a non-oobinline socket, this indicates that we
887 * can immediately perform an OOB read and get back OOB
888 * data, which we will send to the back end with
889 * type==2 (urgent data).
891 ret = recv(s->s, buf, sizeof(buf), MSG_OOB);
892 noise_ultralight(ret);
894 const char *str = (ret == 0 ? "Internal networking trouble" :
895 error_string(errno));
896 /* We're inside the Unix frontend here, so we know
897 * that the frontend handle is unnecessary. */
901 return plug_receive(s->plug, 2, buf, ret);
907 * If we reach here, this is an oobinline socket, which
908 * means we should set s->oobpending and then deal with it
909 * when we get called for the readability event (which
910 * should also occur).
912 s->oobpending = TRUE;
914 case 1: /* readable; also acceptance */
917 * On a listening socket, the readability event means a
918 * connection is ready to be accepted.
920 struct sockaddr_in isa;
921 int addrlen = sizeof(struct sockaddr_in);
922 int t; /* socket of connection */
924 memset(&isa, 0, sizeof(struct sockaddr_in));
926 t = accept(s->s,(struct sockaddr *)&isa,(socklen_t *) &addrlen);
/* A nonzero result from plug_accepting() means the connection was
 * refused, so the new descriptor is closed here. */
931 if (s->localhost_only && !ipv4_is_loopback(isa.sin_addr)) {
932 close(t); /* someone let nonlocal through?! */
933 } else if (plug_accepting(s->plug, t)) {
934 close(t); /* denied or error */
940 * If we reach here, this is not a listening socket, so
941 * readability really means readability.
944 /* In the case the socket is still frozen, we don't even bother */
946 s->frozen_readable = 1;
951 * We have received data on the socket. For an oobinline
952 * socket, this might be data _before_ an urgent pointer,
953 * in which case we send it to the back end with type==1
954 * (data prior to urgent).
956 if (s->oobinline && s->oobpending) {
958 if (ioctl(s->s, SIOCATMARK, &atmark) == 0 && atmark)
959 s->oobpending = FALSE; /* clear this indicator */
/* While OOB data is still pending, read one byte at a time. */
963 ret = recv(s->s, buf, s->oobpending ? 1 : sizeof(buf), 0);
964 noise_ultralight(ret);
966 if (errno == EWOULDBLOCK) {
971 return plug_closing(s->plug, error_string(errno), errno, 0);
972 } else if (0 == ret) {
973 return plug_closing(s->plug, NULL, 0, 0);
975 return plug_receive(s->plug, atmark ? 0 : 1, buf, ret);
978 case 2: /* writable */
981 * select() reports a socket as _writable_ when an
982 * asynchronous connection is completed.
984 s->connected = s->writable = 1;
988 int bufsize_before, bufsize_after;
/* Backlog is measured on both sides of the (elided) send attempt; the
 * plug is told only if it shrank. */
990 bufsize_before = s->sending_oob + bufchain_size(&s->output_data);
992 bufsize_after = s->sending_oob + bufchain_size(&s->output_data);
993 if (bufsize_after < bufsize_before)
994 plug_sent(s->plug, bufsize_after);
1003 * Deal with socket errors detected in try_send().
/*
 * Deliver errors that try_send() recorded in s->pending_error: each
 * socket with a nonzero pending_error has that error passed to its plug
 * through plug_closing(). The comment below explains why the scan
 * handles one socket at a time and then starts again. NOTE(review): the
 * restart logic itself is on lines elided from this listing.
 */
1005 void net_pending_errors(void)
1011 * This might be a fiddly business, because it's just possible
1012 * that handling a pending error on one socket might cause
1013 * others to be closed. (I can't think of any reason this might
1014 * happen in current SSH implementation, but to maintain
1015 * generality of this network layer I'll assume the worst.)
1017 * So what we'll do is search the socket list for _one_ socket
1018 * with a pending error, and then handle it, and then search
1019 * the list again _from the beginning_. Repeat until we make a
1020 * pass with no socket errors present. That way we are
1021 * protected against the socket list changing under our feet.
1025 for (i = 0; (s = index234(sktree, i)) != NULL; i++) {
1026 if (s->pending_error) {
1028 * An error has occurred on this socket. Pass it to the
1031 plug_closing(s->plug, error_string(s->pending_error),
1032 s->pending_error, 0);
1040 * Each socket abstraction contains a `void *' private field in
1041 * which the client can keep state.
/* Store the client's opaque state pointer `ptr' in the socket. */
1043 static void sk_tcp_set_private_ptr(Socket sock, void *ptr)
1045 Actual_Socket s = (Actual_Socket) sock;
1046 s->private_ptr = ptr;
/* Return the opaque state pointer last stored by
 * sk_tcp_set_private_ptr(). */
1049 static void *sk_tcp_get_private_ptr(Socket sock)
1051 Actual_Socket s = (Actual_Socket) sock;
1052 return s->private_ptr;
1056 * Special error values are returned from sk_namelookup and sk_new
1057 * if there's a problem. These functions extract an error message,
1058 * or return NULL if there's no problem.
/*
 * Error accessor for a SockAddr. NOTE(review): the body is elided in
 * this listing; presumably it returns the `error' member that the
 * lookup functions set, or NULL when there was no problem -- confirm.
 */
1060 const char *sk_addr_error(SockAddr addr)
/*
 * Error accessor for a TCP socket. Only the downcast is visible in this
 * listing. NOTE(review): presumably it returns the `error' member set by
 * the socket constructors, or NULL when there was no problem -- confirm.
 */
1064 static const char *sk_tcp_socket_error(Socket sock)
1066 Actual_Socket s = (Actual_Socket) sock;
/*
 * Freeze or unfreeze a socket; while frozen, readability notifications
 * are ignored.
 *
 * Nothing is done when the requested state equals the current one. When
 * a socket is unfrozen after a readability notification was missed, one
 * byte is peeked with recv(MSG_PEEK) and frozen_readable is cleared. The
 * recv() result is not examined in the visible code. NOTE(review): the
 * lines around the peek and at the end of the function are elided in
 * this listing.
 */
1070 static void sk_tcp_set_frozen(Socket sock, int is_frozen)
1072 Actual_Socket s = (Actual_Socket) sock;
1073 if (s->frozen == is_frozen)
1075 s->frozen = is_frozen;
1076 if (!is_frozen && s->frozen_readable) {
1078 recv(s->s, &c, 1, MSG_PEEK);
1080 s->frozen_readable = 0;
/*
 * Recompute which select events socket `s' is interested in and register
 * them with uxsel_set(), naming net_select_result() as the callback.
 *
 * The interest mask `rwx' uses bit 1 for read (or accept), bit 2 for
 * write (or connect completion) and bit 4 for exceptional conditions. A
 * connected, unfrozen socket asks for read and exceptional events, and
 * for write events while output is queued. NOTE(review): the conditions
 * that select the connect and accept cases are elided in this listing.
 */
1084 static void uxsel_tell(Actual_Socket s)
1088 rwx |= 2; /* write == connect */
1089 if (s->connected && !s->frozen)
1090 rwx |= 1 | 4; /* read, except */
1091 if (bufchain_size(&s->output_data))
1092 rwx |= 2; /* write */
1094 rwx |= 1; /* read == accept */
1095 uxsel_set(s->s, rwx, net_select_result);
/*
 * Look up a service name in the system services database and return its
 * port number in host byte order.
 *
 * Any protocol is accepted (getservbyname() is given a NULL protocol).
 * Returns 0 when `service' is NULL or names no known service.
 */
int net_service_lookup(char *service)
{
    struct servent *se;

    if (service == NULL)
        return 0;

    se = getservbyname(service, NULL);
    if (se != NULL)
        return ntohs(se->s_port);   /* s_port is in network byte order */
    else
        return 0;
}
/*
 * Build the AF_UNIX SockAddr for X display number `displaynum'.
 *
 * The socket path, X11_UNIX_PATH followed by the decimal display
 * number, is formatted into the hostname buffer of a zeroed SockAddr.
 * If snprintf() fails or the path does not fit, ret->error is set and
 * *canonicalname is not written in the visible code. Otherwise
 * *canonicalname receives a dupstr() copy of the path. NOTE(review):
 * the end of the function lies beyond the last line visible in this
 * listing.
 */
1108 SockAddr platform_get_x11_unix_address(int displaynum, char **canonicalname)
1110 SockAddr ret = snew(struct SockAddr_tag);
1113 memset(ret, 0, sizeof *ret);
1114 ret->family = AF_UNIX;
1115 n = snprintf(ret->hostname, sizeof ret->hostname,
1116 "%s%d", X11_UNIX_PATH, displaynum);
1118 ret->error = "snprintf failed";
/* n is known to be non-negative here, so the comparison against the
 * unsigned sizeof value is safe. */
1119 else if(n >= sizeof ret->hostname)
1120 ret->error = "X11 UNIX name too long";
1122 *canonicalname = dupstr(ret->hostname);