2 * Unix networking abstraction.
11 #include <sys/types.h>
12 #include <sys/socket.h>
13 #include <sys/ioctl.h>
14 #include <arpa/inet.h>
15 #include <netinet/in.h>
16 #include <netinet/tcp.h>
19 #define DEFINE_PLUG_METHOD_MACROS
/*
 * Nonzero when the network part of `addr' (as extracted by
 * inet_netof()) equals IN_LOOPBACKNET, i.e. the IPv4 address lies on
 * the loopback network.
 */
24 #define ipv4_is_loopback(addr) (inet_netof(addr) == IN_LOOPBACKNET)
/*
 * Members of struct Socket_tag, the per-socket state reached through
 * an Actual_Socket pointer.
 */
27 struct socket_function_table *fn;
28 /* the above variable absolutely *must* be the first in this structure */
36 int frozen; /* this causes readability notifications to be ignored */
37 int frozen_readable; /* this means we missed at least one readability
38 * notification while we were frozen */
39 int localhost_only; /* for listening sockets */
/* Consulted together with oobinline when data is read in net_select_result(). */
42 int oobpending; /* is there OOB data available to read? */
/* Recorded by try_send() and reported later by net_pending_errors(). */
44 int pending_error; /* in case send() returns error */
49 * We used to typedef struct Socket_tag *Socket.
51 * Since we have made the networking abstraction slightly more
52 * abstract, Socket no longer means a tcp socket (it could mean
53 * an ssl socket). So now we must use Actual_Socket when we know
54 * we are talking about a tcp socket.
/* Pointer to the concrete TCP socket state, struct Socket_tag. */
56 typedef struct Socket_tag *Actual_Socket;
61 * Which address family this address belongs to. AF_INET for
62 * IPv4; AF_INET6 for IPv6; AF_UNSPEC indicates that name
63 * resolution has not been done and a simple host name is held
64 * in this SockAddr structure.
/*
 * Address storage members of struct SockAddr_tag.  `ai' is filled by
 * getaddrinfo(), `address' holds an IPv4 address in host byte order
 * (it is stored via ntohl() and read back via htonl()), and
 * `hostname' is used when the family is AF_UNSPEC.
 */
68 struct addrinfo *ai; /* Address IPv6 style. */
70 unsigned long address; /* Address IPv4 style. */
72 char hostname[512]; /* Store an unresolved host name. */
/* Tree of sockets; created with cmpfortree as its ordering function. */
75 static tree234 *sktree;
/* Forward declaration; the definition appears near the end of this file. */
77 static void uxsel_tell(Actual_Socket s);
/*
 * Ordering function handed to newtree234() for sktree.  Both
 * arguments are Actual_Sockets; the `s' member of each is extracted
 * for the comparison.
 */
79 static int cmpfortree(void *av, void *bv)
81 Actual_Socket a = (Actual_Socket) av, b = (Actual_Socket) bv;
82 int as = a->s, bs = b->s;
/*
 * Search comparison used with find234(): `av' points to a plain int
 * (net_select_result() passes the address of its fd) and `bv' is an
 * Actual_Socket whose `s' member it is compared against.
 */
90 static int cmpforsearch(void *av, void *bv)
92 Actual_Socket b = (Actual_Socket) bv;
93 int as = *(int *)av, bs = b->s;
/* Create the socket tree, ordered by cmpfortree. */
103 sktree = newtree234(cmpfortree);
/*
 * Visit every socket held in sktree, by index, until index234()
 * returns NULL.
 */
106 void sk_cleanup(void)
112 for (i = 0; (s = index234(sktree, i)) != NULL; i++) {
/*
 * Translate an errno-style error code into its message text, as
 * produced by strerror().
 */
118 const char *error_string(int error)
120 return strerror(error);
/*
 * Build a newly allocated SockAddr for `host'.
 *
 * Two resolution paths are visible here: getaddrinfo(), and an
 * IPv4-only path which accepts a numeric address via inet_addr() and
 * otherwise falls back to gethostbyname().  On failure ret->error is
 * set to a descriptive string.  *canonicalname receives a freshly
 * allocated copy of the resolved (or, failing that, the supplied)
 * host name.
 */
123 SockAddr sk_namelookup(const char *host, char **canonicalname)
125 SockAddr ret = snew(struct SockAddr_tag);
127 struct addrinfo hints;
131 struct hostent *h = NULL;
135 /* Clear the structure and default to IPv4. */
136 memset(ret, 0, sizeof(struct SockAddr_tag));
137 ret->family = 0; /* We set this one when we have resolved the host. */
/* Request the canonical name and accept any address family. */
142 hints.ai_flags = AI_CANONNAME;
143 hints.ai_family = AF_UNSPEC;
144 hints.ai_socktype = 0;
145 hints.ai_protocol = 0;
146 hints.ai_addrlen = 0;
147 hints.ai_addr = NULL;
148 hints.ai_canonname = NULL;
149 hints.ai_next = NULL;
/*
 * Pass the hints filled in above.  With a NULL hints pointer the
 * AI_CANONNAME request is never made, so ai_canonname stays NULL and
 * the canonical name is never reported to the caller.
 */
150 err = getaddrinfo(host, NULL, &hints, &ret->ai);
152 ret->error = gai_strerror(err);
155 ret->family = ret->ai->ai_family;
/* Prefer the canonical name; otherwise echo the name we were given. */
157 if (ret->ai->ai_canonname != NULL)
158 strncat(realhost, ret->ai->ai_canonname, sizeof(realhost) - 1);
160 strncat(realhost, host, sizeof(realhost) - 1);
/* INADDR_NONE from inet_addr() means `host' is not a numeric IPv4 address. */
162 if ((a = inet_addr(host)) == (unsigned long) INADDR_NONE) {
164 * Otherwise use the IPv4-only gethostbyname... (NOTE:
165 * we don't use gethostbyname as a fallback!)
167 if (ret->family == 0) {
168 /*debug(("Resolving \"%s\" with gethostbyname() (IPv4 only)...\n", host)); */
169 if ( (h = gethostbyname(host)) )
170 ret->family = AF_INET;
/* Still unresolved: turn h_errno into a human-readable message. */
172 if (ret->family == 0) {
173 ret->error = (h_errno == HOST_NOT_FOUND ||
174 h_errno == NO_DATA ||
175 h_errno == NO_ADDRESS ? "Host does not exist" :
176 h_errno == TRY_AGAIN ?
177 "Temporary name service failure" :
178 "gethostbyname: unknown error");
181 memcpy(&a, h->h_addr, sizeof(a));
182 /* This way we are always sure the h->h_name is valid :) */
183 strncpy(realhost, h->h_name, sizeof(realhost));
186 * This must be a numeric IPv4 address because it caused a
187 * success return from inet_addr.
189 ret->family = AF_INET;
190 strncpy(realhost, host, sizeof(realhost));
/* Keep the IPv4 address in host byte order. */
192 ret->address = ntohl(a);
/* strncpy() may leave realhost unterminated; force termination. */
194 realhost[lenof(realhost)-1] = '\0';
195 *canonicalname = snewn(1+strlen(realhost), char);
196 strcpy(*canonicalname, realhost);
/*
 * Build a newly allocated SockAddr that holds `host' without
 * resolving it: the family is AF_UNSPEC and the name is copied into
 * ret->hostname, truncated to fit and always NUL-terminated.
 */
200 SockAddr sk_nonamelookup(const char *host)
202 SockAddr ret = snew(struct SockAddr_tag);
204 ret->family = AF_UNSPEC;
205 strncpy(ret->hostname, host, lenof(ret->hostname));
206 ret->hostname[lenof(ret->hostname)-1] = '\0';
/*
 * Write a printable form of `addr' into `buf', which has room for
 * `buflen' bytes.
 *
 * An unresolved (AF_UNSPEC) address yields the stored host name.
 * Otherwise the visible paths use getnameinfo() with NI_NUMERICHOST
 * (appending "<unknown>" when that fails) or inet_ntoa() on the
 * stored IPv4 address.
 *
 * NOTE(review): buf[buflen-1] is written unconditionally, so callers
 * presumably always pass buflen >= 1 -- confirm.
 */
210 void sk_getaddr(SockAddr addr, char *buf, int buflen)
213 if (addr->family == AF_UNSPEC) {
214 strncpy(buf, addr->hostname, buflen);
215 buf[buflen-1] = '\0';
218 if (getnameinfo(addr->ai->ai_addr, addr->ai->ai_addrlen, buf, buflen,
219 NULL, 0, NI_NUMERICHOST) != 0) {
221 strncat(buf, "<unknown>", buflen - 1);
225 assert(addr->family == AF_INET);
/* addr->address is in host byte order; in_addr wants network order. */
226 a.s_addr = htonl(addr->address);
227 strncpy(buf, inet_ntoa(a), buflen);
228 buf[buflen-1] = '\0';
/*
 * Return nonzero if `name' is exactly the string "localhost"
 * (case-sensitive comparison), zero otherwise.
 */
233 int sk_hostname_is_local(char *name)
235 return !strcmp(name, "localhost");
/*
 * Report whether `addr' is a loopback address.
 *
 * An unresolved (AF_UNSPEC) address is reported as not local.  The
 * visible paths test the getaddrinfo() result for IPv4 or IPv6
 * loopback, or test the stored host-order IPv4 address.
 */
238 int sk_address_is_local(SockAddr addr)
241 if (addr->family == AF_UNSPEC)
242 return 0; /* we don't know; assume not */
245 if (addr->family == AF_INET)
246 return ipv4_is_loopback(
247 ((struct sockaddr_in *)addr->ai->ai_addr)->sin_addr);
248 else if (addr->family == AF_INET6)
249 return IN6_IS_ADDR_LOOPBACK(
250 &((struct sockaddr_in6 *)addr->ai->ai_addr)->sin6_addr);
255 assert(addr->family == AF_INET);
/* Convert the stored host-order address back to network order. */
256 a.s_addr = htonl(addr->address);
257 return ipv4_is_loopback(a);
/*
 * Map the address family of `addr' to an ADDRTYPE_* constant:
 * AF_INET gives ADDRTYPE_IPV4 and AF_INET6 gives ADDRTYPE_IPV6.
 * (The value used for any other family is not shown here.)
 */
262 int sk_addrtype(SockAddr addr)
264 return (addr->family == AF_INET ? ADDRTYPE_IPV4 :
266 addr->family == AF_INET6 ? ADDRTYPE_IPV6 :
/*
 * Copy the raw binary address held in `addr' into `buf'.
 *
 * The getaddrinfo() paths copy sizeof(struct in_addr) bytes for IPv4
 * or sizeof(struct in6_addr) bytes for IPv6; the other path copies
 * the 4 bytes of the IPv4 address in network byte order.  No length
 * is passed, so `buf' must be large enough for the family in use.
 */
271 void sk_addrcopy(SockAddr addr, char *buf)
275 if (addr->family == AF_INET)
276 memcpy(buf, &((struct sockaddr_in *)addr->ai->ai_addr)->sin_addr,
277 sizeof(struct in_addr));
278 else if (addr->family == AF_INET6)
279 memcpy(buf, &((struct sockaddr_in6 *)addr->ai->ai_addr)->sin6_addr,
280 sizeof(struct in6_addr));
286 assert(addr->family == AF_INET);
287 a.s_addr = htonl(addr->address);
288 memcpy(buf, (char*) &a.s_addr, 4);
/*
 * Release resources held by `addr'; the visible part frees the
 * getaddrinfo() result list when one is present.
 */
292 void sk_addr_free(SockAddr addr)
296 if (addr->ai != NULL)
297 freeaddrinfo(addr->ai);
/*
 * Plug method for TCP sockets: takes the generic Socket and a Plug
 * and returns a Plug.
 * NOTE(review): presumably installs `p' and/or returns the socket's
 * current plug -- the rest of the body is not shown; confirm.
 */
302 static Plug sk_tcp_plug(Socket sock, Plug p)
304 Actual_Socket s = (Actual_Socket) sock;
/* Flush method for TCP sockets; nothing needs doing, as explained below. */
311 static void sk_tcp_flush(Socket s)
314 * We send data to the socket as soon as we can anyway,
315 * so we don't need to do anything here. :-)
/*
 * Forward declarations of the TCP socket methods, which are defined
 * further down in this file.
 */
319 static void sk_tcp_close(Socket s);
320 static int sk_tcp_write(Socket s, const char *data, int len);
321 static int sk_tcp_write_oob(Socket s, const char *data, int len);
322 static void sk_tcp_set_private_ptr(Socket s, void *ptr);
323 static void *sk_tcp_get_private_ptr(Socket s);
324 static void sk_tcp_set_frozen(Socket s, int is_frozen);
325 static const char *sk_tcp_socket_error(Socket s);
/*
 * Method table for TCP sockets.  Its address is stored in the `fn'
 * member of every Socket_tag created in this file, and
 * sk_getxdmdata() compares against it to recognise a TCP socket.
 */
327 static struct socket_function_table tcp_fn_table = {
333 sk_tcp_set_private_ptr,
334 sk_tcp_get_private_ptr,
/*
 * Create a Socket structure for the OS-level socket `sockfd', to be
 * served by `plug'.  Unlike sk_new(), the structure starts out
 * marked writable.  An error, if any, is recorded in ret->error as
 * the error_string() of errno.
 */
339 Socket sk_register(OSSocket sockfd, Plug plug)
344 * Create Socket structure.
346 ret = snew(struct Socket_tag);
347 ret->fn = &tcp_fn_table;
350 bufchain_init(&ret->output_data);
351 ret->writable = 1; /* to start with */
352 ret->sending_oob = 0;
354 ret->frozen_readable = 0;
355 ret->localhost_only = 0; /* unused, but best init anyway */
356 ret->pending_error = 0;
357 ret->oobpending = FALSE;
363 ret->error = error_string(errno);
/*
 * Create a TCP socket and start connecting it to `addr' on `port'.
 *
 * addr      resolved address; its family must not be AF_UNSPEC.
 * port      remote port, in host byte order.
 * privport  NOTE(review): presumably selects between counting local
 *           ports down from 1023 and letting the kernel pick (port
 *           0) -- the test itself is not shown; confirm.
 * oobinline stored in ret->oobinline; SO_OOBINLINE is set below.
 * nodelay   TCP_NODELAY is set below.
 * plug      receiver of events for the new socket.
 *
 * The descriptor is switched to non-blocking mode before connect(),
 * so EINPROGRESS from connect() is not treated as a failure.  Real
 * failures are recorded in ret->error via error_string().
 */
375 Socket sk_new(SockAddr addr, int port, int privport, int oobinline,
376 int nodelay, Plug plug)
380 struct sockaddr_in6 a6;
382 struct sockaddr_in a;
389 * Create Socket structure.
391 ret = snew(struct Socket_tag);
392 ret->fn = &tcp_fn_table;
395 bufchain_init(&ret->output_data);
396 ret->connected = 0; /* to start with */
397 ret->writable = 0; /* to start with */
398 ret->sending_oob = 0;
400 ret->frozen_readable = 0;
401 ret->localhost_only = 0; /* unused, but best init anyway */
402 ret->pending_error = 0;
403 ret->oobpending = FALSE;
409 assert(addr->family != AF_UNSPEC);
410 s = socket(addr->family, SOCK_STREAM, 0);
414 ret->error = error_string(errno);
418 ret->oobinline = oobinline;
421 setsockopt(s, SOL_SOCKET, SO_OOBINLINE, (void *) &b, sizeof(b));
426 setsockopt(s, IPPROTO_TCP, TCP_NODELAY, (void *) &b, sizeof(b));
430 * Bind to local address.
433 localport = 1023; /* count from 1023 downwards */
435 localport = 0; /* just use port 0 (ie kernel picks) */
437 /* BSD IP stacks need sockaddr_in zeroed before filling in */
438 memset(&a,'\0',sizeof(struct sockaddr_in));
440 memset(&a6,'\0',sizeof(struct sockaddr_in6));
442 /* Loop round trying to bind */
447 if (addr->family == AF_INET6) {
448 /* XXX use getaddrinfo to get a local address? */
449 a6.sin6_family = AF_INET6;
450 a6.sin6_addr = in6addr_any;
451 a6.sin6_port = htons(localport);
452 retcode = bind(s, (struct sockaddr *) &a6, sizeof(a6));
456 assert(addr->family == AF_INET);
457 a.sin_family = AF_INET;
458 a.sin_addr.s_addr = htonl(INADDR_ANY);
459 a.sin_port = htons(localport);
460 retcode = bind(s, (struct sockaddr *) &a, sizeof(a));
467 if (err != EADDRINUSE) /* failed, for a bad reason */
472 break; /* we're only looping once */
475 break; /* we might have got to the end */
479 ret->error = error_string(err);
484 * Connect to remote address.
487 /* XXX would be better to have got getaddrinfo() to fill in the port. */
488 if (addr->family == AF_INET)
489 ((struct sockaddr_in *)addr->ai->ai_addr)->sin_port =
492 assert(addr->family == AF_INET6);
/*
 * An AF_INET6 address is a struct sockaddr_in6, so the port has to be
 * stored through sin6_port rather than by treating the address as a
 * struct sockaddr_in.
 */
493 ((struct sockaddr_in6 *)addr->ai->ai_addr)->sin6_port =
497 a.sin_family = AF_INET;
498 a.sin_addr.s_addr = htonl(addr->address);
499 a.sin_port = htons((short) port);
/* Make the descriptor non-blocking so that connect() returns at once. */
502 fl = fcntl(s, F_GETFL);
504 fcntl(s, F_SETFL, fl | O_NONBLOCK);
508 connect(s, addr->ai->ai_addr, addr->ai->ai_addrlen)
510 connect(s, (struct sockaddr *) &a, sizeof(a))
/* EINPROGRESS only means the non-blocking connect is still under way. */
513 if ( errno != EINPROGRESS ) {
514 ret->error = error_string(errno);
519 * If we _don't_ get EINPROGRESS, the connect has completed
520 * and we should set the socket as connected and writable.
/*
 * Create a listening TCP socket on `port'.
 *
 * srcaddr          optional numeric address to bind to; may be NULL.
 * port             local port, in host byte order.
 * plug             receiver of events for the new socket.
 * local_host_only  stored in ret->localhost_only; selects the
 *                  loopback address over the wildcard address when
 *                  no usable srcaddr is given.
 *
 * SO_REUSEADDR is set before binding.  Failures of socket(), bind()
 * or listen() are recorded in ret->error via error_string().
 */
534 Socket sk_newlistener(char *srcaddr, int port, Plug plug, int local_host_only)
539 struct sockaddr_in6 a6;
541 struct addrinfo hints, *ai;
544 struct sockaddr_in a;
551 * Create Socket structure.
553 ret = snew(struct Socket_tag);
554 ret->fn = &tcp_fn_table;
557 bufchain_init(&ret->output_data);
558 ret->writable = 0; /* to start with */
559 ret->sending_oob = 0;
561 ret->frozen_readable = 0;
562 ret->localhost_only = local_host_only;
563 ret->pending_error = 0;
564 ret->oobpending = FALSE;
/*
 * NOTE(review): the socket is always created as AF_INET, yet the
 * getaddrinfo() lookup below uses AF_UNSPEC and may therefore return
 * an IPv6 address for bind() -- confirm this is intended.
 */
570 s = socket(AF_INET, SOCK_STREAM, 0);
574 ret->error = error_string(errno);
580 setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (const char *)&on, sizeof(on));
582 /* BSD IP stacks need sockaddr_in zeroed before filling in */
583 memset(&a,'\0',sizeof(struct sockaddr_in));
586 memset(&a6,'\0',sizeof(struct sockaddr_in6));
/* AI_NUMERICHOST: srcaddr is parsed as a literal address, never resolved. */
588 hints.ai_flags = AI_NUMERICHOST;
589 hints.ai_family = AF_UNSPEC;
590 hints.ai_socktype = 0;
591 hints.ai_protocol = 0;
592 hints.ai_addrlen = 0;
593 hints.ai_addr = NULL;
594 hints.ai_canonname = NULL;
595 hints.ai_next = NULL;
/* NOTE(review): the size of portstr is not shown -- verify it fits any int. */
596 sprintf(portstr, "%d", port);
/* NOTE(review): no freeaddrinfo(ai) is visible after this use -- confirm. */
597 if (srcaddr != NULL && getaddrinfo(srcaddr, portstr, &hints, &ai) == 0)
598 retcode = bind(s, ai->ai_addr, ai->ai_addrlen);
603 * FIXME: Need two listening sockets, in principle, one for v4
607 a6.sin6_addr = in6addr_loopback;
609 a6.sin6_addr = in6addr_any;
610 a6.sin6_port = htons(port);
616 a.sin_family = AF_INET;
619 * Bind to source address. First try an explicitly
623 a.sin_addr.s_addr = inet_addr(srcaddr);
624 if (a.sin_addr.s_addr != INADDR_NONE) {
625 /* Override localhost_only with specified listen addr. */
626 ret->localhost_only = ipv4_is_loopback(a.sin_addr);
632 * ... and failing that, go with one of the standard ones.
636 a.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
638 a.sin_addr.s_addr = htonl(INADDR_ANY);
641 a.sin_port = htons((short)port);
642 retcode = bind(s, (struct sockaddr *) &a, sizeof(a));
652 ret->error = error_string(err);
657 if (listen(s, SOMAXCONN) < 0) {
659 ret->error = error_string(errno);
/*
 * Close method for TCP sockets; `sock' is treated as an
 * Actual_Socket.  (The teardown steps themselves are not shown here.)
 */
669 static void sk_tcp_close(Socket sock)
671 Actual_Socket s = (Actual_Socket) sock;
/*
 * Report the local IPv4 address and port of `sock', as obtained from
 * getsockname(), both converted to host byte order.
 *
 * Returns 0 (failure) if `sock' is not backed by tcp_fn_table.  The
 * outputs are written only after the getsockname() / AF_INET check.
 */
679 int sk_getxdmdata(void *sock, unsigned long *ip, int *port)
681 Actual_Socket s = (Actual_Socket) sock;
682 struct sockaddr_in addr;
686 * We must check that this socket really _is_ an Actual_Socket.
688 if (s->fn != &tcp_fn_table)
689 return 0; /* failure */
692 * If we ever implement connecting to a local X server through
693 * a Unix socket, we return 0xFFFFFFFF for the IP address and
694 * our current pid for the port. Bizarre, but such is life.
697 addrlen = sizeof(addr);
698 if (getsockname(s->s, (struct sockaddr *)&addr, &addrlen) < 0 ||
699 addr.sin_family != AF_INET)
702 *ip = ntohl(addr.sin_addr.s_addr);
703 *port = ntohs(addr.sin_port);
709 * The function which tries to send on a socket once it's deemed
/*
 * Push as much pending output as possible to the socket.
 *
 * Pending out-of-band data (s->oobdata, s->sending_oob bytes) is sent
 * with MSG_OOB; otherwise data comes from the front of the
 * s->output_data buffer chain.  The loop runs while either source
 * still has data.  Sent bytes are removed from whichever source
 * supplied them.
 */
712 void try_send(Actual_Socket s)
714 while (s->sending_oob || bufchain_size(&s->output_data) > 0) {
720 if (s->sending_oob) {
721 urgentflag = MSG_OOB;
722 len = s->sending_oob;
726 bufchain_prefix(&s->output_data, &data, &len);
728 nsent = send(s->s, data, len, urgentflag);
729 noise_ultralight(nsent);
/* Capture errno only when send() failed. */
731 err = (nsent < 0 ? errno : 0);
732 if (err == EWOULDBLOCK) {
734 * Perfectly normal: we've sent all we can for the moment.
738 } else if (nsent == 0 ||
739 err == ECONNABORTED || err == ECONNRESET) {
741 * If send() returns CONNABORTED or CONNRESET, we
742 * unfortunately can't just call plug_closing(),
743 * because it's quite likely that we're currently
744 * _in_ a call from the code we'd be calling back
745 * to, so we'd have to make half the SSH code
746 * reentrant. Instead we flag a pending error on
747 * the socket, to be dealt with (by calling
748 * plug_closing()) at some suitable future moment.
750 s->pending_error = err;
753 /* We're inside the Unix frontend here, so we know
754 * that the frontend handle is unnecessary. */
755 logevent(NULL, error_string(err));
756 fatalbox("%s", error_string(err));
759 if (s->sending_oob) {
/* Slide the unsent OOB bytes down to the start of oobdata. */
761 memmove(s->oobdata, s->oobdata+nsent, len-nsent);
762 s->sending_oob = len - nsent;
767 bufchain_consume(&s->output_data, nsent);
/*
 * Write method for TCP sockets: queue `len' bytes from `buf' on the
 * socket's output buffer chain.  Returns the number of bytes still
 * queued in that chain afterwards.
 */
774 static int sk_tcp_write(Socket sock, const char *buf, int len)
776 Actual_Socket s = (Actual_Socket) sock;
779 * Add the data to the buffer list on the socket.
781 bufchain_add(&s->output_data, buf, len);
784 * Now try sending from the start of the buffer list.
790 * Update the select() status to correctly reflect whether or
791 * not we should be selecting for write.
795 return bufchain_size(&s->output_data);
/*
 * Out-of-band write method for TCP sockets.  Any queued ordinary
 * output is discarded and `len' bytes from `buf' become the pending
 * OOB data; `len' must not exceed sizeof(s->oobdata).  Returns the
 * number of OOB bytes still unsent.
 */
798 static int sk_tcp_write_oob(Socket sock, const char *buf, int len)
800 Actual_Socket s = (Actual_Socket) sock;
803 * Replace the buffer list on the socket with the data.
805 bufchain_clear(&s->output_data);
806 assert(len <= sizeof(s->oobdata));
807 memcpy(s->oobdata, buf, len);
808 s->sending_oob = len;
811 * Now try sending from the start of the buffer list.
817 * Update the select() status to correctly reflect whether or
818 * not we should be selecting for write.
822 return s->sending_oob;
/*
 * Callback registered through uxsel_set(): handle `event' on file
 * descriptor `fd'.
 *
 * The socket is looked up in sktree by descriptor; 1 is returned if
 * none is found.  Event codes: 4 = exceptional (OOB data),
 * 1 = readable (or a connection to accept on a listening socket),
 * 2 = writable (also signals completion of an asynchronous connect).
 * Incoming data and closure are passed to the socket's plug through
 * plug_receive() and plug_closing().
 */
825 static int net_select_result(int fd, int event)
829 char buf[20480]; /* nice big buffer for plenty of speed */
833 /* Find the Socket structure */
834 s = find234(sktree, &fd, cmpforsearch);
836 return 1; /* boggle */
838 noise_ultralight(event);
841 case 4: /* exceptional */
844 * On a non-oobinline socket, this indicates that we
845 * can immediately perform an OOB read and get back OOB
846 * data, which we will send to the back end with
847 * type==2 (urgent data).
849 ret = recv(s->s, buf, sizeof(buf), MSG_OOB);
850 noise_ultralight(ret);
852 const char *str = (ret == 0 ? "Internal networking trouble" :
853 error_string(errno));
854 /* We're inside the Unix frontend here, so we know
855 * that the frontend handle is unnecessary. */
859 return plug_receive(s->plug, 2, buf, ret);
865 * If we reach here, this is an oobinline socket, which
866 * means we should set s->oobpending and then deal with it
867 * when we get called for the readability event (which
868 * should also occur).
870 s->oobpending = TRUE;
872 case 1: /* readable; also acceptance */
875 * On a listening socket, the readability event means a
876 * connection is ready to be accepted.
878 struct sockaddr_in isa;
/*
 * accept() takes a socklen_t *, so declare the length with that type
 * rather than casting the address of an int.
 */
879 socklen_t addrlen = sizeof(struct sockaddr_in);
880 int t; /* socket of connection */
882 memset(&isa, 0, sizeof(struct sockaddr_in));
884 t = accept(s->s,(struct sockaddr *)&isa, &addrlen);
889 if (s->localhost_only && !ipv4_is_loopback(isa.sin_addr)) {
890 close(t); /* someone let nonlocal through?! */
891 } else if (plug_accepting(s->plug, t)) {
892 close(t); /* denied or error */
898 * If we reach here, this is not a listening socket, so
899 * readability really means readability.
902 /* In the case the socket is still frozen, we don't even bother */
904 s->frozen_readable = 1;
909 * We have received data on the socket. For an oobinline
910 * socket, this might be data _before_ an urgent pointer,
911 * in which case we send it to the back end with type==1
912 * (data prior to urgent).
914 if (s->oobinline && s->oobpending) {
916 if (ioctl(s->s, SIOCATMARK, &atmark) == 0 && atmark)
917 s->oobpending = FALSE; /* clear this indicator */
/* While urgent data is still pending, read one byte at a time. */
921 ret = recv(s->s, buf, s->oobpending ? 1 : sizeof(buf), 0);
922 noise_ultralight(ret);
924 if (errno == EWOULDBLOCK) {
929 return plug_closing(s->plug, error_string(errno), errno, 0);
930 } else if (0 == ret) {
931 return plug_closing(s->plug, NULL, 0, 0);
933 return plug_receive(s->plug, atmark ? 0 : 1, buf, ret);
936 case 2: /* writable */
939 * select() reports a socket as _writable_ when an
940 * asynchronous connection is completed.
942 s->connected = s->writable = 1;
946 int bufsize_before, bufsize_after;
/* Notify the plug only if the amount of queued output went down. */
948 bufsize_before = s->sending_oob + bufchain_size(&s->output_data);
950 bufsize_after = s->sending_oob + bufchain_size(&s->output_data);
951 if (bufsize_after < bufsize_before)
952 plug_sent(s->plug, bufsize_after);
961 * Deal with socket errors detected in try_send().
/*
 * Report errors that try_send() recorded in s->pending_error: each
 * affected socket's plug is told the connection is closing, with the
 * error's message text and code.
 */
963 void net_pending_errors(void)
969 * This might be a fiddly business, because it's just possible
970 * that handling a pending error on one socket might cause
971 * others to be closed. (I can't think of any reason this might
972 * happen in current SSH implementation, but to maintain
973 * generality of this network layer I'll assume the worst.)
975 * So what we'll do is search the socket list for _one_ socket
976 * with a pending error, and then handle it, and then search
977 * the list again _from the beginning_. Repeat until we make a
978 * pass with no socket errors present. That way we are
979 * protected against the socket list changing under our feet.
983 for (i = 0; (s = index234(sktree, i)) != NULL; i++) {
984 if (s->pending_error) {
986 * An error has occurred on this socket. Pass it to the
989 plug_closing(s->plug, error_string(s->pending_error),
990 s->pending_error, 0);
998 * Each socket abstraction contains a `void *' private field in
999 * which the client can keep state.
/* Store the client's opaque pointer `ptr' in the socket's private_ptr field. */
1001 static void sk_tcp_set_private_ptr(Socket sock, void *ptr)
1003 Actual_Socket s = (Actual_Socket) sock;
1004 s->private_ptr = ptr;
/* Return the opaque pointer currently held in the socket's private_ptr field. */
1007 static void *sk_tcp_get_private_ptr(Socket sock)
1009 Actual_Socket s = (Actual_Socket) sock;
1010 return s->private_ptr;
1014 * Special error values are returned from sk_namelookup and sk_new
1015 * if there's a problem. These functions extract an error message,
1016 * or return NULL if there's no problem.
/*
 * Error accessor for a SockAddr.
 * NOTE(review): presumably returns the `error' string recorded by
 * sk_namelookup() -- the body is not shown; confirm.
 */
1018 const char *sk_addr_error(SockAddr addr)
/*
 * Error accessor for a TCP socket.
 * NOTE(review): presumably returns the `error' string recorded on
 * the socket when it was created -- the return statement is not
 * shown; confirm.
 */
1022 static const char *sk_tcp_socket_error(Socket sock)
1024 Actual_Socket s = (Actual_Socket) sock;
/*
 * Set or clear the socket's frozen flag; a call that does not change
 * the flag is detected up front.  If a readability notification was
 * missed while frozen (frozen_readable), unfreezing pokes the socket
 * with a one-byte MSG_PEEK recv() and clears that indicator.
 */
1028 static void sk_tcp_set_frozen(Socket sock, int is_frozen)
1030 Actual_Socket s = (Actual_Socket) sock;
1031 if (s->frozen == is_frozen)
1033 s->frozen = is_frozen;
1034 if (!is_frozen && s->frozen_readable) {
/* MSG_PEEK leaves the data in the socket's receive queue. */
1036 recv(s->s, &c, 1, MSG_PEEK);
1038 s->frozen_readable = 0;
/*
 * Work out which events socket `s' is currently interested in and
 * register them with uxsel_set(), naming net_select_result() as the
 * callback.  The mask `rwx' uses bit 1 for read, 2 for write and
 * 4 for exceptional conditions.
 */
1042 static void uxsel_tell(Actual_Socket s)
1046 rwx |= 2; /* write == connect */
1047 if (s->connected && !s->frozen)
1048 rwx |= 1 | 4; /* read, except */
1049 if (bufchain_size(&s->output_data))
1050 rwx |= 2; /* write */
1052 rwx |= 1; /* read == accept */
1053 uxsel_set(s->s, rwx, net_select_result);
/*
 * Look up the service named `service' with getservbyname(), matching
 * any protocol, and return its port number in host byte order.
 * (The handling of an unknown service is not shown here.)
 */
1056 int net_service_lookup(char *service)
1059 se = getservbyname(service, NULL);
1061 return ntohs(se->s_port);