2 * Unix networking abstraction.
11 #include <sys/types.h>
12 #include <sys/socket.h>
13 #include <sys/ioctl.h>
14 #include <arpa/inet.h>
15 #include <netinet/in.h>
16 #include <netinet/tcp.h>
19 #define DEFINE_PLUG_METHOD_MACROS
/* True when the network number of the IPv4 address `addr` (as computed by
 * inet_netof) equals IN_LOOPBACKNET. The callers visible in this file pass a
 * struct in_addr (a.sin_addr, isa.sin_addr). */
24 #define ipv4_is_loopback(addr) (inet_netof(addr) == IN_LOOPBACKNET)
/*
 * Per-socket state fields. The struct header is not visible in this excerpt;
 * elsewhere in the file these fields are reached through records allocated
 * with sizeof(struct Socket_tag).
 *
 * - frozen / frozen_readable: set and cleared by sk_tcp_set_frozen; set_rwx
 *   stops asking for read events while frozen is non-zero.
 * - localhost_only: tested in select_result before a connection accepted on
 *   a listening socket is handed to the plug.
 * - oobpending: raised by select_result on an exceptional event and cleared
 *   there once the SIOCATMARK ioctl reports the mark.
 * - pending_error: written by try_send, reported by net_pending_errors.
 */
27 struct socket_function_table *fn;
28 /* the above variable absolutely *must* be the first in this structure */
36 int frozen; /* this causes readability notifications to be ignored */
37 int frozen_readable; /* this means we missed at least one readability
38 * notification while we were frozen */
39 int localhost_only; /* for listening sockets */
42 int oobpending; /* is there OOB data available to read? */
44 int pending_error; /* in case send() returns error */
49 * We used to typedef struct Socket_tag *Socket.
51 * Since we have made the networking abstraction slightly more
52 * abstract, Socket no longer means a tcp socket (it could mean
53 * an ssl socket). So now we must use Actual_Socket when we know
54 * we are talking about a tcp socket.
/* Pointer to the concrete TCP socket record. The sk_tcp_* functions in this
 * file receive a generic Socket and cast it to this type before touching any
 * field. */
56 typedef struct Socket_tag *Actual_Socket;
/*
 * Resolved-address fields (struct header not visible in this excerpt).
 *
 * - address: IPv4 address in HOST byte order. sk_namelookup stores ntohl(a)
 *   and every visible reader converts back with htonl before using it.
 * - ai: result list written by getaddrinfo in sk_namelookup; sk_new reads the
 *   IPv6 address from ai->ai_addr.
 */
60 /* address family this belongs to, AF_INET for IPv4, AF_INET6 for IPv6. */
62 unsigned long address; /* Address IPv4 style. */
64 struct addrinfo *ai; /* Address IPv6 style. */
/* Tree of socket records. It is created with cmpfortree as its ordering
 * function, searched by descriptor through cmpforsearch (select_result), and
 * walked by position with index234 (net_pending_errors, first_socket,
 * next_socket). */
68 static tree234 *sktree;
/*
 * Ordering callback handed to newtree234 for sktree.
 *
 * Both arguments are socket records; the values compared are their
 * descriptor fields (a->s and b->s). The comparison statements themselves
 * are not visible in this excerpt.
 */
70 static int cmpfortree(void *av, void *bv)
72 Actual_Socket a = (Actual_Socket) av, b = (Actual_Socket) bv;
73 int as = a->s, bs = b->s;
/*
 * Search callback used with find234 on sktree.
 *
 * av is NOT a socket record: it is a descriptor value carried inside the
 * void pointer (select_result passes (void *) fd). bv is a socket record,
 * and its descriptor field is the other side of the comparison. The
 * comparison statements are not visible in this excerpt.
 *
 * NOTE(review): converting a void pointer straight to int loses bits on
 * platforms where pointers are wider than int; a round trip through intptr_t
 * would avoid compiler warnings -- confirm the target platforms.
 */
81 static int cmpforsearch(void *av, void *bv)
83 Actual_Socket b = (Actual_Socket) bv;
84 int as = (int) av, bs = b->s;
94 sktree = newtree234(cmpfortree);
103 for (i = 0; (s = index234(sktree, i)) != NULL; i++) {
/*
 * Return the C library message for an errno-style code.
 *
 * The pointer is exactly what strerror returns: the caller must not free it,
 * and a later strerror call may overwrite the text. Several functions in this
 * file store the returned pointer in a socket or address error field.
 */
109 char *error_string(int error)
111 return strerror(error);
/*
 * sk_namelookup: resolve `host` into a newly allocated SockAddr.
 *
 * host:          host name or dotted-quad IPv4 text.
 * canonicalname: out parameter; receives a freshly smalloc-ed copy of the
 *                name chosen for the host (ownership passes to the caller).
 *
 * Visible flow:
 *  - The record is zeroed and family starts at 0, meaning not yet resolved.
 *  - If inet_addr accepts the text, it is a numeric IPv4 address: family
 *    becomes AF_INET and the input text doubles as the canonical name.
 *  - Otherwise getaddrinfo is tried, and gethostbyname is used while family
 *    is still 0. The preprocessor conditions that select between these paths
 *    are not visible in this excerpt.
 *  - If nothing resolved, ret->error gets a fixed message chosen from
 *    h_errno.
 *  - ret->address always ends up in HOST byte order (ntohl of the value
 *    held in a).
 *
 * Each strncpy into realhost may fill the buffer without writing a
 * terminator; the single assignment to realhost[lenof(realhost)-1] before
 * the strlen call is what guarantees termination, so it must stay ahead of
 * the strlen and strcpy calls.
 *
 * NOTE(review): the size passed to getnameinfo is derived from the address
 * family, while the address itself comes from ret->ai->ai_addr; using
 * ret->ai->ai_addrlen would avoid the manual choice -- confirm.
 */
114 SockAddr sk_namelookup(char *host, char **canonicalname)
116 SockAddr ret = smalloc(sizeof(struct SockAddr_tag));
118 struct hostent *h = NULL;
121 /* Clear the structure and default to IPv4. */
122 memset(ret, 0, sizeof(struct SockAddr_tag));
123 ret->family = 0; /* We set this one when we have resolved the host. */
127 if ((a = inet_addr(host)) == (unsigned long) INADDR_NONE) {
129 if (getaddrinfo(host, NULL, NULL, &ret->ai) == 0) {
130 ret->family = ret->ai->ai_family;
135 * Otherwise use the IPv4-only gethostbyname... (NOTE:
136 * we don't use gethostbyname as a fallback!)
138 if (ret->family == 0) {
139 /*debug(("Resolving \"%s\" with gethostbyname() (IPv4 only)...\n", host)); */
140 if ( (h = gethostbyname(host)) )
141 ret->family = AF_INET;
143 if (ret->family == 0) {
144 ret->error = (h_errno == HOST_NOT_FOUND ||
145 h_errno == NO_DATA ||
146 h_errno == NO_ADDRESS ? "Host does not exist" :
147 h_errno == TRY_AGAIN ?
148 "Temporary name service failure" :
149 "gethostbyname: unknown error");
155 /* If we got an address info use that... */
158 /* Are we in IPv4 fallback mode? */
159 /* We put the IPv4 address into the a variable so we can further-on use the IPv4 code... */
160 if (ret->family == AF_INET)
162 (char *) &((struct sockaddr_in *) ret->ai->
163 ai_addr)->sin_addr, sizeof(a));
165 /* Now let's find that canonicalname... */
166 if (getnameinfo((struct sockaddr *) ret->ai->ai_addr,
168 AF_INET ? sizeof(struct sockaddr_in) :
169 sizeof(struct sockaddr_in6), realhost,
170 sizeof(realhost), NULL, 0, 0) != 0) {
171 strncpy(realhost, host, sizeof(realhost));
174 /* We used the IPv4-only gethostbyname()... */
178 memcpy(&a, h->h_addr, sizeof(a));
179 /* This way we are always sure the h->h_name is valid :) */
180 strncpy(realhost, h->h_name, sizeof(realhost));
184 * This must be a numeric IPv4 address because it caused a
185 * success return from inet_addr.
187 ret->family = AF_INET;
188 strncpy(realhost, host, sizeof(realhost));
190 ret->address = ntohl(a);
191 realhost[lenof(realhost)-1] = '\0';
192 *canonicalname = smalloc(1+strlen(realhost));
193 strcpy(*canonicalname, realhost);
/*
 * sk_getaddr: write a printable form of `addr` into buf.
 *
 * addr:   resolved address.
 * buf:    destination buffer.
 * buflen: capacity of buf in bytes.
 *
 * For AF_INET the stored host-order address is converted back to network
 * order and formatted with inet_ntoa. The other branch is only a FIXME
 * placeholder: no text form is produced for IPv6 addresses.
 *
 * NOTE(review): strncpy writes no terminator when the source is buflen
 * characters or longer, and no line visible here adds one. A dotted quad
 * needs up to 16 bytes including the terminator, so a smaller buflen would
 * leave buf unterminated -- confirm against the callers.
 */
197 void sk_getaddr(SockAddr addr, char *buf, int buflen)
200 if (addr->family == AF_INET) {
203 a.s_addr = htonl(addr->address);
204 strncpy(buf, inet_ntoa(a), buflen);
207 FIXME; /* I don't know how to get a text form of an IPv6 address. */
/*
 * sk_addrtype: classify a resolved address.
 *
 * Returns ADDRTYPE_IPV4 when the family is AF_INET and ADDRTYPE_IPV6 for
 * every other value.
 *
 * NOTE(review): sk_namelookup leaves family at 0 when resolution fails, and
 * such an address would be reported as IPv6 here -- presumably callers check
 * the address error first; verify.
 */
212 int sk_addrtype(SockAddr addr)
214 return (addr->family == AF_INET ? ADDRTYPE_IPV4 : ADDRTYPE_IPV6);
/*
 * sk_addrcopy: copy the raw address bytes of `addr` into buf.
 *
 * For AF_INET, 4 bytes are written in network byte order; the other branch
 * writes 16 bytes. buf must have room for whichever applies; no length is
 * passed in.
 *
 * NOTE(review): the 16-byte copy reads from addr->ai itself, i.e. the first
 * bytes of the struct addrinfo, not from an address held inside it. sk_new
 * obtains the IPv6 address as
 * ((struct sockaddr_in6 *) addr->ai->ai_addr)->sin6_addr, which looks like
 * the intended source here as well -- confirm and fix.
 */
217 void sk_addrcopy(SockAddr addr, char *buf)
220 if (addr->family == AF_INET) {
223 a.s_addr = htonl(addr->address);
224 memcpy(buf, (char*) &a.s_addr, 4);
227 memcpy(buf, (char*) addr->ai, 16);
232 void sk_addr_free(SockAddr addr)
/*
 * sk_tcp_plug: plug accessor for the TCP socket type.
 *
 * Only the cast from the generic Socket to the TCP record is visible in this
 * excerpt. NOTE(review): presumably it installs p and returns the previously
 * attached plug -- verify against the full body.
 */
237 static Plug sk_tcp_plug(Socket sock, Plug p)
239 Actual_Socket s = (Actual_Socket) sock;
/*
 * sk_tcp_flush: flush entry for the TCP socket type.
 *
 * As the note inside the function says, queued data is already pushed to the
 * socket as early as possible, so there is nothing to do here.
 */
246 static void sk_tcp_flush(Socket s)
249 * We send data to the socket as soon as we can anyway,
250 * so we don't need to do anything here. :-)
254 static void sk_tcp_close(Socket s);
255 static int sk_tcp_write(Socket s, char *data, int len);
256 static int sk_tcp_write_oob(Socket s, char *data, int len);
257 static void sk_tcp_set_private_ptr(Socket s, void *ptr);
258 static void *sk_tcp_get_private_ptr(Socket s);
259 static void sk_tcp_set_frozen(Socket s, int is_frozen);
260 static char *sk_tcp_socket_error(Socket s);
/*
 * sk_register: build a socket record around an existing connection handle.
 *
 * sock: opaque handle for the connection. NOTE(review): presumably the
 *       descriptor cast to a pointer, the same way select_result passes an
 *       accepted descriptor to plug_accepting -- confirm.
 * plug: event receiver for the new socket (its assignment is not visible in
 *       this excerpt).
 *
 * The record starts out writable (sk_new and sk_newlistener start at 0),
 * with no OOB data in flight, not missed-readable, and no pending error.
 * On a failure whose check is not visible here, ret->error is set to the
 * strerror text for errno.
 */
262 Socket sk_register(void *sock, Plug plug)
264 static struct socket_function_table fn_table = {
270 sk_tcp_set_private_ptr,
271 sk_tcp_get_private_ptr,
279 * Create Socket structure.
281 ret = smalloc(sizeof(struct Socket_tag));
285 bufchain_init(&ret->output_data);
286 ret->writable = 1; /* to start with */
287 ret->sending_oob = 0;
289 ret->frozen_readable = 0;
290 ret->localhost_only = 0; /* unused, but best init anyway */
291 ret->pending_error = 0;
292 ret->oobpending = FALSE;
298 ret->error = error_string(errno);
/*
 * sk_new: create a TCP socket and connect it to addr:port.
 *
 * addr:      resolved remote address; its family selects the socket family.
 * port:      remote port in host order (converted with htons).
 * privport:  NOTE(review): presumably selects the local port search that
 *            counts down from 1023 instead of port 0 -- the test itself is
 *            not visible here.
 * oobinline: stored in the record; the SO_OOBINLINE option is set from a
 *            local flag whose assignment is not visible.
 * nodelay:   presumably drives TCP_NODELAY in the same way -- verify.
 * plug:      event receiver for the socket.
 *
 * Visible sequence: allocate and initialise the record (not connected, not
 * writable), create the socket, set socket options, bind a local port in a
 * loop that only continues on EADDRINUSE, then connect. EWOULDBLOCK from
 * connect is tolerated; any other failure stores the strerror text in
 * ret->error.
 *
 * NOTE(review): setsockopt return values are not checked in the visible
 * lines.
 * NOTE(review): port is narrowed with a (short) cast before htons; values
 * above 32767 pass through a signed short first. The low 16 bits survive on
 * common platforms, but an unsigned short cast would state the intent.
 */
309 Socket sk_new(SockAddr addr, int port, int privport, int oobinline,
310 int nodelay, Plug plug)
312 static struct socket_function_table fn_table = {
318 sk_tcp_set_private_ptr,
319 sk_tcp_get_private_ptr,
326 struct sockaddr_in6 a6;
328 struct sockaddr_in a;
334 * Create Socket structure.
336 ret = smalloc(sizeof(struct Socket_tag));
340 bufchain_init(&ret->output_data);
341 ret->connected = 0; /* to start with */
342 ret->writable = 0; /* to start with */
343 ret->sending_oob = 0;
345 ret->frozen_readable = 0;
346 ret->localhost_only = 0; /* unused, but best init anyway */
347 ret->pending_error = 0;
348 ret->oobpending = FALSE;
354 s = socket(addr->family, SOCK_STREAM, 0);
358 ret->error = error_string(errno);
362 ret->oobinline = oobinline;
365 setsockopt(s, SOL_SOCKET, SO_OOBINLINE, (void *) &b, sizeof(b));
370 setsockopt(s, IPPROTO_TCP, TCP_NODELAY, (void *) &b, sizeof(b));
374 * Bind to local address.
377 localport = 1023; /* count from 1023 downwards */
379 localport = 0; /* just use port 0 (ie kernel picks) */
381 /* Loop round trying to bind */
386 if (addr->family == AF_INET6) {
387 memset(&a6, 0, sizeof(a6));
388 a6.sin6_family = AF_INET6;
389 /*a6.sin6_addr = in6addr_any; *//* == 0 */
390 a6.sin6_port = htons(localport);
394 a.sin_family = AF_INET;
395 a.sin_addr.s_addr = htonl(INADDR_ANY);
396 a.sin_port = htons(localport);
399 retcode = bind(s, (addr->family == AF_INET6 ?
400 (struct sockaddr *) &a6 :
401 (struct sockaddr *) &a),
403 AF_INET6 ? sizeof(a6) : sizeof(a)));
405 retcode = bind(s, (struct sockaddr *) &a, sizeof(a));
412 if (err != EADDRINUSE) /* failed, for a bad reason */
417 break; /* we're only looping once */
420 break; /* we might have got to the end */
424 ret->error = error_string(err);
429 * Connect to remote address.
432 if (addr->family == AF_INET6) {
/* NOTE(review): this branch fills a6 but the memset below clears a (the IPv4
 * structure). a6 still holds whatever the bind step left in it, so the
 * intended call is most likely memset(&a6, 0, sizeof(a6)) -- confirm. */
433 memset(&a, 0, sizeof(a));
434 a6.sin6_family = AF_INET6;
435 a6.sin6_port = htons((short) port);
437 ((struct sockaddr_in6 *) addr->ai->ai_addr)->sin6_addr;
441 a.sin_family = AF_INET;
442 a.sin_addr.s_addr = htonl(addr->address);
443 a.sin_port = htons((short) port);
448 connect(s, ((addr->family == AF_INET6) ?
449 (struct sockaddr *) &a6 : (struct sockaddr *) &a),
450 (addr->family == AF_INET6) ? sizeof(a6) : sizeof(a))
452 connect(s, (struct sockaddr *) &a, sizeof(a))
456 * FIXME: We are prepared to receive EWOULDBLOCK here,
457 * because we might want the connection to be made
458 * asynchronously; but how do we actually arrange this in
461 if ( errno != EWOULDBLOCK ) {
462 ret->error = error_string(errno);
467 * If we _don't_ get EWOULDBLOCK, the connect has completed
468 * and we should set the socket as connected and writable.
/*
 * sk_newlistener: create a listening TCP socket on `port`.
 *
 * srcaddr:         optional dotted-quad text of the local address to bind.
 *                  When inet_addr accepts it, localhost_only is overridden
 *                  by whether that address is on the loopback network.
 * port:            local port in host order.
 * plug:            event receiver for the socket.
 * local_host_only: initial value of the localhost_only flag; it selects
 *                  between the loopback and the any address when srcaddr
 *                  does not supply one (the tests are not visible here).
 *
 * Visible sequence: allocate and initialise the record, create the socket,
 * set SO_REUSEADDR, fill the bind address, bind, then listen with a backlog
 * of SOMAXCONN. Failures store the strerror text in ret->error.
 *
 * NOTE(review): the IPv6 branches test addr->family, but this function has
 * no addr parameter and no declaration of one is visible in this excerpt;
 * unless it is declared in a missing line, those branches cannot compile.
 * NOTE(review): the socket is always created with AF_INET, so binding a
 * sockaddr_in6 to it in the IPv6 branch would fail even if it compiled.
 * NOTE(review): the IPv4 path narrows port with a (short) cast while the
 * IPv6 path passes it to htons directly; the two should agree.
 */
479 Socket sk_newlistener(char *srcaddr, int port, Plug plug, int local_host_only)
481 static struct socket_function_table fn_table = {
487 sk_tcp_set_private_ptr,
488 sk_tcp_get_private_ptr,
495 struct sockaddr_in6 a6;
497 struct sockaddr_in a;
504 * Create Socket structure.
506 ret = smalloc(sizeof(struct Socket_tag));
510 bufchain_init(&ret->output_data);
511 ret->writable = 0; /* to start with */
512 ret->sending_oob = 0;
514 ret->frozen_readable = 0;
515 ret->localhost_only = local_host_only;
516 ret->pending_error = 0;
517 ret->oobpending = FALSE;
523 s = socket(AF_INET, SOCK_STREAM, 0);
527 ret->error = error_string(errno);
533 setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (const char *)&on, sizeof(on));
536 if (addr->family == AF_INET6) {
537 memset(&a6, 0, sizeof(a6));
538 a6.sin6_family = AF_INET6;
539 /* FIXME: srcaddr is ignored for IPv6, because I (SGT) don't
540 * know how to do it. :-) */
542 a6.sin6_addr = in6addr_loopback;
544 a6.sin6_addr = in6addr_any;
545 a6.sin6_port = htons(port);
550 a.sin_family = AF_INET;
553 * Bind to source address. First try an explicitly
557 a.sin_addr.s_addr = inet_addr(srcaddr);
558 if (a.sin_addr.s_addr != INADDR_NONE) {
559 /* Override localhost_only with specified listen addr. */
560 ret->localhost_only = ipv4_is_loopback(a.sin_addr);
566 * ... and failing that, go with one of the standard ones.
570 a.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
572 a.sin_addr.s_addr = htonl(INADDR_ANY);
575 a.sin_port = htons((short)port);
578 retcode = bind(s, (addr->family == AF_INET6 ?
579 (struct sockaddr *) &a6 :
580 (struct sockaddr *) &a),
582 AF_INET6 ? sizeof(a6) : sizeof(a)));
584 retcode = bind(s, (struct sockaddr *) &a, sizeof(a));
593 ret->error = error_string(err);
598 if (listen(s, SOMAXCONN) < 0) {
600 ret->error = error_string(errno);
/*
 * sk_tcp_close: close entry for the TCP socket type.
 *
 * Only the cast to the TCP record is visible in this excerpt; the teardown
 * statements are in lines not shown here.
 */
609 static void sk_tcp_close(Socket sock)
611 Actual_Socket s = (Actual_Socket) sock;
619 * The function which tries to send on a socket once it's deemed
/*
 * try_send: push as much queued output to the socket as it will take.
 *
 * Loops while OOB data is waiting (sending_oob non-zero) or output_data is
 * non-empty. OOB data goes first, sent with MSG_OOB from s->oobdata;
 * otherwise the leading chunk of output_data is sent.
 *
 * Outcome of each send call:
 *  - EWOULDBLOCK: normal back-pressure; stop for now.
 *  - zero bytes sent, ECONNABORTED or ECONNRESET: recorded in
 *    s->pending_error for net_pending_errors to report later, because
 *    calling plug_closing from here could re-enter the caller.
 *  - any other error: logged, then fatalbox.
 *  - success: the sent bytes are removed, either by shifting oobdata down
 *    with memmove or by bufchain_consume.
 *
 * NOTE(review): err is 0 whenever nsent is not negative, so in the zero
 * bytes case the value stored in pending_error is 0. net_pending_errors
 * only acts on a non-zero pending_error, so that condition would never be
 * reported -- confirm against the lines missing from this excerpt.
 */
622 void try_send(Actual_Socket s)
624 while (s->sending_oob || bufchain_size(&s->output_data) > 0) {
630 if (s->sending_oob) {
631 urgentflag = MSG_OOB;
632 len = s->sending_oob;
636 bufchain_prefix(&s->output_data, &data, &len);
638 nsent = send(s->s, data, len, urgentflag);
639 noise_ultralight(nsent);
641 err = (nsent < 0 ? errno : 0);
642 if (err == EWOULDBLOCK) {
644 * Perfectly normal: we've sent all we can for the moment.
648 } else if (nsent == 0 ||
649 err == ECONNABORTED || err == ECONNRESET) {
651 * If send() returns CONNABORTED or CONNRESET, we
652 * unfortunately can't just call plug_closing(),
653 * because it's quite likely that we're currently
654 * _in_ a call from the code we'd be calling back
655 * to, so we'd have to make half the SSH code
656 * reentrant. Instead we flag a pending error on
657 * the socket, to be dealt with (by calling
658 * plug_closing()) at some suitable future moment.
660 s->pending_error = err;
663 /* We're inside the Unix frontend here, so we know
664 * that the frontend handle is unnecessary. */
665 logevent(NULL, error_string(err));
666 fatalbox("%s", error_string(err));
669 if (s->sending_oob) {
671 memmove(s->oobdata, s->oobdata+nsent, len-nsent);
672 s->sending_oob = len - nsent;
677 bufchain_consume(&s->output_data, nsent);
/*
 * sk_tcp_write: queue `len` bytes from buf for transmission.
 *
 * The data is appended to the output buffer chain of the socket; a send
 * attempt follows in lines not visible in this excerpt.
 *
 * Returns the number of bytes still queued in output_data afterwards.
 */
683 static int sk_tcp_write(Socket sock, char *buf, int len)
685 Actual_Socket s = (Actual_Socket) sock;
688 * Add the data to the buffer list on the socket.
690 bufchain_add(&s->output_data, buf, len);
693 * Now try sending from the start of the buffer list.
698 return bufchain_size(&s->output_data);
/*
 * sk_tcp_write_oob: send `len` bytes from buf as urgent (OOB) data.
 *
 * Any ordinary output still queued is discarded first (bufchain_clear), then
 * the bytes are copied into the fixed s->oobdata array and sending_oob is
 * set to their count. A send attempt follows in lines not visible here.
 *
 * Returns the number of OOB bytes still waiting to be sent.
 *
 * NOTE(review): the only bound on len is an assert, which disappears when
 * NDEBUG is defined; in such a build an oversized len would overrun oobdata
 * in the memcpy -- confirm callers never exceed the array size.
 */
701 static int sk_tcp_write_oob(Socket sock, char *buf, int len)
703 Actual_Socket s = (Actual_Socket) sock;
706 * Replace the buffer list on the socket with the data.
708 bufchain_clear(&s->output_data);
709 assert(len <= sizeof(s->oobdata));
710 memcpy(s->oobdata, buf, len);
711 s->sending_oob = len;
714 * Now try sending from the start of the buffer list.
719 return s->sending_oob;
/*
 * select_result: handle one select event for descriptor fd.
 *
 * fd:    descriptor the event was reported on; the socket record is looked
 *        up in sktree with cmpforsearch.
 * event: per the case labels, 1 = readable (also a connection to accept),
 *        2 = writable, 4 = exceptional.
 *
 * Returns 1 on the path marked boggle (apparently when no record matches
 * fd); otherwise visible paths return the result of plug_receive or
 * plug_closing. Other return statements are in lines not shown here.
 *
 * Exceptional: on a socket without oobinline, urgent data is read with
 * MSG_OOB and delivered to the plug with type 2; with oobinline, oobpending
 * is set and the data is handled by the following readable event.
 *
 * Readable: a listening socket accepts the connection, closes it again if
 * localhost_only is set and the peer is not on the loopback network, and
 * otherwise offers it to plug_accepting (closing it when that returns
 * non-zero). A frozen socket only records frozen_readable. Otherwise data is
 * received: one byte at a time while oobpending is set, a full buffer
 * otherwise, and delivered with type 0 when atmark is set, else type 1.
 * A zero-length read reports a clean close to the plug.
 *
 * Writable: try_send is presumably invoked between the two size samples
 * (the call is not visible); plug_sent is told the remaining backlog only
 * when it shrank.
 *
 * NOTE(review): addrlen is declared int but accept is specified to take a
 * pointer to socklen_t; declaring it socklen_t avoids a pointer type
 * mismatch.
 * NOTE(review): fd and the accepted descriptor t travel through void
 * pointers; see cmpforsearch for the matching conversion back to int.
 */
722 int select_result(int fd, int event)
726 char buf[20480]; /* nice big buffer for plenty of speed */
730 /* Find the Socket structure */
731 s = find234(sktree, (void *) fd, cmpforsearch);
733 return 1; /* boggle */
735 noise_ultralight(event);
738 #ifdef FIXME_NONBLOCKING_CONNECTIONS
739 case FIXME: /* connected */
740 s->connected = s->writable = 1;
743 case 4: /* exceptional */
746 * On a non-oobinline socket, this indicates that we
747 * can immediately perform an OOB read and get back OOB
748 * data, which we will send to the back end with
749 * type==2 (urgent data).
751 ret = recv(s->s, buf, sizeof(buf), MSG_OOB);
752 noise_ultralight(ret);
754 char *str = (ret == 0 ? "Internal networking trouble" :
755 error_string(errno));
756 /* We're inside the Unix frontend here, so we know
757 * that the frontend handle is unnecessary. */
761 return plug_receive(s->plug, 2, buf, ret);
767 * If we reach here, this is an oobinline socket, which
768 * means we should set s->oobpending and then deal with it
769 * when we get called for the readability event (which
770 * should also occur).
772 s->oobpending = TRUE;
774 case 1: /* readable; also acceptance */
777 * On a listening socket, the readability event means a
778 * connection is ready to be accepted.
780 struct sockaddr_in isa;
781 int addrlen = sizeof(struct sockaddr_in);
782 int t; /* socket of connection */
784 memset(&isa, 0, sizeof(struct sockaddr_in));
786 t = accept(s->s,(struct sockaddr *)&isa,&addrlen);
791 if (s->localhost_only && !ipv4_is_loopback(isa.sin_addr)) {
792 close(t); /* someone let nonlocal through?! */
793 } else if (plug_accepting(s->plug, (void*)t)) {
794 close(t); /* denied or error */
800 * If we reach here, this is not a listening socket, so
801 * readability really means readability.
804 /* In the case the socket is still frozen, we don't even bother */
806 s->frozen_readable = 1;
811 * We have received data on the socket. For an oobinline
812 * socket, this might be data _before_ an urgent pointer,
813 * in which case we send it to the back end with type==1
814 * (data prior to urgent).
816 if (s->oobinline && s->oobpending) {
818 if (ioctl(s->s, SIOCATMARK, &atmark) == 0 && atmark)
819 s->oobpending = FALSE; /* clear this indicator */
823 ret = recv(s->s, buf, s->oobpending ? 1 : sizeof(buf), 0);
824 noise_ultralight(ret);
826 if (errno == EWOULDBLOCK) {
831 return plug_closing(s->plug, error_string(errno), errno, 0);
832 } else if (0 == ret) {
833 return plug_closing(s->plug, NULL, 0, 0);
835 return plug_receive(s->plug, atmark ? 0 : 1, buf, ret);
838 case 2: /* writable */
840 int bufsize_before, bufsize_after;
842 bufsize_before = s->sending_oob + bufchain_size(&s->output_data);
844 bufsize_after = s->sending_oob + bufchain_size(&s->output_data);
845 if (bufsize_after < bufsize_before)
846 plug_sent(s->plug, bufsize_after);
855 * Deal with socket errors detected in try_send().
/*
 * net_pending_errors: report send errors that try_send deferred.
 *
 * Walks sktree by position and, for a socket whose pending_error is
 * non-zero, calls plug_closing with the strerror text and the error code.
 * As the notes inside the function explain, only one socket is handled per
 * pass and the scan then restarts from the beginning, because handling an
 * error may change the socket list. The restart logic itself is in lines
 * not visible in this excerpt.
 */
857 void net_pending_errors(void)
863 * This might be a fiddly business, because it's just possible
864 * that handling a pending error on one socket might cause
865 * others to be closed. (I can't think of any reason this might
866 * happen in current SSH implementation, but to maintain
867 * generality of this network layer I'll assume the worst.)
869 * So what we'll do is search the socket list for _one_ socket
870 * with a pending error, and then handle it, and then search
871 * the list again _from the beginning_. Repeat until we make a
872 * pass with no socket errors present. That way we are
873 * protected against the socket list changing under our feet.
877 for (i = 0; (s = index234(sktree, i)) != NULL; i++) {
878 if (s->pending_error) {
880 * An error has occurred on this socket. Pass it to the
883 plug_closing(s->plug, error_string(s->pending_error),
884 s->pending_error, 0);
892 * Each socket abstraction contains a `void *' private field in
893 * which the client can keep state.
/* Store the opaque client pointer `ptr` in the socket record. The pointer
 * is only stored; nothing here dereferences or frees it. */
895 static void sk_tcp_set_private_ptr(Socket sock, void *ptr)
897 Actual_Socket s = (Actual_Socket) sock;
898 s->private_ptr = ptr;
/* Return the opaque client pointer last stored with
 * sk_tcp_set_private_ptr. The value is handed back unchanged. */
901 static void *sk_tcp_get_private_ptr(Socket sock)
903 Actual_Socket s = (Actual_Socket) sock;
904 return s->private_ptr;
908 * Special error values are returned from sk_namelookup and sk_new
909 * if there's a problem. These functions extract an error message,
910 * or return NULL if there's no problem.
912 char *sk_addr_error(SockAddr addr)
/*
 * sk_tcp_socket_error: error accessor for the TCP socket type.
 *
 * Only the cast is visible in this excerpt. NOTE(review): presumably returns
 * the error field that sk_register, sk_new and sk_newlistener fill with
 * strerror text, or NULL when there is no problem -- verify.
 */
916 static char *sk_tcp_socket_error(Socket sock)
918 Actual_Socket s = (Actual_Socket) sock;
/*
 * sk_tcp_set_frozen: freeze or unfreeze read handling on a socket.
 *
 * is_frozen: new value of the frozen flag.
 *
 * The equality test guards the unchanged case (the statement it controls is
 * not visible here, presumably an early return). When the socket is being
 * unfrozen and a readability event was missed meanwhile (frozen_readable),
 * a one-byte recv with MSG_PEEK is issued, which leaves the data in place,
 * and frozen_readable is cleared. The result of that recv is not used in
 * the visible lines.
 */
922 static void sk_tcp_set_frozen(Socket sock, int is_frozen)
924 Actual_Socket s = (Actual_Socket) sock;
925 if (s->frozen == is_frozen)
927 s->frozen = is_frozen;
928 if (!is_frozen && s->frozen_readable) {
930 recv(s->s, &c, 1, MSG_PEEK);
932 s->frozen_readable = 0;
936 * For Unix select()-based frontends: enumerate all sockets
937 * currently active, and state whether we currently wish to receive
938 * select events on them for reading, writing and exceptional
/*
 * set_rwx: compute which select events are wanted for socket s.
 *
 * The mask uses the same bit values as the event codes in select_result:
 * 1 = read, 2 = write, 4 = exceptional.
 *  - connected and not frozen: read and exceptional.
 *  - output_data non-empty: write.
 *  - a further case adds read for accepting connections; its condition is
 *    not visible in this excerpt.
 * The store of the mask through rwx is also in a line not shown here.
 *
 * NOTE(review): write interest looks only at output_data, whereas try_send
 * also treats a non-zero sending_oob as pending output. OOB bytes left over
 * after EWOULDBLOCK would then never request a writable event -- confirm.
 */
941 static void set_rwx(Actual_Socket s, int *rwx)
944 if (s->connected && !s->frozen)
945 val |= 1 | 4; /* read, except */
946 if (bufchain_size(&s->output_data))
947 val |= 2; /* write */
949 val |= 1; /* read == accept */
/*
 * first_socket: begin enumerating the active sockets.
 *
 * state: iteration cursor owned by the caller; it is used as a position in
 *        sktree and incremented. Presumably reset to 0 in a line not visible
 *        here -- verify.
 * rwx:   receives the wanted-event mask for the returned socket (the call
 *        that fills it is not visible in this excerpt).
 *
 * Returns the descriptor of the socket at the cursor, or -1 when there is
 * none.
 */
953 int first_socket(int *state, int *rwx)
957 s = index234(sktree, (*state)++);
960 return s ? s->s : -1;
/*
 * next_socket: continue the enumeration started by first_socket.
 *
 * state: iteration cursor; used as a position in sktree, then incremented.
 * rwx:   receives the wanted-event mask for the returned socket (the call
 *        that fills it is not visible in this excerpt).
 *
 * Returns the descriptor of the socket at the cursor, or -1 once the tree
 * is exhausted.
 */
963 int next_socket(int *state, int *rwx)
965 Actual_Socket s = index234(sktree, (*state)++);
968 return s ? s->s : -1;
/*
 * net_service_lookup: map a service name to its port number.
 *
 * service: service name to look up; the protocol argument is NULL, so an
 *          entry for any protocol matches.
 *
 * Returns the port in host byte order (s_port is stored in network order,
 * hence ntohs).
 *
 * NOTE(review): getservbyname returns NULL for an unknown service; the check
 * guarding the dereference is not visible in this excerpt -- confirm it
 * exists in the full body.
 */
971 int net_service_lookup(char *service)
974 se = getservbyname(service, NULL);
976 return ntohs(se->s_port);