2 * Unix networking abstraction.
11 #include <sys/types.h>
12 #include <sys/socket.h>
13 #include <sys/ioctl.h>
14 #include <arpa/inet.h>
15 #include <netinet/in.h>
16 #include <netinet/tcp.h>
19 #define DEFINE_PLUG_METHOD_MACROS
24 #define ipv4_is_loopback(addr) (inet_netof(addr) == IN_LOOPBACKNET)
27 struct socket_function_table *fn;
28 /* the above variable absolutely *must* be the first in this structure */
36 int frozen; /* this causes readability notifications to be ignored */
37 int frozen_readable; /* this means we missed at least one readability
38 * notification while we were frozen */
39 int localhost_only; /* for listening sockets */
42 int oobpending; /* is there OOB data available to read? */
44 int pending_error; /* in case send() returns error */
49 * We used to typedef struct Socket_tag *Socket.
51 * Since we have made the networking abstraction slightly more
52 * abstract, Socket no longer means a tcp socket (it could mean
53 * an ssl socket). So now we must use Actual_Socket when we know
54 * we are talking about a tcp socket.
56 typedef struct Socket_tag *Actual_Socket;
61 * Which address family this address belongs to. AF_INET for
62 * IPv4; AF_INET6 for IPv6; AF_UNSPEC indicates that name
63 * resolution has not been done and a simple host name is held
64 * in this SockAddr structure.
67 unsigned long address; /* Address IPv4 style. */
69 struct addrinfo *ai; /* Address IPv6 style. */
71 char hostname[512]; /* Store an unresolved host name. */
74 static tree234 *sktree;
76 static void uxsel_tell(Actual_Socket s);
/*
 * Ordering callback for sktree: it is the function handed to
 * newtree234() when the tree is created. Both arguments are
 * Actual_Socket pointers; their integer `s` members are loaded into
 * `as` and `bs`. The statements that compare the two values and return
 * the result are not present in this listing.
 */
78 static int cmpfortree(void *av, void *bv)
80 Actual_Socket a = (Actual_Socket) av, b = (Actual_Socket) bv;
81 int as = a->s, bs = b->s;
/*
 * Search callback used with find234() on sktree (see the lookup in
 * net_select_result). Unlike cmpfortree, the first argument is not a
 * socket pointer: it is an integer key carried inside the void *
 * parameter and recovered with an (int) cast. The second argument is a
 * tree element whose `s` member is loaded for comparison.
 * NOTE(review): the pointer-to-int round trip assumes the key survives
 * the cast unchanged -- confirm on platforms where pointers are wider
 * than int.
 * The comparison and return statements are not present in this listing.
 */
89 static int cmpforsearch(void *av, void *bv)
91 Actual_Socket b = (Actual_Socket) bv;
92 int as = (int) av, bs = b->s;
102 sktree = newtree234(cmpfortree);
/*
 * Walk every entry of sktree by index, starting at 0 and stopping when
 * index234() returns NULL. What is done with each socket `s` inside the
 * loop is not present in this listing.
 */
105 void sk_cleanup(void)
111 for (i = 0; (s = index234(sktree, i)) != NULL; i++) {
/*
 * Translate an errno-style error code into a human-readable message.
 *
 * error: an error number of the kind found in errno.
 *
 * Returns the string produced by strerror(). The storage belongs to the
 * C library: callers must not free or modify it, and a later strerror()
 * call may overwrite it.
 */
char *error_string(int error)
{
    return strerror(error);
}
/*
 * Resolve `host` into a newly allocated SockAddr and hand back a
 * heap-allocated copy of its canonical name through *canonicalname.
 *
 * Flow visible in this listing:
 *  - The result is allocated with snew() and zeroed; a family of 0
 *    means the host has not been resolved yet.
 *  - inet_addr() is tried first. If it accepts `host`, the input was a
 *    numeric IPv4 address: family becomes AF_INET and `host` itself is
 *    used as the canonical name.
 *  - Otherwise getaddrinfo() and/or gethostbyname() are used. (The
 *    preprocessor and else lines that choose between them are not
 *    present in this listing.) If the family is still 0 afterwards,
 *    ret->error is set to a fixed message selected from h_errno.
 *  - With a getaddrinfo() result, the IPv4 address is copied out of
 *    ai_addr and getnameinfo() supplies the canonical name, falling
 *    back to `host` if that call fails. With a gethostbyname() result,
 *    h_addr and h_name are used instead.
 *  - ret->address stores the IPv4 address converted with ntohl().
 *  - realhost is explicitly NUL-terminated after the strncpy() calls,
 *    then duplicated into *canonicalname with snewn() and strcpy().
 *
 * NOTE(review): the return statement is not visible here; presumably
 * `ret` is returned and the caller owns both it and *canonicalname --
 * confirm against callers.
 */
122 SockAddr sk_namelookup(const char *host, char **canonicalname)
124 SockAddr ret = snew(struct SockAddr_tag);
126 struct hostent *h = NULL;
129 /* Clear the structure; the family stays 0 until the host is resolved. */
130 memset(ret, 0, sizeof(struct SockAddr_tag));
131 ret->family = 0; /* We set this one when we have resolved the host. */
135 if ((a = inet_addr(host)) == (unsigned long) INADDR_NONE) {
137 if (getaddrinfo(host, NULL, NULL, &ret->ai) == 0) {
138 ret->family = ret->ai->ai_family;
143 * Otherwise use the IPv4-only gethostbyname... (NOTE:
144 * we don't use gethostbyname as a fallback!)
146 if (ret->family == 0) {
147 /*debug(("Resolving \"%s\" with gethostbyname() (IPv4 only)...\n", host)); */
148 if ( (h = gethostbyname(host)) )
149 ret->family = AF_INET;
151 if (ret->family == 0) {
152 ret->error = (h_errno == HOST_NOT_FOUND ||
153 h_errno == NO_DATA ||
154 h_errno == NO_ADDRESS ? "Host does not exist" :
155 h_errno == TRY_AGAIN ?
156 "Temporary name service failure" :
157 "gethostbyname: unknown error");
163 /* If we got an address info use that... */
166 /* Are we in IPv4 fallback mode? */
167 /* We put the IPv4 address into the a variable so we can further-on use the IPv4 code... */
168 if (ret->family == AF_INET)
170 (char *) &((struct sockaddr_in *) ret->ai->
171 ai_addr)->sin_addr, sizeof(a));
173 /* Now let's find that canonicalname... */
174 if (getnameinfo((struct sockaddr *) ret->ai->ai_addr,
176 AF_INET ? sizeof(struct sockaddr_in) :
177 sizeof(struct sockaddr_in6), realhost,
178 sizeof(realhost), NULL, 0, 0) != 0) {
179 strncpy(realhost, host, sizeof(realhost));
182 /* We used the IPv4-only gethostbyname()... */
186 memcpy(&a, h->h_addr, sizeof(a));
187 /* This way we are always sure the h->h_name is valid :) */
188 strncpy(realhost, h->h_name, sizeof(realhost));
192 * This must be a numeric IPv4 address because it caused a
193 * success return from inet_addr.
195 ret->family = AF_INET;
196 strncpy(realhost, host, sizeof(realhost));
198 ret->address = ntohl(a);
199 realhost[lenof(realhost)-1] = '\0';
200 *canonicalname = snewn(1+strlen(realhost), char);
201 strcpy(*canonicalname, realhost);
/*
 * Build a SockAddr that merely carries an unresolved host name: the
 * family is set to AF_UNSPEC and `host` is copied into the fixed-size
 * hostname buffer, truncated if necessary and always NUL-terminated.
 * No name resolution is performed by the visible statements; the
 * return statement is not present in this listing.
 */
205 SockAddr sk_nonamelookup(const char *host)
207 SockAddr ret = snew(struct SockAddr_tag);
209 ret->family = AF_UNSPEC;
210 strncpy(ret->hostname, host, lenof(ret->hostname));
211 ret->hostname[lenof(ret->hostname)-1] = '\0';
/*
 * Write a textual form of `addr` into buf (capacity buflen bytes).
 *  - AF_INET: the stored address is converted back with htonl() and
 *    formatted by inet_ntoa().
 *  - AF_UNSPEC (asserted): the stored host name is copied.
 *  - AF_INET6: marked FIXME in the code; no formatting is implemented
 *    in the visible lines.
 * Each copy uses strncpy() followed by an explicit terminator at
 * buf[buflen-1], so long text is truncated rather than overrunning buf.
 * NOTE(review): buf[buflen-1] assumes buflen >= 1 -- confirm callers.
 */
215 void sk_getaddr(SockAddr addr, char *buf, int buflen)
218 if (addr->family == AF_INET6) {
219 FIXME; /* I don't know how to get a text form of an IPv6 address. */
222 if (addr->family == AF_INET) {
224 a.s_addr = htonl(addr->address);
225 strncpy(buf, inet_ntoa(a), buflen);
226 buf[buflen-1] = '\0';
228 assert(addr->family == AF_UNSPEC);
229 strncpy(buf, addr->hostname, buflen);
230 buf[buflen-1] = '\0';
/*
 * Report whether a host name is the literal name of the local machine.
 *
 * name: NUL-terminated host name; NULL is tolerated.
 *
 * Returns 1 only when name is exactly "localhost" (case-sensitive, with
 * no trailing dot or domain part) and 0 otherwise, including for NULL.
 */
int sk_hostname_is_local(char *name)
{
    if (name == NULL)
        return 0;               /* strcmp() on NULL would be undefined */
    return strcmp(name, "localhost") == 0;
}
/*
 * Tell whether `addr` refers to the loopback network.
 *  - AF_INET: rebuilds an in_addr from the stored address with htonl()
 *    and tests it with ipv4_is_loopback().
 *  - AF_UNSPEC (asserted): returns 0, because an unresolved name cannot
 *    be classified.
 *  - AF_INET6: marked FIXME; not implemented in the visible lines.
 */
239 int sk_address_is_local(SockAddr addr)
242 if (addr->family == AF_INET6) {
243 FIXME; /* someone who can compile for IPV6 had better do this bit */
246 if (addr->family == AF_INET) {
248 a.s_addr = htonl(addr->address);
249 return ipv4_is_loopback(a);
251 assert(addr->family == AF_UNSPEC);
252 return 0; /* we don't know; assume not */
/*
 * Map the address family of `addr` to an ADDRTYPE_* constant:
 * AF_INET gives ADDRTYPE_IPV4 and AF_INET6 gives ADDRTYPE_IPV6. The
 * final alternative of the conditional chain is not present in this
 * listing.
 */
256 int sk_addrtype(SockAddr addr)
258 return (addr->family == AF_INET ? ADDRTYPE_IPV4 :
260 addr->family == AF_INET6 ? ADDRTYPE_IPV6 :
/*
 * Copy the raw binary address held in `addr` into buf. The address must
 * already be resolved (family != AF_UNSPEC is asserted).
 *  - AF_INET: 4 bytes, the stored address passed through htonl().
 *  - AF_INET6: 16 bytes.
 * The caller must supply a buffer large enough for the family in use.
 * NOTE(review): the AF_INET6 branch copies 16 bytes starting at the
 * addrinfo pointer addr->ai itself, whereas sk_new reads the IPv6
 * address from the sin6_addr member reached through ai->ai_addr. This
 * looks like it copies the head of the addrinfo record rather than the
 * address -- verify before relying on it.
 */
265 void sk_addrcopy(SockAddr addr, char *buf)
267 assert(addr->family != AF_UNSPEC);
269 if (addr->family == AF_INET6) {
270 memcpy(buf, (char*) addr->ai, 16);
273 if (addr->family == AF_INET) {
275 a.s_addr = htonl(addr->address);
276 memcpy(buf, (char*) &a.s_addr, 4);
280 void sk_addr_free(SockAddr addr)
/*
 * Takes a generic Socket plus a Plug and returns a Plug. Only the cast
 * from Socket to the concrete Actual_Socket is present in this listing;
 * the statements that use `p` and produce the return value are not.
 */
285 static Plug sk_tcp_plug(Socket sock, Plug p)
287 Actual_Socket s = (Actual_Socket) sock;
294 static void sk_tcp_flush(Socket s)
297 * We send data to the socket as soon as we can anyway,
298 * so we don't need to do anything here. :-)
302 static void sk_tcp_close(Socket s);
303 static int sk_tcp_write(Socket s, const char *data, int len);
304 static int sk_tcp_write_oob(Socket s, const char *data, int len);
305 static void sk_tcp_set_private_ptr(Socket s, void *ptr);
306 static void *sk_tcp_get_private_ptr(Socket s);
307 static void sk_tcp_set_frozen(Socket s, int is_frozen);
308 static char *sk_tcp_socket_error(Socket s);
/*
 * Function table shared by every TCP socket: sk_register, sk_new and
 * sk_newlistener all point their `fn` member at it, and sk_getxdmdata
 * compares `fn` against its address to recognise a TCP socket. Only
 * two of the initialisers are present in this listing.
 */
310 static struct socket_function_table tcp_fn_table = {
316 sk_tcp_set_private_ptr,
317 sk_tcp_get_private_ptr,
/*
 * Allocate a new Socket structure bound to the TCP function table.
 * The visible initialisation marks the socket writable from the start,
 * with no OOB data being sent or pending, no missed-readability flag
 * and no pending error. ret->error is filled from errno on a path whose
 * condition is not present in this listing, and the way `sock` and
 * `plug` are used is likewise not visible here.
 */
322 Socket sk_register(void *sock, Plug plug)
327 * Create Socket structure.
329 ret = snew(struct Socket_tag);
330 ret->fn = &tcp_fn_table;
333 bufchain_init(&ret->output_data);
334 ret->writable = 1; /* to start with */
335 ret->sending_oob = 0;
337 ret->frozen_readable = 0;
338 ret->localhost_only = 0; /* unused, but best init anyway */
339 ret->pending_error = 0;
340 ret->oobpending = FALSE;
346 ret->error = error_string(errno);
/*
 * Create a TCP socket and start connecting it to the given address and
 * port.
 *
 * Parameters, as used in the visible lines:
 *   addr       resolved address; family must not be AF_UNSPEC
 *              (asserted). The family also selects the socket domain.
 *   port       remote port, converted with htons().
 *   oobinline  stored in ret->oobinline.
 *   privport, nodelay, plug
 *              their use is not visible in this listing. The local
 *              port is either counted down from 1023 or left at 0 for
 *              the kernel to pick, and two setsockopt() calls set
 *              SO_OOBINLINE and TCP_NODELAY; the conditions guarding
 *              them are not present.
 *
 * Steps visible in this listing: allocate and initialise the Socket
 * (not connected, not writable), create the OS socket, apply socket
 * options, bind a local port in a loop (a failure other than
 * EADDRINUSE is treated as a bad reason), fill in the remote address,
 * switch to non-blocking mode with ioctl(FIONBIO) and call connect().
 * A connect() failure other than EINPROGRESS is recorded in ret->error.
 *
 * NOTE(review): in the AF_INET6 branch of the connect step the memset
 * clears `a` while the assignments that follow fill `a6`, so `a6` keeps
 * whatever the bind step left in it. This looks like it was meant to
 * clear `a6` -- confirm.
 * NOTE(review): the closing remark in this function mentions
 * EWOULDBLOCK, but the test next to it checks EINPROGRESS.
 * NOTE(review): error paths and the return statement are not present
 * in this listing.
 */
358 Socket sk_new(SockAddr addr, int port, int privport, int oobinline,
359 int nodelay, Plug plug)
363 struct sockaddr_in6 a6;
365 struct sockaddr_in a;
371 * Create Socket structure.
373 ret = snew(struct Socket_tag);
374 ret->fn = &tcp_fn_table;
377 bufchain_init(&ret->output_data);
378 ret->connected = 0; /* to start with */
379 ret->writable = 0; /* to start with */
380 ret->sending_oob = 0;
382 ret->frozen_readable = 0;
383 ret->localhost_only = 0; /* unused, but best init anyway */
384 ret->pending_error = 0;
385 ret->oobpending = FALSE;
391 assert(addr->family != AF_UNSPEC);
392 s = socket(addr->family, SOCK_STREAM, 0);
396 ret->error = error_string(errno);
400 ret->oobinline = oobinline;
403 setsockopt(s, SOL_SOCKET, SO_OOBINLINE, (void *) &b, sizeof(b));
408 setsockopt(s, IPPROTO_TCP, TCP_NODELAY, (void *) &b, sizeof(b));
412 * Bind to local address.
415 localport = 1023; /* count from 1023 downwards */
417 localport = 0; /* just use port 0 (ie kernel picks) */
419 /* Loop round trying to bind */
424 if (addr->family == AF_INET6) {
425 memset(&a6, 0, sizeof(a6));
426 a6.sin6_family = AF_INET6;
427 /*a6.sin6_addr = in6addr_any; *//* == 0 */
428 a6.sin6_port = htons(localport);
432 a.sin_family = AF_INET;
433 a.sin_addr.s_addr = htonl(INADDR_ANY);
434 a.sin_port = htons(localport);
437 retcode = bind(s, (addr->family == AF_INET6 ?
438 (struct sockaddr *) &a6 :
439 (struct sockaddr *) &a),
441 AF_INET6 ? sizeof(a6) : sizeof(a)));
443 retcode = bind(s, (struct sockaddr *) &a, sizeof(a));
450 if (err != EADDRINUSE) /* failed, for a bad reason */
455 break; /* we're only looping once */
458 break; /* we might have got to the end */
462 ret->error = error_string(err);
467 * Connect to remote address.
470 if (addr->family == AF_INET6) {
471 memset(&a, 0, sizeof(a));
472 a6.sin6_family = AF_INET6;
473 a6.sin6_port = htons((short) port);
475 ((struct sockaddr_in6 *) addr->ai->ai_addr)->sin6_addr;
479 a.sin_family = AF_INET;
480 a.sin_addr.s_addr = htonl(addr->address);
481 a.sin_port = htons((short) port);
486 ioctl(s, FIONBIO, &i);
491 connect(s, ((addr->family == AF_INET6) ?
492 (struct sockaddr *) &a6 : (struct sockaddr *) &a),
493 (addr->family == AF_INET6) ? sizeof(a6) : sizeof(a))
495 connect(s, (struct sockaddr *) &a, sizeof(a))
498 if ( errno != EINPROGRESS ) {
499 ret->error = error_string(errno);
504 * If we _don't_ get EWOULDBLOCK, the connect has completed
505 * and we should set the socket as connected and writable.
/*
 * Create a listening TCP socket on `port`.
 *
 *   srcaddr          textual IPv4 address to bind to; when inet_addr()
 *                    accepts it, ret->localhost_only is recomputed from
 *                    whether that address is loopback.
 *   port             local port, converted with htons().
 *   plug             (use not present in this listing)
 *   local_host_only  initial value of ret->localhost_only. Presumably
 *                    it also chooses between the loopback and wildcard
 *                    addresses when srcaddr is unusable; the condition
 *                    is not present in this listing -- confirm.
 *
 * The socket is created as AF_INET, SO_REUSEADDR is enabled, the
 * address is bound and listen() is called with SOMAXCONN. ret->error is
 * set from errno or err after the socket(), bind() and listen() steps.
 *
 * NOTE(review): several lines test addr->family, yet no `addr` is among
 * the parameters shown and the socket is always created as AF_INET.
 * Those lines are presumably inside an IPv6-only conditional section
 * that is not present in this listing -- confirm they compile when that
 * section is enabled.
 */
517 Socket sk_newlistener(char *srcaddr, int port, Plug plug, int local_host_only)
521 struct sockaddr_in6 a6;
523 struct sockaddr_in a;
530 * Create Socket structure.
532 ret = snew(struct Socket_tag);
533 ret->fn = &tcp_fn_table;
536 bufchain_init(&ret->output_data);
537 ret->writable = 0; /* to start with */
538 ret->sending_oob = 0;
540 ret->frozen_readable = 0;
541 ret->localhost_only = local_host_only;
542 ret->pending_error = 0;
543 ret->oobpending = FALSE;
549 s = socket(AF_INET, SOCK_STREAM, 0);
553 ret->error = error_string(errno);
559 setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (const char *)&on, sizeof(on));
562 if (addr->family == AF_INET6) {
563 memset(&a6, 0, sizeof(a6));
564 a6.sin6_family = AF_INET6;
565 /* FIXME: srcaddr is ignored for IPv6, because I (SGT) don't
566 * know how to do it. :-) */
568 a6.sin6_addr = in6addr_loopback;
570 a6.sin6_addr = in6addr_any;
571 a6.sin6_port = htons(port);
576 a.sin_family = AF_INET;
579 * Bind to source address. First try an explicitly
583 a.sin_addr.s_addr = inet_addr(srcaddr);
584 if (a.sin_addr.s_addr != INADDR_NONE) {
585 /* Override localhost_only with specified listen addr. */
586 ret->localhost_only = ipv4_is_loopback(a.sin_addr);
592 * ... and failing that, go with one of the standard ones.
596 a.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
598 a.sin_addr.s_addr = htonl(INADDR_ANY);
601 a.sin_port = htons((short)port);
604 retcode = bind(s, (addr->family == AF_INET6 ?
605 (struct sockaddr *) &a6 :
606 (struct sockaddr *) &a),
608 AF_INET6 ? sizeof(a6) : sizeof(a)));
610 retcode = bind(s, (struct sockaddr *) &a, sizeof(a));
619 ret->error = error_string(err);
624 if (listen(s, SOMAXCONN) < 0) {
626 ret->error = error_string(errno);
/*
 * Close entry for TCP sockets. Only the cast from the generic Socket to
 * Actual_Socket is present in this listing; the teardown statements
 * are not.
 */
636 static void sk_tcp_close(Socket sock)
638 Actual_Socket s = (Actual_Socket) sock;
/*
 * Fetch the local IPv4 address and port of a socket.
 *
 *   sock  a socket handle. It is first checked to be a TCP socket by
 *         comparing its function table with tcp_fn_table; 0 (failure)
 *         is returned if it is not.
 *   ip    receives the local address in host byte order.
 *   port  receives the local port in host byte order.
 *
 * getsockname() supplies the data. The call failing, or the family not
 * being AF_INET, is tested as one condition; the statement executed in
 * that case and the final return are not present in this listing.
 */
646 int sk_getxdmdata(void *sock, unsigned long *ip, int *port)
648 Actual_Socket s = (Actual_Socket) sock;
649 struct sockaddr_in addr;
653 * We must check that this socket really _is_ an Actual_Socket.
655 if (s->fn != &tcp_fn_table)
656 return 0; /* failure */
659 * If we ever implement connecting to a local X server through
660 * a Unix socket, we return 0xFFFFFFFF for the IP address and
661 * our current pid for the port. Bizarre, but such is life.
664 addrlen = sizeof(addr);
665 if (getsockname(s->s, (struct sockaddr *)&addr, &addrlen) < 0 ||
666 addr.sin_family != AF_INET)
669 *ip = ntohl(addr.sin_addr.s_addr);
670 *port = ntohs(addr.sin_port);
676 * The function which tries to send on a socket once it's deemed
/*
 * Push as much queued output to the socket as send() will accept.
 *
 * Loops while there is OOB data waiting (s->sending_oob non-zero) or
 * bytes queued in s->output_data. When OOB data is waiting it is sent
 * with MSG_OOB; otherwise the front of the buffer chain is sent. After
 * each send():
 *   - EWOULDBLOCK is the normal outcome when no more can be sent now;
 *   - a zero-byte send, ECONNABORTED or ECONNRESET is recorded in
 *     s->pending_error for net_pending_errors() to report later,
 *     because calling plug_closing() from here could re-enter the
 *     caller;
 *   - in a further branch the error text is logged and passed to
 *     fatalbox();
 *   - on success the sent bytes are removed, either by shifting the
 *     remainder of s->oobdata down with memmove() or by
 *     bufchain_consume().
 *
 * NOTE(review): when send() returns 0, err is 0, so pending_error is
 * assigned 0 and the non-zero test in net_pending_errors() would not
 * fire -- confirm whether lines missing from this listing handle that
 * case.
 */
679 void try_send(Actual_Socket s)
681 while (s->sending_oob || bufchain_size(&s->output_data) > 0) {
687 if (s->sending_oob) {
688 urgentflag = MSG_OOB;
689 len = s->sending_oob;
693 bufchain_prefix(&s->output_data, &data, &len);
695 nsent = send(s->s, data, len, urgentflag);
696 noise_ultralight(nsent);
698 err = (nsent < 0 ? errno : 0);
699 if (err == EWOULDBLOCK) {
701 * Perfectly normal: we've sent all we can for the moment.
705 } else if (nsent == 0 ||
706 err == ECONNABORTED || err == ECONNRESET) {
708 * If send() returns CONNABORTED or CONNRESET, we
709 * unfortunately can't just call plug_closing(),
710 * because it's quite likely that we're currently
711 * _in_ a call from the code we'd be calling back
712 * to, so we'd have to make half the SSH code
713 * reentrant. Instead we flag a pending error on
714 * the socket, to be dealt with (by calling
715 * plug_closing()) at some suitable future moment.
717 s->pending_error = err;
720 /* We're inside the Unix frontend here, so we know
721 * that the frontend handle is unnecessary. */
722 logevent(NULL, error_string(err));
723 fatalbox("%s", error_string(err));
726 if (s->sending_oob) {
728 memmove(s->oobdata, s->oobdata+nsent, len-nsent);
729 s->sending_oob = len - nsent;
734 bufchain_consume(&s->output_data, nsent);
/*
 * Queue `len` bytes from `buf` on the output chain of the socket and
 * return the number of bytes still queued afterwards, as reported by
 * bufchain_size(). The send attempt announced by the second remark in
 * the body is on a line that is not present in this listing.
 */
741 static int sk_tcp_write(Socket sock, const char *buf, int len)
743 Actual_Socket s = (Actual_Socket) sock;
746 * Add the data to the buffer list on the socket.
748 bufchain_add(&s->output_data, buf, len);
751 * Now try sending from the start of the buffer list.
756 return bufchain_size(&s->output_data);
/*
 * Queue urgent (out-of-band) data for sending. Any ordinary output
 * still queued is discarded with bufchain_clear(), the new data is
 * copied into s->oobdata (its length is asserted to fit) and
 * s->sending_oob is set to len. Returns s->sending_oob as it stands at
 * the end of the function. The send attempt announced by the second
 * remark in the body is on a line that is not present in this listing.
 */
759 static int sk_tcp_write_oob(Socket sock, const char *buf, int len)
761 Actual_Socket s = (Actual_Socket) sock;
764 * Replace the buffer list on the socket with the data.
766 bufchain_clear(&s->output_data);
767 assert(len <= sizeof(s->oobdata));
768 memcpy(s->oobdata, buf, len);
769 s->sending_oob = len;
772 * Now try sending from the start of the buffer list.
777 return s->sending_oob;
/*
 * Event handler that uxsel_tell() registers through uxsel_set(); it is
 * called with a file descriptor and the event that fired on it.
 *
 * The Actual_Socket is looked up in sktree by fd (a failed lookup
 * apparently returns 1; the test itself is not present in this
 * listing). Events handled, by case label:
 *   4 (exceptional): on a non-oobinline socket, read the OOB data with
 *     recv(MSG_OOB) and deliver it through plug_receive() with type 2;
 *     on an oobinline socket just set s->oobpending so that the read
 *     case deals with it.
 *   1 (readable): on a listening socket accept() the connection and
 *     close it at once if localhost_only is set and the peer is not
 *     loopback, or if plug_accepting() returns non-zero. On other
 *     sockets, a frozen socket only records frozen_readable; otherwise
 *     recv() is called and the data goes to plug_receive() with type 0
 *     or 1 depending on atmark, while end-of-file and errors go to
 *     plug_closing(). While oobpending is set, SIOCATMARK is consulted
 *     and at most one byte is read per call.
 *   2 (writable): marks an asynchronous connect as complete by setting
 *     connected and writable, and compares the queued byte count before
 *     and after a step that is not present in this listing, calling
 *     plug_sent() when the count shrank.
 *
 * NOTE(review): addrlen is declared int and its address is passed to
 * accept(), and the accepted descriptor is cast to void * for
 * plug_accepting(); both rely on int, the length type expected by
 * accept() and pointers being compatible in size -- confirm.
 */
780 static int net_select_result(int fd, int event)
784 char buf[20480]; /* nice big buffer for plenty of speed */
788 /* Find the Socket structure */
789 s = find234(sktree, (void *) fd, cmpforsearch);
791 return 1; /* boggle */
793 noise_ultralight(event);
796 case 4: /* exceptional */
799 * On a non-oobinline socket, this indicates that we
800 * can immediately perform an OOB read and get back OOB
801 * data, which we will send to the back end with
802 * type==2 (urgent data).
804 ret = recv(s->s, buf, sizeof(buf), MSG_OOB);
805 noise_ultralight(ret);
807 char *str = (ret == 0 ? "Internal networking trouble" :
808 error_string(errno));
809 /* We're inside the Unix frontend here, so we know
810 * that the frontend handle is unnecessary. */
814 return plug_receive(s->plug, 2, buf, ret);
820 * If we reach here, this is an oobinline socket, which
821 * means we should set s->oobpending and then deal with it
822 * when we get called for the readability event (which
823 * should also occur).
825 s->oobpending = TRUE;
827 case 1: /* readable; also acceptance */
830 * On a listening socket, the readability event means a
831 * connection is ready to be accepted.
833 struct sockaddr_in isa;
834 int addrlen = sizeof(struct sockaddr_in);
835 int t; /* socket of connection */
837 memset(&isa, 0, sizeof(struct sockaddr_in));
839 t = accept(s->s,(struct sockaddr *)&isa,&addrlen);
844 if (s->localhost_only && !ipv4_is_loopback(isa.sin_addr)) {
845 close(t); /* someone let nonlocal through?! */
846 } else if (plug_accepting(s->plug, (void*)t)) {
847 close(t); /* denied or error */
853 * If we reach here, this is not a listening socket, so
854 * readability really means readability.
857 /* In the case the socket is still frozen, we don't even bother */
859 s->frozen_readable = 1;
864 * We have received data on the socket. For an oobinline
865 * socket, this might be data _before_ an urgent pointer,
866 * in which case we send it to the back end with type==1
867 * (data prior to urgent).
869 if (s->oobinline && s->oobpending) {
871 if (ioctl(s->s, SIOCATMARK, &atmark) == 0 && atmark)
872 s->oobpending = FALSE; /* clear this indicator */
876 ret = recv(s->s, buf, s->oobpending ? 1 : sizeof(buf), 0);
877 noise_ultralight(ret);
879 if (errno == EWOULDBLOCK) {
884 return plug_closing(s->plug, error_string(errno), errno, 0);
885 } else if (0 == ret) {
886 return plug_closing(s->plug, NULL, 0, 0);
888 return plug_receive(s->plug, atmark ? 0 : 1, buf, ret);
891 case 2: /* writable */
894 * select() reports a socket as _writable_ when an
895 * asynchronous connection is completed.
897 s->connected = s->writable = 1;
901 int bufsize_before, bufsize_after;
903 bufsize_before = s->sending_oob + bufchain_size(&s->output_data);
905 bufsize_after = s->sending_oob + bufchain_size(&s->output_data);
906 if (bufsize_after < bufsize_before)
907 plug_sent(s->plug, bufsize_after);
916 * Deal with socket errors detected in try_send().
/*
 * Report errors that try_send() deferred. Scans sktree by index for a
 * socket whose pending_error is non-zero and passes the error text and
 * code to plug_closing(). According to the remarks in the body, the
 * scan is meant to restart from the beginning after each report, so
 * that changes the callback makes to the socket list are harmless; the
 * restart logic itself is not present in this listing.
 */
918 void net_pending_errors(void)
924 * This might be a fiddly business, because it's just possible
925 * that handling a pending error on one socket might cause
926 * others to be closed. (I can't think of any reason this might
927 * happen in current SSH implementation, but to maintain
928 * generality of this network layer I'll assume the worst.)
930 * So what we'll do is search the socket list for _one_ socket
931 * with a pending error, and then handle it, and then search
932 * the list again _from the beginning_. Repeat until we make a
933 * pass with no socket errors present. That way we are
934 * protected against the socket list changing under our feet.
938 for (i = 0; (s = index234(sktree, i)) != NULL; i++) {
939 if (s->pending_error) {
941 * An error has occurred on this socket. Pass it to the
944 plug_closing(s->plug, error_string(s->pending_error),
945 s->pending_error, 0);
953 * Each socket abstraction contains a `void *' private field in
954 * which the client can keep state.
956 static void sk_tcp_set_private_ptr(Socket sock, void *ptr)
958 Actual_Socket s = (Actual_Socket) sock;
959 s->private_ptr = ptr;
962 static void *sk_tcp_get_private_ptr(Socket sock)
964 Actual_Socket s = (Actual_Socket) sock;
965 return s->private_ptr;
969 * Special error values are returned from sk_namelookup and sk_new
970 * if there's a problem. These functions extract an error message,
971 * or return NULL if there's no problem.
973 char *sk_addr_error(SockAddr addr)
/*
 * Takes a generic Socket and returns a char pointer. Only the cast to
 * Actual_Socket is present in this listing; the returned expression is
 * not.
 */
977 static char *sk_tcp_socket_error(Socket sock)
979 Actual_Socket s = (Actual_Socket) sock;
/*
 * Set the frozen flag of a socket. The unchanged case is tested first
 * (the statement it guards is not present in this listing). When a
 * socket is unfrozen after a readability notification was missed
 * (frozen_readable set), a one-byte recv() with MSG_PEEK is issued and
 * frozen_readable is cleared.
 * NOTE(review): presumably the peek is there to provoke a fresh read
 * event; the surrounding lines are not present in this listing --
 * confirm.
 */
983 static void sk_tcp_set_frozen(Socket sock, int is_frozen)
985 Actual_Socket s = (Actual_Socket) sock;
986 if (s->frozen == is_frozen)
988 s->frozen = is_frozen;
989 if (!is_frozen && s->frozen_readable) {
991 recv(s->s, &c, 1, MSG_PEEK);
993 s->frozen_readable = 0;
/*
 * Recompute which events this socket is interested in and pass them to
 * uxsel_set() together with net_select_result as the handler.
 * Bits in rwx: 1 = read (also accept), 2 = write (also connect
 * completion), 4 = exceptional. Read and exceptional are requested for
 * a connected socket that is not frozen; write is requested when
 * output_data is non-empty. The conditions guarding the first and last
 * assignments are not present in this listing.
 */
997 static void uxsel_tell(Actual_Socket s)
1001 rwx |= 2; /* write == connect */
1002 if (s->connected && !s->frozen)
1003 rwx |= 1 | 4; /* read, except */
1004 if (bufchain_size(&s->output_data))
1005 rwx |= 2; /* write */
1007 rwx |= 1; /* read == accept */
1008 uxsel_set(s->s, rwx, net_select_result);
/*
 * Look up a service by name with getservbyname(), passing NULL as the
 * protocol argument, and return its port converted with ntohs(). The
 * handling of a failed lookup is not present in this listing.
 */
1011 int net_service_lookup(char *service)
1014 se = getservbyname(service, NULL);
1016 return ntohs(se->s_port);