2 * Unix networking abstraction.
11 #include <sys/types.h>
12 #include <sys/socket.h>
13 #include <sys/ioctl.h>
14 #include <arpa/inet.h>
15 #include <netinet/in.h>
16 #include <netinet/tcp.h>
19 #define DEFINE_PLUG_METHOD_MACROS
25 struct socket_function_table *fn;
26 /* the above variable absolutely *must* be the first in this structure */
34 int frozen; /* this causes readability notifications to be ignored */
35 int frozen_readable; /* this means we missed at least one readability
36 * notification while we were frozen */
37 int localhost_only; /* for listening sockets */
40 int oobpending; /* is there OOB data available to read? */
42 int pending_error; /* in case send() returns error */
47 * We used to typedef struct Socket_tag *Socket.
49 * Since we have made the networking abstraction slightly more
50 * abstract, Socket no longer means a tcp socket (it could mean
51 * an ssl socket). So now we must use Actual_Socket when we know
52 * we are talking about a tcp socket.
54 typedef struct Socket_tag *Actual_Socket;
58 /* address family this belongs to, AF_INET for IPv4, AF_INET6 for IPv6. */
60 unsigned long address; /* Address IPv4 style. */
62 struct addrinfo *ai; /* Address IPv6 style. */
66 static tree234 *sktree;
/*
 * Ordering callback given to newtree234() when sktree is created.
 * Both arguments are treated as Actual_Socket and their s members
 * are read into ints; presumably the result orders sockets by that
 * value -- TODO confirm against the comparison itself.
 */
68 static int cmpfortree(void *av, void *bv)
70 Actual_Socket a = (Actual_Socket) av, b = (Actual_Socket) bv;
71 int as = a->s, bs = b->s;
/*
 * Lookup callback passed to find234() on sktree (see select_result).
 * The key av is not a pointer to anything: it is the integer being
 * searched for, carried through the void * argument and recovered
 * with a cast. bv is a tree element (Actual_Socket).
 * NOTE(review): the pointer-to-int cast narrows where pointers are
 * wider than int; going via intptr_t would be cleaner -- confirm
 * callers only ever pass small non-negative values.
 */
79 static int cmpforsearch(void *av, void *bv)
81 Actual_Socket b = (Actual_Socket) bv;
82 int as = (int) av, bs = b->s;
92 sktree = newtree234(cmpfortree);
101 for (i = 0; (s = index234(sktree, i)) != NULL; i++) {
/*
 * Translate an error code into a fixed human-readable message.
 * Every visible return is a string literal, so the result must not be
 * modified or freed by the caller. The final return is a generic
 * unknown-error string, presumably the default case -- confirm.
 * Callers in this file pass errno, a saved copy of it, or the
 * pending_error value of a socket.
 */
107 char *error_string(int error)
111 return "Network error: Permission denied";
113 return "Network error: Address already in use";
115 return "Network error: Cannot assign requested address";
118 "Network error: Address family not supported by protocol family";
120 return "Network error: Operation already in progress";
122 return "Network error: Software caused connection abort";
124 return "Network error: Connection refused";
126 return "Network error: Connection reset by peer";
128 return "Network error: Destination address required";
130 return "Network error: Bad address";
132 return "Network error: Host is down";
134 return "Network error: No route to host";
136 return "Network error: Operation now in progress";
138 return "Network error: Interrupted function call";
140 return "Network error: Invalid argument";
142 return "Network error: Socket is already connected";
144 return "Network error: Too many open files";
146 return "Network error: Message too long";
148 return "Network error: Network is down";
150 return "Network error: Network dropped connection on reset";
152 return "Network error: Network is unreachable";
154 return "Network error: No buffer space available";
156 return "Network error: Bad protocol option";
158 return "Network error: Socket is not connected";
160 return "Network error: Socket operation on non-socket";
162 return "Network error: Operation not supported";
164 return "Network error: Protocol family not supported";
165 case EPROTONOSUPPORT:
166 return "Network error: Protocol not supported";
168 return "Network error: Protocol wrong type for socket";
170 return "Network error: Cannot send after socket shutdown";
171 case ESOCKTNOSUPPORT:
172 return "Network error: Socket type not supported";
174 return "Network error: Connection timed out";
176 return "Network error: Resource temporarily unavailable";
178 return "Unknown network error";
/*
 * Resolve a host name or dotted-quad string into a newly allocated
 * SockAddr.
 *
 *  - host: text to resolve. It is first tried with inet_addr(); only
 *    if that reports INADDR_NONE are the resolver calls used.
 *  - canonicalname: out parameter; receives a fresh smalloc copy of
 *    the name held in realhost (a resolver-supplied name, or host
 *    itself where no better name was obtained). Who frees it is not
 *    shown here -- presumably the caller.
 *
 * The returned structure is zeroed and its family field stays 0 until
 * resolution succeeds. If family is still 0 after the lookup,
 * ret->error is set to a message chosen from h_errno. The IPv4
 * address is stored in ret->address in host byte order (ntohl is
 * applied to the network-order value).
 *
 * realhost is filled with strncpy, which does not terminate on
 * truncation; the explicit store of a NUL in the last element before
 * the final copy is what makes the strlen/strcpy pair safe.
 *
 * NOTE(review): getaddrinfo and gethostbyname both appear on this
 * path; which of them is compiled in is presumably selected by
 * preprocessor conditionals -- confirm.
 * NOTE(review): inet_addr cannot distinguish the valid address
 * 255.255.255.255 from failure, so that literal goes to the resolver.
 */
182 SockAddr sk_namelookup(char *host, char **canonicalname)
184 SockAddr ret = smalloc(sizeof(struct SockAddr_tag));
186 struct hostent *h = NULL;
189 /* Clear the structure and default to IPv4. */
190 memset(ret, 0, sizeof(struct SockAddr_tag));
191 ret->family = 0; /* We set this one when we have resolved the host. */
195 if ((a = inet_addr(host)) == (unsigned long) INADDR_NONE) {
197 if (getaddrinfo(host, NULL, NULL, &ret->ai) == 0) {
198 ret->family = ret->ai->ai_family;
203 * Otherwise use the IPv4-only gethostbyname... (NOTE:
204 * we don't use gethostbyname as a fallback!)
206 if (ret->family == 0) {
207 /*debug(("Resolving \"%s\" with gethostbyname() (IPv4 only)...\n", host)); */
208 if ( (h = gethostbyname(host)) )
209 ret->family = AF_INET;
211 if (ret->family == 0)
212 ret->error = (h_errno == HOST_NOT_FOUND ||
213 h_errno == NO_DATA ||
214 h_errno == NO_ADDRESS ? "Host does not exist" :
215 h_errno == TRY_AGAIN ?
216 "Temporary name service failure" :
217 "gethostbyname: unknown error");
221 /* If we got an address info use that... */
224 /* Are we in IPv4 fallback mode? */
225 /* We put the IPv4 address into the a variable so we can further-on use the IPv4 code... */
226 if (ret->family == AF_INET)
228 (char *) &((struct sockaddr_in *) ret->ai->
229 ai_addr)->sin_addr, sizeof(a));
231 /* Now let's find that canonicalname... */
232 if (getnameinfo((struct sockaddr *) ret->ai->ai_addr,
234 AF_INET ? sizeof(struct sockaddr_in) :
235 sizeof(struct sockaddr_in6), realhost,
236 sizeof(realhost), NULL, 0, 0) != 0) {
237 strncpy(realhost, host, sizeof(realhost));
240 /* We used the IPv4-only gethostbyname()... */
244 memcpy(&a, h->h_addr, sizeof(a));
245 /* This way we are always sure the h->h_name is valid :) */
246 strncpy(realhost, h->h_name, sizeof(realhost));
250 * This must be a numeric IPv4 address because it caused a
251 * success return from inet_addr.
253 ret->family = AF_INET;
254 strncpy(realhost, host, sizeof(realhost));
256 ret->address = ntohl(a);
257 realhost[lenof(realhost)-1] = '\0';
258 *canonicalname = smalloc(1+strlen(realhost));
259 strcpy(*canonicalname, realhost);
/*
 * Write a textual form of addr into buf (capacity buflen).
 * For AF_INET the stored host-order address is converted back to
 * network order and formatted with inet_ntoa.
 * NOTE(review): strncpy leaves buf unterminated when the text is
 * buflen characters or longer; no terminating store is visible here.
 * NOTE(review): the non-IPv4 branch is an unimplemented FIXME
 * placeholder.
 */
263 void sk_getaddr(SockAddr addr, char *buf, int buflen)
266 if (addr->family == AF_INET) {
269 a.s_addr = htonl(addr->address);
270 strncpy(buf, inet_ntoa(a), buflen);
273 FIXME; /* I don't know how to get a text form of an IPv6 address. */
/*
 * Classify an address as ADDRTYPE_IPV4 or ADDRTYPE_IPV6.
 * Only AF_INET is tested: every other family value, including the 0
 * that sk_namelookup leaves in place when resolution fails, is
 * reported as ADDRTYPE_IPV6.
 */
278 int sk_addrtype(SockAddr addr)
280 return (addr->family == AF_INET ? ADDRTYPE_IPV4 : ADDRTYPE_IPV6);
/*
 * Copy the raw binary address into buf.
 * For AF_INET, 4 bytes are written in network byte order, so buf must
 * hold at least 4 bytes. The other visible branch writes 16 bytes.
 * NOTE(review): the 16-byte copy reads from addr->ai itself, that is
 * from the start of the struct addrinfo, not from the sin6_addr held
 * behind ai->ai_addr. That looks like a bug -- confirm.
 */
283 void sk_addrcopy(SockAddr addr, char *buf)
286 if (addr->family == AF_INET) {
289 a.s_addr = htonl(addr->address);
290 memcpy(buf, (char*) &a.s_addr, 4);
293 memcpy(buf, (char*) addr->ai, 16);
298 void sk_addr_free(SockAddr addr)
303 static Plug sk_tcp_plug(Socket sock, Plug p)
305 Actual_Socket s = (Actual_Socket) sock;
312 static void sk_tcp_flush(Socket s)
315 * We send data to the socket as soon as we can anyway,
316 * so we don't need to do anything here. :-)
/*
 * Forward declarations of the file-local TCP socket methods.
 * The two private-pointer accessors are visibly stored in the static
 * socket_function_table initialisers further down; the others are
 * presumably entries of the same tables -- confirm.
 */
320 static void sk_tcp_close(Socket s);
321 static int sk_tcp_write(Socket s, char *data, int len);
322 static int sk_tcp_write_oob(Socket s, char *data, int len);
323 static void sk_tcp_set_private_ptr(Socket s, void *ptr);
324 static void *sk_tcp_get_private_ptr(Socket s);
325 static void sk_tcp_set_frozen(Socket s, int is_frozen);
326 static char *sk_tcp_socket_error(Socket s);
/*
 * Wrap an existing connection in a new Socket_tag.
 *
 *  - sock: opaque handle for the existing connection. select_result
 *    hands an accepted descriptor to plug_accepting cast to void *, so
 *    presumably the same encoding arrives here -- confirm.
 *  - plug: not used in any visible line; presumably stored on the
 *    socket.
 *
 * Unlike the sockets made by sk_new and sk_newlistener, the new
 * structure starts out writable. Its output bufchain is empty and the
 * OOB, frozen-readable and pending-error state is cleared. A failure
 * is recorded as an error_string(errno) message in ret->error; the
 * call that can fail is not visible.
 */
328 Socket sk_register(void *sock, Plug plug)
330 static struct socket_function_table fn_table = {
336 sk_tcp_set_private_ptr,
337 sk_tcp_get_private_ptr,
345 * Create Socket structure.
347 ret = smalloc(sizeof(struct Socket_tag));
351 bufchain_init(&ret->output_data);
352 ret->writable = 1; /* to start with */
353 ret->sending_oob = 0;
355 ret->frozen_readable = 0;
356 ret->localhost_only = 0; /* unused, but best init anyway */
357 ret->pending_error = 0;
358 ret->oobpending = FALSE;
364 ret->error = error_string(errno);
/*
 * Create an outgoing TCP socket and connect it to addr:port.
 *
 *  - addr: resolved address; its family is passed to socket() and
 *    selects which sockaddr variant is filled in.
 *  - port: remote port, converted with htons.
 *  - privport: presumably selects the local-port strategy (count down
 *    from 1023 versus letting the kernel pick port 0) -- the test
 *    itself is not visible, confirm.
 *  - oobinline: stored in the socket; SO_OOBINLINE is set with
 *    setsockopt, presumably only when this flag is set -- confirm.
 *  - nodelay: presumably gates the TCP_NODELAY setsockopt -- confirm.
 *  - plug: not used in any visible line; presumably stored on the
 *    socket.
 *
 * The new Socket_tag starts out not connected and not writable, with
 * an empty output bufchain. Failures visible here are recorded by
 * storing an error_string() message in ret->error. In the bind loop,
 * per the existing comment, any error other than EADDRINUSE counts as
 * a failure for a bad reason.
 *
 * NOTE(review): in the AF_INET6 connect branch the memset clears a,
 * not a6, although the following stores all go to a6. That looks like
 * a typo; a6 keeps whatever the bind step left in it.
 * NOTE(review): the return values of both setsockopt calls are
 * ignored.
 * NOTE(review): the connect error test only tolerates EWOULDBLOCK; a
 * non-blocking connect normally reports EINPROGRESS, which the
 * existing FIXME may be alluding to -- confirm before enabling
 * non-blocking connects.
 */
375 Socket sk_new(SockAddr addr, int port, int privport, int oobinline,
376 int nodelay, Plug plug)
378 static struct socket_function_table fn_table = {
384 sk_tcp_set_private_ptr,
385 sk_tcp_get_private_ptr,
392 struct sockaddr_in6 a6;
394 struct sockaddr_in a;
400 * Create Socket structure.
402 ret = smalloc(sizeof(struct Socket_tag));
406 bufchain_init(&ret->output_data);
407 ret->connected = 0; /* to start with */
408 ret->writable = 0; /* to start with */
409 ret->sending_oob = 0;
411 ret->frozen_readable = 0;
412 ret->localhost_only = 0; /* unused, but best init anyway */
413 ret->pending_error = 0;
414 ret->oobpending = FALSE;
420 s = socket(addr->family, SOCK_STREAM, 0);
424 ret->error = error_string(errno);
428 ret->oobinline = oobinline;
431 setsockopt(s, SOL_SOCKET, SO_OOBINLINE, (void *) &b, sizeof(b));
436 setsockopt(s, IPPROTO_TCP, TCP_NODELAY, (void *) &b, sizeof(b));
440 * Bind to local address.
443 localport = 1023; /* count from 1023 downwards */
445 localport = 0; /* just use port 0 (ie kernel picks) */
447 /* Loop round trying to bind */
452 if (addr->family == AF_INET6) {
453 memset(&a6, 0, sizeof(a6));
454 a6.sin6_family = AF_INET6;
455 /*a6.sin6_addr = in6addr_any; *//* == 0 */
456 a6.sin6_port = htons(localport);
460 a.sin_family = AF_INET;
461 a.sin_addr.s_addr = htonl(INADDR_ANY);
462 a.sin_port = htons(localport);
465 retcode = bind(s, (addr->family == AF_INET6 ?
466 (struct sockaddr *) &a6 :
467 (struct sockaddr *) &a),
469 AF_INET6 ? sizeof(a6) : sizeof(a)));
471 retcode = bind(s, (struct sockaddr *) &a, sizeof(a));
478 if (err != EADDRINUSE) /* failed, for a bad reason */
483 break; /* we're only looping once */
486 break; /* we might have got to the end */
490 ret->error = error_string(err);
495 * Connect to remote address.
498 if (addr->family == AF_INET6) {
499 memset(&a, 0, sizeof(a));
500 a6.sin6_family = AF_INET6;
501 a6.sin6_port = htons((short) port);
503 ((struct sockaddr_in6 *) addr->ai->ai_addr)->sin6_addr;
507 a.sin_family = AF_INET;
508 a.sin_addr.s_addr = htonl(addr->address);
509 a.sin_port = htons((short) port);
514 connect(s, ((addr->family == AF_INET6) ?
515 (struct sockaddr *) &a6 : (struct sockaddr *) &a),
516 (addr->family == AF_INET6) ? sizeof(a6) : sizeof(a))
518 connect(s, (struct sockaddr *) &a, sizeof(a))
522 * FIXME: We are prepared to receive EWOULDBLOCK here,
523 * because we might want the connection to be made
524 * asynchronously; but how do we actually arrange this in
527 if ( errno != EWOULDBLOCK ) {
528 ret->error = error_string(errno);
533 * If we _don't_ get EWOULDBLOCK, the connect has completed
534 * and we should set the socket as connected and writable.
/*
 * Create a listening TCP socket on the given port.
 *
 *  - port: local port, converted with htons.
 *  - plug: not used in any visible line; presumably stored on the
 *    socket.
 *  - local_host_only: recorded in ret->localhost_only; presumably also
 *    what selects the loopback address over the wildcard address for
 *    bind -- the test itself is not visible, confirm.
 *
 * The socket is created as AF_INET/SOCK_STREAM, SO_REUSEADDR is
 * enabled, and listen() is called with SOMAXCONN. Failures are
 * recorded as error_string() messages in ret->error, apparently after
 * each of socket, bind and listen.
 *
 * NOTE(review): the IPv6 lines test addr->family, but this function
 * has no addr parameter or visible local of that name, and the socket
 * itself is always created as AF_INET. Presumably that code sits
 * behind a conditional that is never enabled -- confirm; it cannot
 * compile as written.
 * NOTE(review): the setsockopt return value is ignored.
 */
545 Socket sk_newlistener(int port, Plug plug, int local_host_only)
547 static struct socket_function_table fn_table = {
553 sk_tcp_set_private_ptr,
554 sk_tcp_get_private_ptr,
561 struct sockaddr_in6 a6;
563 struct sockaddr_in a;
570 * Create Socket structure.
572 ret = smalloc(sizeof(struct Socket_tag));
576 bufchain_init(&ret->output_data);
577 ret->writable = 0; /* to start with */
578 ret->sending_oob = 0;
580 ret->frozen_readable = 0;
581 ret->localhost_only = local_host_only;
582 ret->pending_error = 0;
583 ret->oobpending = FALSE;
589 s = socket(AF_INET, SOCK_STREAM, 0);
593 ret->error = error_string(errno);
599 setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (const char *)&on, sizeof(on));
602 if (addr->family == AF_INET6) {
603 memset(&a6, 0, sizeof(a6));
604 a6.sin6_family = AF_INET6;
606 a6.sin6_addr = in6addr_loopback;
608 a6.sin6_addr = in6addr_any;
609 a6.sin6_port = htons(port);
613 a.sin_family = AF_INET;
615 a.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
617 a.sin_addr.s_addr = htonl(INADDR_ANY);
618 a.sin_port = htons((short)port);
621 retcode = bind(s, (addr->family == AF_INET6 ?
622 (struct sockaddr *) &a6 :
623 (struct sockaddr *) &a),
625 AF_INET6 ? sizeof(a6) : sizeof(a)));
627 retcode = bind(s, (struct sockaddr *) &a, sizeof(a));
636 ret->error = error_string(err);
641 if (listen(s, SOMAXCONN) < 0) {
643 ret->error = error_string(errno);
652 static void sk_tcp_close(Socket sock)
654 Actual_Socket s = (Actual_Socket) sock;
662 * The function which tries to send on a socket once it's deemed
/*
 * Push as much queued data as the kernel will take.
 *
 * Loops while there is urgent data (sending_oob nonzero) or ordinary
 * data in output_data. Urgent data is sent with MSG_OOB. After a send,
 * the unsent tail of oobdata is moved to the front and sending_oob
 * reduced, or the sent prefix is consumed from output_data.
 *
 * Error handling after send():
 *  - EWOULDBLOCK is the normal case of a full socket buffer.
 *  - A return of 0, ECONNABORTED or ECONNRESET is not reported
 *    immediately; the code is stored in pending_error for later
 *    delivery, for the reentrancy reason given in the comment below.
 *  - Any other error is logged and passed to fatalbox.
 *
 * NOTE(review): when send() returns 0, err is 0, so pending_error is
 * set to 0. net_pending_errors only acts on a nonzero pending_error,
 * so that case would go unreported -- confirm.
 * NOTE(review): noise_ultralight is fed nsent before the error check,
 * so it also sees negative results.
 */
665 void try_send(Actual_Socket s)
667 while (s->sending_oob || bufchain_size(&s->output_data) > 0) {
673 if (s->sending_oob) {
674 urgentflag = MSG_OOB;
675 len = s->sending_oob;
679 bufchain_prefix(&s->output_data, &data, &len);
681 nsent = send(s->s, data, len, urgentflag);
682 noise_ultralight(nsent);
684 err = (nsent < 0 ? errno : 0);
685 if (err == EWOULDBLOCK) {
687 * Perfectly normal: we've sent all we can for the moment.
691 } else if (nsent == 0 ||
692 err == ECONNABORTED || err == ECONNRESET) {
694 * If send() returns CONNABORTED or CONNRESET, we
695 * unfortunately can't just call plug_closing(),
696 * because it's quite likely that we're currently
697 * _in_ a call from the code we'd be calling back
698 * to, so we'd have to make half the SSH code
699 * reentrant. Instead we flag a pending error on
700 * the socket, to be dealt with (by calling
701 * plug_closing()) at some suitable future moment.
703 s->pending_error = err;
706 /* We're inside the Unix frontend here, so we know
707 * that the frontend handle is unnecessary. */
708 logevent(NULL, error_string(err));
709 fatalbox("%s", error_string(err));
712 if (s->sending_oob) {
714 memmove(s->oobdata, s->oobdata+nsent, len-nsent);
715 s->sending_oob = len - nsent;
720 bufchain_consume(&s->output_data, nsent);
/*
 * Queue len bytes from buf on the socket and return how many bytes
 * are still waiting in output_data afterwards. Per the existing
 * comments, an immediate send attempt is made between the two visible
 * statements.
 */
726 static int sk_tcp_write(Socket sock, char *buf, int len)
728 Actual_Socket s = (Actual_Socket) sock;
731 * Add the data to the buffer list on the socket.
733 bufchain_add(&s->output_data, buf, len);
736 * Now try sending from the start of the buffer list.
741 return bufchain_size(&s->output_data);
/*
 * Send buf as urgent (out-of-band) data.
 * Any ordinary data still queued in output_data is discarded first.
 * The payload is copied into the fixed oobdata array and sending_oob
 * records how much of it remains to be sent; that remainder is also
 * the return value.
 * NOTE(review): the size limit is enforced only by assert, so the
 * check vanishes when NDEBUG is defined.
 */
744 static int sk_tcp_write_oob(Socket sock, char *buf, int len)
746 Actual_Socket s = (Actual_Socket) sock;
749 * Replace the buffer list on the socket with the data.
751 bufchain_clear(&s->output_data);
752 assert(len <= sizeof(s->oobdata));
753 memcpy(s->oobdata, buf, len);
754 s->sending_oob = len;
757 * Now try sending from the start of the buffer list.
762 return s->sending_oob;
/*
 * Dispatch one select() event for file descriptor fd.
 *
 * The owning socket is looked up in sktree by fd; an unknown fd
 * returns 1 without doing anything else. The event codes handled in
 * the visible cases are 4 (exceptional), 1 (readable, which also
 * means a connection to accept on a listener) and 2 (writable).
 *
 *  - Exceptional: on a non-oobinline socket the urgent data is read
 *    with MSG_OOB and passed to plug_receive with type 2. On an
 *    oobinline socket oobpending is set and control falls through
 *    into the readable case (deliberate, per the existing comment).
 *  - Readable, listener: accept() the connection, then close it at
 *    once if the listener is localhost_only and the peer address is
 *    not exactly INADDR_LOOPBACK, or if plug_accepting returns
 *    nonzero.
 *  - Readable, data socket: while frozen, frozen_readable is set
 *    instead of reading. Otherwise data is read and passed to
 *    plug_receive with type 0 when atmark is nonzero, else type 1.
 *    A read of 0 bytes, or an error other than EWOULDBLOCK, is
 *    reported through plug_closing.
 *  - Writable: plug_sent is called only if the amount of queued data
 *    shrank between the two measurements.
 *
 * NOTE(review): addrlen is declared int but accept() takes a
 * socklen_t pointer.
 * NOTE(review): the accepted descriptor is passed to plug_accepting
 * by casting the int to a void pointer.
 * NOTE(review): buf is a 20480-byte automatic array.
 */
765 int select_result(int fd, int event)
769 char buf[20480]; /* nice big buffer for plenty of speed */
773 /* Find the Socket structure */
774 s = find234(sktree, (void *) fd, cmpforsearch);
776 return 1; /* boggle */
778 noise_ultralight(event);
781 #ifdef FIXME_NONBLOCKING_CONNECTIONS
782 case FIXME: /* connected */
783 s->connected = s->writable = 1;
786 case 4: /* exceptional */
789 * On a non-oobinline socket, this indicates that we
790 * can immediately perform an OOB read and get back OOB
791 * data, which we will send to the back end with
792 * type==2 (urgent data).
794 ret = recv(s->s, buf, sizeof(buf), MSG_OOB);
795 noise_ultralight(ret);
797 char *str = (ret == 0 ? "Internal networking trouble" :
798 error_string(errno));
799 /* We're inside the Unix frontend here, so we know
800 * that the frontend handle is unnecessary. */
804 return plug_receive(s->plug, 2, buf, ret);
810 * If we reach here, this is an oobinline socket, which
811 * means we should set s->oobpending and then fall through
814 s->oobpending = TRUE;
815 case 1: /* readable; also acceptance */
818 * On a listening socket, the readability event means a
819 * connection is ready to be accepted.
821 struct sockaddr_in isa;
822 int addrlen = sizeof(struct sockaddr_in);
823 int t; /* socket of connection */
825 memset(&isa, 0, sizeof(struct sockaddr_in));
827 t = accept(s->s,(struct sockaddr *)&isa,&addrlen);
832 if (s->localhost_only &&
833 ntohl(isa.sin_addr.s_addr) != INADDR_LOOPBACK) {
834 close(t); /* someone let nonlocal through?! */
835 } else if (plug_accepting(s->plug, (void*)t)) {
836 close(t); /* denied or error */
842 * If we reach here, this is not a listening socket, so
843 * readability really means readability.
846 /* In the case the socket is still frozen, we don't even bother */
848 s->frozen_readable = 1;
853 * We have received data on the socket. For an oobinline
854 * socket, this might be data _before_ an urgent pointer,
855 * in which case we send it to the back end with type==1
856 * (data prior to urgent).
858 if (s->oobinline && s->oobpending) {
860 if (ioctl(s->s, SIOCATMARK, &atmark) == 0 && atmark)
861 s->oobpending = FALSE; /* clear this indicator */
865 ret = recv(s->s, buf, sizeof(buf), 0);
866 noise_ultralight(ret);
868 if (errno == EWOULDBLOCK) {
873 return plug_closing(s->plug, error_string(errno), errno, 0);
874 } else if (0 == ret) {
875 return plug_closing(s->plug, NULL, 0, 0);
877 return plug_receive(s->plug, atmark ? 0 : 1, buf, ret);
880 case 2: /* writable */
882 int bufsize_before, bufsize_after;
884 bufsize_before = s->sending_oob + bufchain_size(&s->output_data);
886 bufsize_after = s->sending_oob + bufchain_size(&s->output_data);
887 if (bufsize_after < bufsize_before)
888 plug_sent(s->plug, bufsize_after);
897 * Deal with socket errors detected in try_send().
/*
 * Deliver the errors that try_send() deferred.
 * Walks sktree by index and, for a socket whose pending_error is
 * nonzero, reports it through plug_closing together with the matching
 * error_string() text. The rescan strategy is described in the
 * comment inside the function.
 * NOTE(review): try_send also takes its deferred-error path when
 * send() returns 0, storing 0 in pending_error; such a socket never
 * satisfies the test here -- confirm whether that is intended.
 */
899 void net_pending_errors(void)
905 * This might be a fiddly business, because it's just possible
906 * that handling a pending error on one socket might cause
907 * others to be closed. (I can't think of any reason this might
908 * happen in current SSH implementation, but to maintain
909 * generality of this network layer I'll assume the worst.)
911 * So what we'll do is search the socket list for _one_ socket
912 * with a pending error, and then handle it, and then search
913 * the list again _from the beginning_. Repeat until we make a
914 * pass with no socket errors present. That way we are
915 * protected against the socket list changing under our feet.
919 for (i = 0; (s = index234(sktree, i)) != NULL; i++) {
920 if (s->pending_error) {
922 * An error has occurred on this socket. Pass it to the
925 plug_closing(s->plug, error_string(s->pending_error),
926 s->pending_error, 0);
934 * Each socket abstraction contains a `void *' private field in
935 * which the client can keep state.
/*
 * Store the client-owned pointer ptr in the private_ptr field of the
 * socket. The pointer is saved as-is and is not dereferenced here.
 */
937 static void sk_tcp_set_private_ptr(Socket sock, void *ptr)
939 Actual_Socket s = (Actual_Socket) sock;
940 s->private_ptr = ptr;
/*
 * Return the private_ptr field of the socket, the value last stored
 * there by sk_tcp_set_private_ptr.
 */
943 static void *sk_tcp_get_private_ptr(Socket sock)
945 Actual_Socket s = (Actual_Socket) sock;
946 return s->private_ptr;
950 * Special error values are returned from sk_namelookup and sk_new
951 * if there's a problem. These functions extract an error message,
952 * or return NULL if there's no problem.
954 char *sk_addr_error(SockAddr addr)
958 static char *sk_tcp_socket_error(Socket sock)
960 Actual_Socket s = (Actual_Socket) sock;
/*
 * Freeze or unfreeze delivery of readability events for a socket.
 * A request that matches the current state is detected by the first
 * test (presumably followed by an early return -- confirm).
 * Otherwise the new state is stored. When unfreezing a socket that
 * missed a readability notification while frozen, one byte is peeked
 * with MSG_PEEK, which leaves the data in the receive queue. The
 * frozen_readable flag is then cleared.
 * NOTE(review): the result of the peeking recv is ignored.
 */
964 static void sk_tcp_set_frozen(Socket sock, int is_frozen)
966 Actual_Socket s = (Actual_Socket) sock;
967 if (s->frozen == is_frozen)
969 s->frozen = is_frozen;
970 if (!is_frozen && s->frozen_readable) {
972 recv(s->s, &c, 1, MSG_PEEK);
974 s->frozen_readable = 0;
978 * For Unix select()-based frontends: enumerate all sockets
979 * currently active, and state whether we currently wish to receive
980 * select events on them for reading, writing and exceptional
/*
 * Work out which select() events are wanted for socket s, as a bit
 * mask accumulated in val: 1 = read, 2 = write, 4 = exceptional.
 * A connected, unfrozen socket wants read and exceptional events; a
 * socket with queued output wants write events; the last visible
 * line adds read for accepting, presumably for a socket that is not
 * connected -- confirm. The result is presumably stored through rwx.
 * NOTE(review): the write test looks only at output_data. try_send
 * also has urgent data pending whenever sending_oob is nonzero, and
 * sk_tcp_write_oob empties output_data first, so a blocked urgent
 * send would not request a write event -- confirm.
 */
983 static void set_rwx(Actual_Socket s, int *rwx)
986 if (s->connected && !s->frozen)
987 val |= 1 | 4; /* read, except */
988 if (bufchain_size(&s->output_data))
989 val |= 2; /* write */
991 val |= 1; /* read == accept */
/*
 * Begin enumerating the sockets in sktree.
 * Fetches the element at index *state, post-incrementing the cursor,
 * and returns its s member, or -1 when there is no such element.
 * Presumably *state is reset beforehand and rwx is filled in via
 * set_rwx -- neither is visible, confirm.
 */
995 int first_socket(int *state, int *rwx)
999 s = index234(sktree, (*state)++);
1002 return s ? s->s : -1;
/*
 * Continue an enumeration of sktree.
 * Fetches the element at index *state, post-incrementing the cursor,
 * and returns its s member, or -1 when there is no such element.
 * Presumably rwx is filled in via set_rwx when a socket is found --
 * not visible, confirm.
 */
1005 int next_socket(int *state, int *rwx)
1007 Actual_Socket s = index234(sktree, (*state)++);
1010 return s ? s->s : -1;
/*
 * Look up a service name with getservbyname (any protocol, since the
 * second argument is NULL) and return its port in host byte order.
 * NOTE(review): getservbyname returns NULL for an unknown service; no
 * check is visible before se is dereferenced -- confirm one exists.
 */
1013 int net_service_lookup(char *service)
1016 se = getservbyname(service, NULL);
1018 return ntohs(se->s_port);