/*
 * Copyright (c) 2016, 2017, 2018, 2022 Jonas 'Sortie' Termansen.
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 *
 * net/ping.cpp
 * Internet Control Message Protocol Echo.
 */

// NOTE(review): the header names of the bare #include directives below are
// missing in this copy of the file (only "ip.h" and "ping.h" survived). They
// are left exactly as found - TODO restore them from version control.
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#ifndef IOV_MAX
#include
#endif
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include "ip.h"
#include "ping.h"

namespace Sortix {
namespace Ping {

class PingSocket;

// ICMP message header with the last four bytes left uninterpreted
// (roh is presumably "rest of header" - TODO confirm).
struct icmp
{
	uint8_t type;
	uint8_t code;
	uint16_t checksum;
	uint8_t roh[4];
};

// ICMP echo request/reply header. HandleIP parses incoming packets with this
// layout and uses the identifier to look up the receiving socket.
struct icmp_echo
{
	uint8_t type;
	uint8_t code;
	uint16_t checksum;
	uint16_t identifier;
	uint16_t sequence;
};

// The part of the echo header that sendmsg_internal writes in front of the
// caller's data. It stops after the identifier, so the caller's data starts
// where icmp_echo has its sequence field.
struct ping
{
	uint8_t type;
	uint8_t code;
	uint16_t checksum;
	uint16_t identifier;
};

// Socket address storage for either supported address family. The family
// member is used to inspect the address family before the concrete type is
// known.
union ping_sockaddr
{
	sa_family_t family;
	struct sockaddr_in in;
	struct sockaddr_in6 in6;
};

// ICMP message types handled by this file.
#define ICMP_TYPE_ECHO_REPLY 0
#define ICMP_TYPE_ECHO_REQUEST 8

// These values are documented in ping(4).
static const size_t DEFAULT_PACKET_LIMIT = 64; static const size_t MAXIMAL_PACKET_LIMIT = 4096; static kthread_mutex_t bind_lock = KTHREAD_MUTEX_INITIALIZER; static PingSocket** bindings_v4; static PingSocket** bindings_v6; void Init() { if ( !(bindings_v4 = new PingSocket*[65536]) || !(bindings_v6 = new PingSocket*[65536]) ) Panic("Failed to allocate Ping Socket bindings"); for ( size_t i = 0; i < 65536; i++ ) { bindings_v4[i] = NULL; bindings_v6[i] = NULL; } } static bool IsSupportedAddressFamily(int af) { return af == AF_INET /* TODO: || af == AF_INET6 */; } static size_t AddressFamilySize(int af) { switch ( af ) { case AF_INET: return sizeof(struct sockaddr_in); case AF_INET6: return sizeof(struct sockaddr_in6); } return 0; } class PingSocket : public AbstractInode { friend void HandleIP(Ref pkt, const struct in_addr* src, const struct in_addr* dst, bool dst_broadcast); public: PingSocket(int af); virtual ~PingSocket(); virtual Ref accept4(ioctx_t* ctx, uint8_t* addr, size_t* addrsize, int flags); virtual int bind(ioctx_t* ctx, const uint8_t* addr, size_t addrsize); virtual int connect(ioctx_t* ctx, const uint8_t* addr, size_t addrsize); virtual int listen(ioctx_t* ctx, int backlog); virtual ssize_t readv(ioctx_t* ctx, const struct iovec* iov, int iovcnt); virtual ssize_t recv(ioctx_t* ctx, uint8_t* buf, size_t count, int flags); virtual ssize_t recvmsg(ioctx_t* ctx, struct msghdr* msg, int flags); virtual ssize_t recvmsg_internal(ioctx_t* ctx, struct msghdr* msg, int flags); virtual ssize_t send(ioctx_t* ctx, const uint8_t* buf, size_t count, int flags); virtual ssize_t sendmsg(ioctx_t* ctx, const struct msghdr* msg, int flags); virtual ssize_t sendmsg_internal(ioctx_t* ctx, const struct msghdr* msg, int flags); virtual ssize_t writev(ioctx_t* ctx, const struct iovec* iov, int iovcnt); virtual int poll(ioctx_t* ctx, PollNode* node); virtual int getsockopt(ioctx_t* ctx, int level, int option_name, void* option_value, size_t* option_size_ptr); virtual int 
setsockopt(ioctx_t* ctx, int level, int option_name, const void* option_value, size_t option_size); virtual int shutdown(ioctx_t* ctx, int how); virtual int getpeername(ioctx_t* ctx, uint8_t* addr, size_t* addrsize); virtual int getsockname(ioctx_t* ctx, uint8_t* addr, size_t* addrsize); public: void ReceivePacket(Ref pkt); private: short PollEventStatus(); bool ImportAddress(ioctx_t* ctx, union ping_sockaddr* dest, const void* addr, size_t addrsize); bool CanBind(union ping_sockaddr new_local); bool BindDefault(const union ping_sockaddr* new_local); private: kthread_mutex_t socket_lock; kthread_cond_t receive_cond; PollChannel poll_channel; union ping_sockaddr local; union ping_sockaddr remote; Ref first_packet; Ref last_packet; PingSocket* prev_socket; PingSocket* next_socket; size_t receive_current; size_t receive_limit; size_t send_limit; unsigned int ifindex; int af; int sockerr; int how_shutdown; bool bound; bool broadcast; bool connected; bool reuseaddr; }; PingSocket::PingSocket(int af) { Process* process = CurrentProcess(); inode_type = INODE_TYPE_STREAM; dev = (dev_t) this; ino = (ino_t) this; type = S_IFSOCK; kthread_mutex_lock(&process->idlock); stat_uid = process->uid; stat_gid = process->gid; kthread_mutex_unlock(&process->idlock); stat_mode = 0600 | this->type; supports_iovec = true; socket_lock = KTHREAD_MUTEX_INITIALIZER; receive_cond = KTHREAD_COND_INITIALIZER; // poll_channel initialized by constructor memset(&local, 0, sizeof(local)); memset(&remote, 0, sizeof(remote)); if ( af == AF_INET ) { local.in.sin_family = AF_INET; local.in.sin_addr.s_addr = htobe32(INADDR_ANY); local.in.sin_port = htobe16(0); remote.in.sin_family = AF_INET; remote.in.sin_addr.s_addr = htobe32(INADDR_ANY); remote.in.sin_port = htobe16(0); } else if ( af == AF_INET6 ) { local.in6.sin6_family = AF_INET6; local.in6.sin6_addr = in6addr_any; local.in6.sin6_port = htobe16(0); remote.in6.sin6_family = AF_INET6; remote.in6.sin6_addr = in6addr_any; remote.in6.sin6_port = 
htobe16(0); } // first_packet initialized by constructor // last_packet initialized by constructor prev_socket = NULL; next_socket = NULL; receive_current = 0; receive_limit = DEFAULT_PACKET_LIMIT * Page::Size(); send_limit = DEFAULT_PACKET_LIMIT * Page::Size(); ifindex = 0; this->af = af; sockerr = 0; how_shutdown = 0; bound = false; broadcast = false; connected = false; reuseaddr = false; } PingSocket::~PingSocket() { if ( bound ) { ScopedLock lock(&bind_lock); if ( af == AF_INET ) { uint16_t port = be16toh(local.in.sin_port); if ( prev_socket ) prev_socket->next_socket = next_socket; else bindings_v4[port] = next_socket; if ( next_socket ) next_socket->prev_socket = prev_socket; } else if ( af == AF_INET6 ) { uint16_t port = be16toh(local.in6.sin6_port); if ( prev_socket ) prev_socket->next_socket = next_socket; else bindings_v6[port] = next_socket; if ( next_socket ) next_socket->prev_socket = prev_socket; } bound = false; } // Avoid stack overflow in first_packet recursive destructor. 
while ( first_packet ) { Ref next = first_packet->next; first_packet->next.Reset(); first_packet = next; } last_packet.Reset(); } Ref PingSocket::accept4(ioctx_t* /*ctx*/, uint8_t* /*addr*/, size_t* /*addrsize*/, int /*flags*/) { return errno = EOPNOTSUPP, Ref(NULL); } bool PingSocket::ImportAddress(ioctx_t* ctx, union ping_sockaddr* dest, const void* addr, size_t addrsize) { if ( addrsize != AddressFamilySize(af) ) { sa_family_t family; if ( sizeof(family) <= addrsize && ctx->copy_from_src(&family, addr, sizeof(family)) && family == AF_UNSPEC ) { union ping_sockaddr unspec; memset(&unspec, 0, sizeof(unspec)); unspec.family = AF_UNSPEC; memcpy(dest, &unspec, sizeof(unspec)); return true; } return errno = EINVAL, false; } union ping_sockaddr copy; memset(©, 0, sizeof(copy)); if ( !ctx->copy_from_src(©, addr, addrsize) ) return false; if ( copy.family != af && copy.family != AF_UNSPEC ) return errno = EAFNOSUPPORT, false; memcpy(dest, ©, sizeof(copy)); return true; } // bind_lock locked, socket_lock locked (in that order) bool PingSocket::CanBind(union ping_sockaddr new_local) { if ( af == AF_INET ) { // Bind to either the any address, the broadcast address, the address of // a network interface, or the broadcast address of a network interface. if ( new_local.in.sin_addr.s_addr != htobe32(INADDR_ANY) && new_local.in.sin_addr.s_addr != htobe32(INADDR_BROADCAST) ) { // TODO: What happens to sockets if the network interface changes // its address? 
ScopedLock ifs_lock(&netifs_lock); bool found = false; for ( unsigned int i = 1; i < netifs_count; i++ ) { NetworkInterface* netif = netifs[i]; if ( !netif ) continue; ScopedLock cfg_lock(&netif->cfg_lock); struct in_addr if_broadcast_ip; if_broadcast_ip.s_addr = netif->cfg.inet.address.s_addr | ~netif->cfg.inet.subnet.s_addr; if ( memcmp(&netif->cfg.inet.address, &new_local.in.sin_addr, sizeof(struct in_addr)) == 0 || memcmp(&if_broadcast_ip, &new_local.in.sin_addr, sizeof(struct in_addr)) == 0 ) { found = true; break; } } // No interface had the correct address. if ( !found ) return errno = EADDRNOTAVAIL, false; } uint16_t port = be16toh(new_local.in.sin_port); if ( port == 0 ) return errno = EINVAL, false; for ( PingSocket* socket = bindings_v4[port]; socket; socket = socket->next_socket ) { // Taking the lock of the other socket is safe against deadlocks, // despite having the lock of this socket, because bind_lock was // locked prior to this socket's lock, and bind_lock must always // be taken before the same thread locks two sockets. ScopedLock lock(&socket->socket_lock); if ( new_local.in.sin_addr.s_addr == htobe32(INADDR_ANY) && !(reuseaddr && socket->reuseaddr) ) return errno = EADDRINUSE, false; if ( socket->local.in.sin_addr.s_addr == htobe32(INADDR_ANY) && !(reuseaddr && socket->reuseaddr) ) return errno = EADDRINUSE, false; if ( new_local.in.sin_addr.s_addr == socket->local.in.sin_addr.s_addr ) return errno = EADDRINUSE, false; } } else if ( af == AF_INET6 ) { // TODO: IPv6 support for seeing if any interface has the address. 
if ( true ) return errno = EAFNOSUPPORT, false; uint16_t port = be16toh(new_local.in6.sin6_port); if ( port == 0 ) return errno = EINVAL, false; for ( PingSocket* socket = bindings_v6[port]; socket; socket = socket->next_socket ) { if ( !memcmp(&new_local.in6.sin6_addr, &in6addr_any, sizeof(in6addr_any)) && !(reuseaddr && socket->reuseaddr) ) if ( !memcmp(&socket->local.in6.sin6_addr, &in6addr_any, sizeof(in6addr_any)) && !(reuseaddr && socket->reuseaddr) ) if ( !memcmp(&new_local.in6.sin6_addr, &socket->local.in6.sin6_addr, sizeof(new_local.in6.sin6_addr)) ) return errno = EADDRINUSE, false; } } else return errno = EAFNOSUPPORT, false; return true; } int PingSocket::bind(ioctx_t* ctx, const uint8_t* addr, size_t addrsize) { ScopedLock lock2(&bind_lock); ScopedLock lock(&socket_lock); if ( bound ) return errno = EINVAL, -1; union ping_sockaddr new_local; if ( !ImportAddress(ctx, &new_local, addr, addrsize) ) return -1; if ( new_local.family == AF_UNSPEC ) return errno = EAFNOSUPPORT, -1; uint16_t port; if ( af == AF_INET ) port = be16toh(new_local.in.sin_port); else if ( af == AF_INET6 ) port = be16toh(new_local.in6.sin6_port); else return errno = EAFNOSUPPORT, -1; if ( port == 0 ) return BindDefault(&new_local) ? 
0 : -1; if ( !CanBind(new_local) ) return -1; if ( af == AF_INET ) { uint16_t port = be16toh(new_local.in.sin_port); if ( bindings_v4[port] ) bindings_v4[port]->prev_socket = this; next_socket = bindings_v4[port]; prev_socket = NULL; bindings_v4[port] = this; } else if ( af == AF_INET6 ) { uint16_t port = be16toh(new_local.in6.sin6_port); if ( bindings_v6[port] ) bindings_v6[port]->prev_socket = this; next_socket = bindings_v6[port]; prev_socket = NULL; bindings_v6[port] = this; } else return errno = EAFNOSUPPORT, -1; memcpy(&local, &new_local, sizeof(new_local)); bound = true; return 0; } // bind_lock locked, socket_lock locked (in that order) bool PingSocket::BindDefault(const union ping_sockaddr* new_local_ptr) { // TODO: This allocator becomes increasingly biased as more ports are // allocated. // TODO: Try not to allocate recently used ports. union ping_sockaddr new_local; if ( new_local_ptr ) memcpy(&new_local, new_local_ptr, sizeof(union ping_sockaddr)); else { memset(&new_local, 0, sizeof(new_local)); if ( af == AF_INET ) { new_local.in.sin_family = AF_INET; new_local.in.sin_addr.s_addr = htobe32(INADDR_ANY); } else if ( af == AF_INET6 ) { new_local.in6.sin6_family = AF_INET6; new_local.in6.sin6_addr = in6addr_any; } else return errno = EAFNOSUPPORT, false; } uint16_t start = 32768; // Documented in ping(4). uint16_t end = 61000; // Documented in ping(4). 
uint16_t count = end - start; uint16_t offset = arc4random_uniform(count); for ( uint16_t i = 0; i < count; i++ ) { uint16_t j = offset + i; if ( count <= j ) j -= count; uint16_t port = start + j; if ( af == AF_INET ) new_local.in.sin_port = htobe16(port); else if ( af == AF_INET6 ) new_local.in6.sin6_port = htobe16(port); else return errno = EAFNOSUPPORT, false; if ( !CanBind(new_local) ) { if ( errno == EADDRINUSE ) continue; return false; } if ( af == AF_INET ) { if ( bindings_v4[port] ) bindings_v4[port]->prev_socket = this; next_socket = bindings_v4[port]; prev_socket = NULL; bindings_v4[port] = this; } else if ( af == AF_INET6 ) { if ( bindings_v6[port] ) bindings_v6[port]->prev_socket = this; next_socket = bindings_v6[port]; prev_socket = NULL; bindings_v6[port] = this; } else return errno = EAFNOSUPPORT, false; memcpy(&local, &new_local, sizeof(new_local)); bound = true; return true; } return errno = EAGAIN, false; } int PingSocket::connect(ioctx_t* ctx, const uint8_t* addr, size_t addrsize) { ScopedLock lock2(&bind_lock); ScopedLock lock(&socket_lock); union ping_sockaddr new_remote; if ( !ImportAddress(ctx, &new_remote, addr, addrsize) ) return -1; if ( new_remote.family == AF_UNSPEC ) { // Disconnect the socket when connecting to the AF_UNSPEC family. connected = false; return 0; } else if ( af == AF_INET ) { } else return errno = EAFNOSUPPORT, -1; // If the socket is not bound, find a route to the remote address and bind // to the appropriate source address. if ( !bound ) { union ping_sockaddr new_local; memset(&new_local, 0, sizeof(new_local)); if ( af == AF_INET ) { struct in_addr any; any.s_addr = htobe32(INADDR_ANY); new_local.in.sin_family = AF_INET; if ( !IP::GetSourceIP(&any, &new_remote.in.sin_addr, &new_local.in.sin_addr, ifindex, NULL) ) return -1; new_local.in.sin_port = htobe16(0); } else return errno = EAFNOSUPPORT, -1; if ( !BindDefault(&new_local) ) return -1; } // Test if there is a route from the local address to the remote address. 
if ( af == AF_INET ) { if ( !IP::GetSourceIP(&local.in.sin_addr, &new_remote.in.sin_addr, NULL, ifindex, NULL) ) return -1; } else return errno = EAFNOSUPPORT, -1; // Set the remote address and become connected. connected = true; memcpy(&remote, &new_remote, sizeof(new_remote)); // Discard datagrams not from the new remote, thus enforcing that all // datagrams provided by recvmsg always comes from the address connected to. size_t name_size = AddressFamilySize(af); Ref* packet_ptr = &first_packet; while ( *packet_ptr ) { void* name = first_packet->from + first_packet->offset; if ( memcmp(name, &remote, name_size) != 0 ) { Ref next = (*packet_ptr)->next; (*packet_ptr)->next.Reset(); packet_ptr->Reset(); *packet_ptr = next; continue; } packet_ptr = &(*packet_ptr)->next; } if ( !first_packet ) last_packet.Reset(); return 0; } int PingSocket::listen(ioctx_t* /*ctx*/, int /*backlog*/) { return errno = EOPNOTSUPP, -1; } ssize_t PingSocket::readv(ioctx_t* ctx, const struct iovec* iov, int iovcnt) { struct msghdr msg; memset(&msg, 0, sizeof(msg)); msg.msg_iov = (struct iovec*) iov; msg.msg_iovlen = iovcnt; return recvmsg_internal(ctx, &msg, 0); } ssize_t PingSocket::recv(ioctx_t* ctx, uint8_t* buf, size_t count, int flags) { struct iovec iov; memset(&iov, 0, sizeof(iov)); iov.iov_base = (void*) buf; iov.iov_len = count; struct msghdr msg; memset(&msg, 0, sizeof(msg)); msg.msg_iov = &iov; msg.msg_iovlen = 1; return recvmsg_internal(ctx, &msg, flags); } ssize_t PingSocket::recvmsg(ioctx_t* ctx, struct msghdr* msg_ptr, int flags) { struct msghdr msg; if ( !ctx->copy_from_src(&msg, msg_ptr, sizeof(msg)) ) return -1; if ( msg.msg_iovlen < 0 || IOV_MAX < msg.msg_iovlen ) return errno = EINVAL, -1; size_t iov_size = msg.msg_iovlen * sizeof(struct iovec); struct iovec* iov = new struct iovec[msg.msg_iovlen]; if ( !iov ) return -1; struct iovec* user_iov = msg.msg_iov; if ( !ctx->copy_from_src(iov, user_iov, iov_size) ) return delete[] iov, -1; msg.msg_iov = iov; ssize_t result = 
recvmsg_internal(ctx, &msg, flags); msg.msg_iov = user_iov; delete[] iov; if ( !ctx->copy_to_dest(msg_ptr, &msg, sizeof(msg)) ) return -1; return result; } ssize_t PingSocket::recvmsg_internal(ioctx_t* ctx, struct msghdr* msg, int flags) { if ( flags & ~(MSG_PEEK) ) return errno = EINVAL, -1; ScopedLock lock(&socket_lock); if ( sockerr ) { errno = sockerr; sockerr = 0; return -1; } if ( how_shutdown & SHUT_RD ) return 0; while ( !first_packet ) { if ( ctx->dflags & O_NONBLOCK ) return errno = EWOULDBLOCK, -1; if ( !kthread_cond_wait_signal(&receive_cond, &socket_lock) ) return errno = EINTR, -1; } void* name = first_packet->from + first_packet->offset; size_t name_size = AddressFamilySize(af); assert(name_size <= first_packet->length - first_packet->offset); if ( msg->msg_name ) { if ( name_size < msg->msg_namelen ) msg->msg_namelen = name_size; if ( !ctx->copy_to_dest(msg->msg_name, name, msg->msg_namelen) ) return -1; } else msg->msg_namelen = 0; first_packet->offset += name_size; const unsigned char* in = first_packet->from + first_packet->offset; size_t in_length = first_packet->length - first_packet->offset; msg->msg_controllen = 0; msg->msg_flags = 0; if ( SSIZE_MAX < TruncateIOVec(msg->msg_iov, msg->msg_iovlen, SSIZE_MAX) ) return errno = EINVAL, -1; size_t sofar = 0; for ( int i = 0; i < msg->msg_iovlen && sofar < in_length; i++) { size_t in_left = in_length - sofar; const struct iovec* iov = &msg->msg_iov[i]; size_t count = in_left < iov->iov_len ? 
in_left : iov->iov_len; if ( !ctx->copy_to_dest(iov->iov_base, in + sofar, count) ) return -1; sofar += count; } if ( sofar < in_length ) msg->msg_flags |= MSG_TRUNC; if ( !(flags & MSG_PEEK) ) { receive_current -= first_packet->pmap.size; Ref next = first_packet->next; first_packet->next.Reset(); first_packet = next; if ( !first_packet ) last_packet.Reset(); } return sofar; } ssize_t PingSocket::send(ioctx_t* ctx, const uint8_t* buf, size_t count, int flags) { struct iovec iov; memset(&iov, 0, sizeof(iov)); iov.iov_base = (void*) buf; iov.iov_len = count; struct msghdr msg; memset(&msg, 0, sizeof(msg)); msg.msg_iov = &iov; msg.msg_iovlen = 1; return sendmsg_internal(ctx, &msg, flags); } ssize_t PingSocket::sendmsg(ioctx_t* ctx, const struct msghdr* msg_ptr, int flags) { struct msghdr msg; if ( !ctx->copy_from_src(&msg, msg_ptr, sizeof(msg)) ) return -1; if ( msg.msg_iovlen < 0 || IOV_MAX < msg.msg_iovlen ) return errno = EINVAL, -1; size_t iov_size = msg.msg_iovlen * sizeof(struct iovec); struct iovec* iov = new struct iovec[msg.msg_iovlen]; if ( !iov ) return -1; if ( !ctx->copy_from_src(iov, msg.msg_iov, iov_size) ) return delete[] iov, -1; msg.msg_iov = iov; ssize_t result = sendmsg_internal(ctx, &msg, flags); delete[] iov; return result; } ssize_t PingSocket::sendmsg_internal(ioctx_t* ctx, const struct msghdr* msg, int flags) { if ( flags & ~(MSG_NOSIGNAL) ) // TODO: MSG_DONTROUTE return errno = EINVAL, -1; ScopedLock lock(&socket_lock); if ( how_shutdown & SHUT_WR ) { if ( !(flags & MSG_NOSIGNAL) ) CurrentThread()->DeliverSignal(SIGPIPE); return errno = EPIPE, -1; } if ( sockerr ) { errno = sockerr; sockerr = 0; return -1; } union ping_sockaddr sendto; if ( msg->msg_name ) { if ( connected ) return errno = EISCONN, -1; if ( af == AF_INET ) { if ( msg->msg_namelen != sizeof(sendto.in) ) return errno = EINVAL, -1; sendto.family = af; if ( !ctx->copy_from_src(&sendto.in, msg->msg_name, sizeof(sendto.in)) ) return -1; } // TODO: IPv6 support. 
else return errno = EAFNOSUPPORT, -1; } else if ( connected ) sendto = remote; else return errno = EDESTADDRREQ, -1; if ( !bound ) { kthread_mutex_unlock(&socket_lock); // Don't deadlock. kthread_mutex_lock(&bind_lock); kthread_mutex_lock(&socket_lock); bool was_bound = BindDefault(NULL); kthread_mutex_unlock(&bind_lock); if ( !was_bound ) return -1; } // Find a route to the destination and verify the port is non-zero. union ping_sockaddr sendfrom; if ( af == AF_INET ) { if ( !IP::GetSourceIP(&local.in.sin_addr, &sendto.in.sin_addr, &sendfrom.in.sin_addr, ifindex) ) return -1; } // TODO: IPv6 support. else return errno = EAFNOSUPPORT, -1; Ref pkt = GetPacket(); if ( !pkt ) return -1; size_t mtu = pkt->pmap.size; if ( mtu < sizeof(struct ping) ) return errno = EMSGSIZE, -1; pkt->length = sizeof(struct ping); unsigned char* out = pkt->from; struct ping hdr; hdr.type = ICMP_TYPE_ECHO_REQUEST; hdr.code = 0; hdr.checksum = 0; if ( af == AF_INET ) hdr.identifier = local.in.sin_port; else if ( af == AF_INET6 ) hdr.identifier = local.in6.sin6_port; else return errno = EAFNOSUPPORT, -1; if ( SSIZE_MAX < TruncateIOVec(msg->msg_iov, msg->msg_iovlen, SSIZE_MAX) ) return errno = EINVAL, -1; size_t count = 0; for ( int i = 0; i < msg->msg_iovlen; i++ ) { const struct iovec* iov = &msg->msg_iov[i]; if ( mtu - pkt->length < iov->iov_len ) return errno = EMSGSIZE, -1; if ( !ctx->copy_from_src(out + pkt->length, iov->iov_base, iov->iov_len) ) return -1; pkt->length += iov->iov_len; count += iov->iov_len; } if ( count < 4 ) // Require sequence number. return errno = EINVAL, -1; memcpy(out, &hdr, sizeof(hdr)); hdr.checksum = htobe16(IP::ipsum(out, pkt->length)); memcpy(out, &hdr, sizeof(hdr)); (void) flags; if ( af == AF_INET ) { if ( !IP::Send(pkt, &sendfrom.in.sin_addr, &sendto.in.sin_addr, IPPROTO_ICMP, ifindex, broadcast) ) return -1; } // TODO: IPv6 support. 
else return errno = EAFNOSUPPORT, -1; return count; } ssize_t PingSocket::writev(ioctx_t* ctx, const struct iovec* iov, int iovcnt) { struct msghdr msg; memset(&msg, 0, sizeof(msg)); msg.msg_iov = (struct iovec*) iov; msg.msg_iovlen = iovcnt; return sendmsg_internal(ctx, &msg, 0); } short PingSocket::PollEventStatus() { short status = 0; if ( first_packet || (how_shutdown & SHUT_RD) ) status |= POLLIN | POLLRDNORM; if ( !(how_shutdown & SHUT_WR) ) status |= POLLOUT | POLLWRNORM; else status |= POLLHUP; if ( sockerr ) status |= POLLERR; return status; } int PingSocket::poll(ioctx_t* /*ctx*/, PollNode* node) { ScopedLock lock(&socket_lock); short ret_status = PollEventStatus() & node->events; if ( ret_status ) { node->master->revents |= ret_status; return 0; } poll_channel.Register(node); return errno = EAGAIN, -1; } int PingSocket::getsockopt(ioctx_t* ctx, int level, int option_name, void* option_value, size_t* option_size_ptr) { ScopedLock lock(&socket_lock); if ( level == SOL_SOCKET && option_name == SO_BINDTODEVICE ) { ScopedLock lock(&netifs_lock); const char* ifname = ""; if ( ifindex < netifs_count && netifs[ifindex] ) ifname = netifs[ifindex]->ifinfo.name; size_t option_size; if ( !CopyFromUser(&option_size, option_size_ptr, sizeof(option_size)) ) return -1; size_t len = strlen(ifname); size_t size = len + 1; if ( option_size < size ) return errno = ERANGE, -1; if ( !CopyToUser(option_value, ifname, size) || !CopyToUser(option_size_ptr, &size, sizeof(size)) ) return -1; return 0; } uintmax_t result = 0; if ( level == IPPROTO_PING ) { switch ( option_name ) { default: return errno = ENOPROTOOPT, -1; } } else if ( level == SOL_SOCKET ) { switch ( option_name ) { case SO_BINDTOINDEX: result = ifindex; break; case SO_BROADCAST: result = broadcast; break; case SO_DEBUG: result = 0; break; case SO_DOMAIN: result = af; break; case SO_DONTROUTE: result = 0; break; case SO_ERROR: result = sockerr; sockerr = 0; break; case SO_PROTOCOL: result = IPPROTO_PING; break; 
case SO_RCVBUF: result = receive_limit; break; case SO_REUSEADDR: result = reuseaddr; break; case SO_SNDBUF: result = send_limit; break; case SO_TYPE: result = SOCK_DGRAM; break; default: return errno = ENOPROTOOPT, -1; } } else return errno = EINVAL, -1; if ( !sockopt_return_uintmax(result, ctx, option_value, option_size_ptr) ) return -1; return 0; } int PingSocket::setsockopt(ioctx_t* ctx, int level, int option_name, const void* option_value, size_t option_size) { ScopedLock lock(&socket_lock); if ( level == SOL_SOCKET && option_name == SO_BINDTODEVICE ) { char ifname[IF_NAMESIZE]; if ( sizeof(ifname) < option_size ) option_size = sizeof(ifname); if ( !CopyFromUser(ifname, option_value, option_size) ) return -1; if ( strnlen(ifname, option_size) == sizeof(ifname) ) return errno = ENODEV, -1; ifname[option_size] = '\0'; ScopedLock lock(&netifs_lock); for ( size_t i = 1; i < netifs_count; i++ ) { if ( netifs[i] && !strcmp(ifname, netifs[i]->ifinfo.name) ) { ifindex = i; return 0; } } return errno = ENODEV, -1; } uintmax_t value; if ( !sockopt_fetch_uintmax(&value, ctx, option_value, option_size) ) return -1; if ( level == IPPROTO_PING ) { switch ( option_name ) { default: return errno = ENOPROTOOPT, -1; } } else if ( level == SOL_SOCKET ) { switch ( option_name ) { case SO_BINDTOINDEX: if ( UINT_MAX < value ) return errno = EINVAL, -1; ifindex = value; break; case SO_BROADCAST: broadcast = value; break; case SO_DEBUG: if ( value != 0 ) return errno = EPERM, -1; break; case SO_DONTROUTE: if ( value != 0 ) return errno = EPERM, -1; break; case SO_RCVBUF: { size_t hard_limit = MAXIMAL_PACKET_LIMIT * Page::Size(); if ( hard_limit < value ) value = hard_limit; receive_limit = value; // Shrink the receive queue until it fits. 
while ( first_packet && receive_limit < receive_current ) { Ref packet = first_packet; first_packet->next.Reset(); first_packet = first_packet->next; receive_current -= packet->pmap.size; } if ( !first_packet ) last_packet.Reset(); break; } case SO_REUSEADDR: reuseaddr = value; break; case SO_SNDBUF: { size_t hard_limit = MAXIMAL_PACKET_LIMIT * Page::Size(); if ( hard_limit < value ) value = hard_limit; // TODO: This value is unused. send_limit = value; break; } default: return errno = ENOPROTOOPT, -1; } } else return errno = EINVAL, -1; return 0; } int PingSocket::shutdown(ioctx_t* ctx, int how) { (void) ctx; ScopedLock lock(&socket_lock); if ( how & ~(SHUT_RD | SHUT_WR) ) return errno = EINVAL, -1; how_shutdown |= how; // Drop the receive queue if shut down for read. if ( how & SHUT_RD ) { // Avoid stack overflow in first_packet recursive destructor. while ( first_packet ) { Ref next = first_packet->next; first_packet->next.Reset(); first_packet = next; } last_packet.Reset(); } kthread_cond_broadcast(&receive_cond); poll_channel.Signal(PollEventStatus()); return 0; } int PingSocket::getpeername(ioctx_t* ctx, uint8_t* addr, size_t* addrsize_ptr) { ScopedLock lock(&socket_lock); if ( !connected ) return errno = ENOTCONN, -1; size_t addrsize; if ( !ctx->copy_from_src(&addrsize, addrsize_ptr, sizeof(addrsize)) ) return -1; if ( af == AF_INET ) { if ( sizeof(remote.in) < addrsize ) addrsize = sizeof(remote.in); } else if ( af == AF_INET6 ) { if ( sizeof(remote.in6) < addrsize ) addrsize = sizeof(remote.in6); } else return errno = EAFNOSUPPORT, -1; if ( !ctx->copy_to_dest(addr, &remote, addrsize) ) return -1; if ( !ctx->copy_to_dest(addrsize_ptr, &addrsize, sizeof(addrsize)) ) return -1; return 0; } int PingSocket::getsockname(ioctx_t* ctx, uint8_t* addr, size_t* addrsize_ptr) { ScopedLock lock(&socket_lock); size_t addrsize; if ( !ctx->copy_from_src(&addrsize, addrsize_ptr, sizeof(addrsize)) ) return -1; if ( af == AF_INET ) { if ( sizeof(local.in) < addrsize ) 
addrsize = sizeof(local.in); } else if ( af == AF_INET6 ) { if ( sizeof(local.in6) < addrsize ) addrsize = sizeof(local.in6); } else return errno = EAFNOSUPPORT, -1; if ( !ctx->copy_to_dest(addr, &local, addrsize) ) return -1; if ( !ctx->copy_to_dest(addrsize_ptr, &addrsize, sizeof(addrsize)) ) return -1; return 0; } // socket_lock locked void PingSocket::ReceivePacket(Ref pkt) { if ( how_shutdown & SHUT_RD ) return; // Drop the packet if the receive queue is full. if ( receive_limit < receive_current ) return; size_t available = receive_limit - receive_current; if ( available < pkt->pmap.size ) return; // Add the packet to the receive queue. receive_current += pkt->pmap.size; if ( last_packet ) { last_packet->next = pkt; last_packet = pkt; } else { first_packet = pkt; last_packet = pkt; } kthread_cond_broadcast(&receive_cond); poll_channel.Signal(PollEventStatus()); } void HandleIP(Ref pkt, const struct in_addr* src, const struct in_addr* dst, bool dst_broadcast) { if ( src->s_addr == htobe32(INADDR_ANY) ) return; const unsigned char* in = pkt->from + pkt->offset; size_t inlen = pkt->length - pkt->offset; if ( IP::ipsum(in, inlen) != 0 ) return; struct icmp_echo hdr; if ( inlen < sizeof(hdr) ) return; memcpy(&hdr, in, sizeof(hdr)); if ( hdr.type == ICMP_TYPE_ECHO_REQUEST ) { // Ignore broadcast echo requests as permitted by RFC 1122 3.2.2.6. 
if ( dst_broadcast ) return; if ( hdr.code != 0 ) return; size_t payload_length = inlen - sizeof(hdr); struct in_addr sendfrom; if ( !IP::GetSourceIP(dst, src, &sendfrom, 0) ) return; Ref out_pkg = GetPacket(); if ( !out_pkg ) return; unsigned char* out = out_pkg->from; struct icmp_echo reply; reply.type = ICMP_TYPE_ECHO_REPLY; reply.code = 0; reply.checksum = 0; reply.identifier = hdr.identifier; reply.sequence = hdr.sequence; out_pkg->length = sizeof(reply) + payload_length; if ( out_pkg->pmap.size < out_pkg->length ) return; memcpy(out, &reply, sizeof(reply)); memcpy(out + sizeof(reply), in + sizeof(hdr), payload_length); reply.checksum = htobe16(IP::ipsum(out, out_pkg->length)); memcpy(out, &reply, sizeof(reply)); if ( !IP::Send(out_pkg, &sendfrom, src, IPPROTO_ICMP, 0, false) ) return; return; } if ( hdr.type != ICMP_TYPE_ECHO_REPLY ) return; pkt->offset += sizeof(hdr) - sizeof(hdr.sequence); hdr.identifier = be16toh(hdr.identifier); // Port 0 is not valid. if ( hdr.identifier == 0 ) return; ScopedLock lock1(&bind_lock); // Find the socket that would receive the datagram sent to that address // and port, or if no such socket, perhaps a socket bound to the any address // and that port. PingSocket* socket = NULL; PingSocket* any_socket = NULL; for ( PingSocket* iter = bindings_v4[hdr.identifier]; !socket && iter; iter = iter->next_socket ) { // Receive the datagram only if sent to the socket's address. if ( !memcmp(&iter->local.in.sin_addr, dst, sizeof(*dst)) ) socket = iter; // Receive the datagram only if the socket's address was the any address // (and no other socket is bound to the datagram's destination address // and port). if ( iter->local.in.sin_addr.s_addr == htobe32(INADDR_ANY) ) any_socket = iter; } // If no socket was bound to the datagram's destination address and port, // try to deliver it to a socket bound to the any address and that port. if ( !socket ) socket = any_socket; // Drop the datagram is no socket would receive it. 
if ( !socket ) return; // If connected, require the source address is the remote address and the // source port is the remote port, otherwise drop the datagram. if ( socket->connected && (memcmp(&socket->remote.in.sin_addr, src, sizeof(*src)) != 0 ) ) return; ScopedLock lock2(&socket->socket_lock); // If the socket is bound to a network interface, require the datagram to // have been received on that network interface. if ( socket->ifindex && socket->ifindex != pkt->netif->ifinfo.linkid ) return; // Prepend the source address to the packet. struct sockaddr_in from_addr; memset(&from_addr, 0, sizeof(from_addr)); from_addr.sin_family = AF_INET; from_addr.sin_port = htobe16(hdr.identifier); from_addr.sin_addr = *src; if ( pkt->offset < sizeof(from_addr) ) return; pkt->offset -= sizeof(from_addr); memcpy(pkt->from + pkt->offset, &from_addr, sizeof(from_addr)); // Receive the datagram on the socket. socket->ReceivePacket(pkt); } Ref Socket(int af) { if ( !IsSupportedAddressFamily(af) ) return errno = EAFNOSUPPORT, Ref(NULL); return Ref(new PingSocket(af)); } } // namespace Ping } // namespace Sortix