Add Unix domain sockets for local IPC #37

Merged
apio merged 16 commits from unix-sockets into main 2023-07-30 09:49:38 +00:00
49 changed files with 1027 additions and 136 deletions

View File

@ -42,3 +42,5 @@ luna_app(cp.cpp cp)
luna_app(kill.cpp kill)
luna_app(gol.cpp gol)
luna_app(buffer-test.cpp buffer-test)
luna_app(socket-test.cpp socket-test)
luna_app(socket-client.cpp socket-client)

48
apps/socket-client.cpp Normal file
View File

@ -0,0 +1,48 @@
#include <os/ArgumentParser.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <unistd.h>
/*
 * Entry point of the socket-client test program.
 *
 * Connects to the UNIX domain socket at /tmp/local.sock, prints whatever the
 * server sends first, then sends the message given on the command line
 * (defaulting to "exit") and closes the connection.
 *
 * Returns 0 on success and 1 if the socket could not be created, the
 * connection could not be established, or the message could not be written.
 */
Result<int> luna_main(int argc, char** argv)
{
    StringView message;

    os::ArgumentParser parser;
    parser.add_description("A UNIX domain socket client, to test said sockets.");
    parser.add_system_program_info("socket-client"_sv);
    parser.add_positional_argument(message, "message"_sv, "exit"_sv);
    parser.parse(argc, argv);

    int sockfd = socket(AF_UNIX, SOCK_STREAM, 0);
    if (sockfd < 0)
    {
        perror("socket");
        // A failure to create the socket is an error, so report a non-zero status.
        return 1;
    }

    struct sockaddr_un un;
    un.sun_family = AF_UNIX;
    // strncpy() pads the rest of sun_path with NUL bytes, so the whole structure is initialized.
    strncpy(un.sun_path, "/tmp/local.sock", sizeof(un.sun_path));

    if (connect(sockfd, (struct sockaddr*)&un, sizeof(un)) < 0)
    {
        // Report first: close() could overwrite errno.
        perror("connect");
        close(sockfd);
        return 1;
    }

    char buf[4096];
    // Leave room for the terminating NUL byte added below.
    ssize_t nread = read(sockfd, buf, sizeof(buf) - 1);
    if (nread > 0)
    {
        buf[nread] = 0;
        printf("Message from server: %s\n", buf);
    }

    if (write(sockfd, message.chars(), message.length()) < 0)
    {
        perror("write");
        close(sockfd);
        return 1;
    }

    close(sockfd);
    return 0;
}

75
apps/socket-test.cpp Normal file
View File

@ -0,0 +1,75 @@
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/un.h>
#include <unistd.h>
/*
 * Entry point of the socket-test server.
 *
 * Binds a UNIX domain stream socket to /tmp/local.sock and serves clients one
 * at a time: each client is sent a greeting, then one message is read back
 * and printed. A client message of "exit" (compared case-insensitively) shuts
 * the server down and removes the socket file.
 *
 * Returns 0 after a clean "exit" request and 1 on any setup or accept error.
 */
int main()
{
    setgid(1000);
    setuid(1000);

    // Remove any socket file left behind by a previous run so bind() can recreate it.
    remove("/tmp/local.sock");

    int sockfd = socket(AF_UNIX, SOCK_STREAM, 0);
    if (sockfd < 0)
    {
        perror("socket");
        // Failing to create the socket is an error, so report a non-zero status.
        return 1;
    }

    struct sockaddr_un un;
    un.sun_family = AF_UNIX;
    // strncpy() pads the rest of sun_path with NUL bytes, so the whole structure is initialized.
    strncpy(un.sun_path, "/tmp/local.sock", sizeof(un.sun_path));

    if (bind(sockfd, (struct sockaddr*)&un, sizeof(un)) < 0)
    {
        // Report first: close() could overwrite errno.
        perror("bind");
        close(sockfd);
        return 1;
    }

    if (listen(sockfd, 10) < 0)
    {
        perror("listen");
        close(sockfd);
        return 1;
    }

    while (1)
    {
        int fd = accept(sockfd, NULL, NULL);
        if (fd < 0)
        {
            perror("accept");
            close(sockfd);
            return 1;
        }

        puts("New connection from client, sending hello");

        const char* message = "Hello, client!";
        write(fd, message, strlen(message));

        puts("Now waiting for client to message back");

        char buf[4096];
        // Leave room for the terminating NUL byte added below.
        ssize_t nread = read(fd, buf, sizeof(buf) - 1);
        if (nread >= 0)
        {
            buf[nread] = 0;
            printf("Message from client: %s\n", buf);
            if (!strcasecmp(buf, "exit"))
            {
                close(fd);
                close(sockfd);
                remove("/tmp/local.sock");
                return 0;
            }
        }
        else { printf("Error reading from client: %s\n", strerror(errno)); }

        puts("Transmission ended, closing connection");

        close(fd);
    }
}

6
base/etc/init/98-listen Normal file
View File

@ -0,0 +1,6 @@
Name=listen
Description=Start a Unix domain socket test server.
Command=/usr/bin/socket-test
StandardOutput=/dev/uart0
StandardError=/dev/uart0
Restart=true

View File

@ -41,11 +41,13 @@ set(SOURCES
src/sys/mount.cpp
src/sys/resource.cpp
src/sys/signal.cpp
src/sys/socket.cpp
src/fs/VFS.cpp
src/fs/Pipe.cpp
src/fs/Mount.cpp
src/fs/MBR.cpp
src/fs/GPT.cpp
src/net/UnixSocket.cpp
src/fs/tmpfs/FileSystem.cpp
src/fs/tmpfs/Inode.cpp
src/fs/ext2/FileSystem.cpp

View File

@ -310,7 +310,7 @@ class ATADevice : public Device
return err(ENOTSUP);
}
bool blocking() const override
bool will_block_if_read() const override
{
return false;
}

View File

@ -41,7 +41,7 @@ namespace MBR
Result<usize> write(const u8* buf, usize offset, usize length) override;
bool blocking() const override
bool will_block_if_read() const override
{
return false;
}

View File

@ -37,7 +37,7 @@ class MountInode : public VFS::Inode, public LinkedListNode<MountInode>
return err(EISDIR);
}
bool blocking() const override
bool will_block_if_read() const override
{
return false;
}

View File

@ -46,7 +46,7 @@ Result<usize> Pipe::write(const u8* buf, usize, usize length)
return length;
}
bool Pipe::blocking() const
bool Pipe::will_block_if_read() const
{
return !m_data_buffer.size() && m_writer;
}

View File

@ -15,7 +15,7 @@ class Pipe : public Shareable
Result<usize> write(const u8* buf, usize, usize length);
bool blocking() const;
bool will_block_if_read() const;
private:
Buffer m_data_buffer;
@ -40,9 +40,9 @@ class PipeInodeBase : public VFS::FileInode
return err(ENOTSUP);
}
bool blocking() const override
bool will_block_if_read() const override
{
return m_pipe->blocking();
return m_pipe->will_block_if_read();
}
usize size() const override

View File

@ -17,6 +17,7 @@ namespace VFS
BlockDevice,
Symlink,
FIFO,
Socket,
};
class Inode;
@ -34,6 +35,11 @@ namespace VFS
virtual Result<SharedPtr<Inode>> create_symlink_inode(StringView link) = 0;
// Hands out an inode number for an inode that is created outside of this
// file system's own create_*_inode() methods.
// The default implementation reports ENOTSUP; file systems that support this override it.
virtual Result<u64> allocate_inode_number()
{
return err(ENOTSUP);
}
virtual Result<void> set_mount_dir(SharedPtr<Inode> parent) = 0;
virtual Result<void> reset_mount_dir() = 0;
@ -86,6 +92,10 @@ namespace VFS
return err(ENOTTY);
}
// Hook for inodes that need to react to being closed.
// The default implementation does nothing.
virtual void did_close()
{
}
// Directory-specific methods
virtual Result<SharedPtr<Inode>> find(const char* name) const = 0;
@ -110,7 +120,7 @@ namespace VFS
virtual Result<void> truncate(usize size) = 0;
virtual bool blocking() const = 0;
virtual bool will_block_if_read() const = 0;
// Symlink-specific methods
virtual Result<StringView> readlink()
@ -221,7 +231,7 @@ namespace VFS
return 0;
}
bool blocking() const override
bool will_block_if_read() const override
{
return false;
}

View File

@ -49,13 +49,7 @@ Result<usize> ConsoleDevice::read(u8* buf, usize, usize length) const
{
TRY(handle_background_process_group(false, SIGTTIN));
if (length > m_input_buffer.size()) length = m_input_buffer.size();
memcpy(buf, m_input_buffer.data(), length);
memmove(m_input_buffer.data(), m_input_buffer.data() + length, m_input_buffer.size() - length);
m_input_buffer.try_resize(m_input_buffer.size() - length).release_value();
length = m_input_buffer.dequeue_data(buf, length);
if (!length && m_may_read_without_blocking) m_may_read_without_blocking = false;
@ -70,7 +64,7 @@ Result<usize> ConsoleDevice::write(const u8* buf, usize, usize length)
return length;
}
bool ConsoleDevice::blocking() const
bool ConsoleDevice::will_block_if_read() const
{
return m_may_read_without_blocking ? false : m_input_buffer.size() == 0;
}

View File

@ -16,7 +16,7 @@ class ConsoleDevice : public Device
static void did_press_or_release_key(u8 scancode);
bool blocking() const override;
bool will_block_if_read() const override;
Result<u64> ioctl(int request, void* arg) override;

View File

@ -41,7 +41,7 @@ class Device : public Shareable
// Path in devfs.
virtual StringView device_path() const = 0;
virtual bool blocking() const = 0;
virtual bool will_block_if_read() const = 0;
virtual ~Device() = default;
};

View File

@ -28,7 +28,7 @@ usize FramebufferDevice::size() const
return Framebuffer::size();
}
bool FramebufferDevice::blocking() const
bool FramebufferDevice::will_block_if_read() const
{
return false;
}

View File

@ -11,7 +11,7 @@ class FramebufferDevice : public Device
Result<usize> write(const u8*, usize, usize) override;
bool blocking() const override;
bool will_block_if_read() const override;
bool is_block_device() const override
{

View File

@ -19,7 +19,7 @@ class FullDevice : public Device
return err(ENOSPC);
}
bool blocking() const override
bool will_block_if_read() const override
{
return false;
}

View File

@ -17,7 +17,7 @@ class NullDevice : public Device
return 0;
}
bool blocking() const override
bool will_block_if_read() const override
{
return false;
}

View File

@ -14,7 +14,7 @@ class UARTDevice : public Device
Result<usize> write(const u8*, usize, usize) override;
bool blocking() const override
bool will_block_if_read() const override
{
return false;
}

View File

@ -19,7 +19,7 @@ class ZeroDevice : public Device
return 0;
}
bool blocking() const override
bool will_block_if_read() const override
{
return false;
}

View File

@ -122,7 +122,7 @@ namespace Ext2
return m_entries.size();
}
bool blocking() const override
bool will_block_if_read() const override
{
return false;
}

View File

@ -16,6 +16,11 @@ namespace TmpFS
return (SharedPtr<VFS::FileSystem>)fs;
}
// Returns the current value of m_next_inode_number and then advances the counter,
// so every call yields a different number.
Result<u64> FileSystem::allocate_inode_number()
{
return m_next_inode_number++;
}
Result<SharedPtr<VFS::Inode>> FileSystem::create_file_inode()
{
SharedPtr<FileInode> inode = TRY(make_shared<FileInode>());

View File

@ -18,6 +18,8 @@ namespace TmpFS
Result<SharedPtr<VFS::Inode>> create_device_inode(u32 major, u32 minor) override;
Result<SharedPtr<VFS::Inode>> create_symlink_inode(StringView link) override;
Result<u64> allocate_inode_number() override;
Result<void> set_mount_dir(SharedPtr<VFS::Inode> parent) override;
Result<void> reset_mount_dir() override;

View File

@ -281,9 +281,9 @@ namespace TmpFS
return m_device->isatty();
}
bool blocking() const override
bool will_block_if_read() const override
{
return m_device->blocking();
return m_device->will_block_if_read();
}
usize size() const override
@ -385,7 +385,7 @@ namespace TmpFS
return err(EISDIR);
}
bool blocking() const override
bool will_block_if_read() const override
{
return false;
}

120
kernel/src/net/Socket.h Normal file
View File

@ -0,0 +1,120 @@
#pragma once
#include "arch/CPU.h"
#include "fs/VFS.h"
#include "thread/Thread.h"
#include <bits/socket.h>
/*
 * Abstract base class for socket inodes.
 *
 * A Socket is a file inode of type VFS::InodeType::Socket. Plain read() and
 * write() calls are forwarded to recv() and send() with no flags, and the
 * usual inode metadata (mode, owner, link count, file system, inode number)
 * is stored directly in the object. Concrete socket families implement the
 * pure virtual send/recv/bind/connect/accept/listen operations.
 */
class Socket : public VFS::FileInode
{
  public:
    Socket() = default;

    VFS::InodeType type() const override
    {
        return VFS::InodeType::Socket;
    }

    // Records the file system this socket has been attached to.
    void set_fs(VFS::FileSystem* fs)
    {
        m_fs = fs;
    }

    // Records the inode number assigned to this socket.
    void set_inode_number(usize inum)
    {
        m_inode_number = inum;
    }

    VFS::FileSystem* fs() const override
    {
        return m_fs;
    }

    usize inode_number() const override
    {
        return m_inode_number;
    }

    // Reading from a socket is receiving with no flags; the offset is ignored.
    Result<usize> read(u8* buf, usize, usize length) const override
    {
        return recv(buf, length, 0);
    }

    // Writing to a socket is sending with no flags; the offset is ignored.
    Result<usize> write(const u8* buf, usize, usize length) override
    {
        return send(buf, length, 0);
    }

    virtual Result<usize> send(const u8*, usize, int) = 0;
    virtual Result<usize> recv(u8*, usize, int) const = 0;

    virtual Result<void> bind(SharedPtr<Socket>, struct sockaddr*, socklen_t) = 0;
    virtual Result<void> connect(Registers*, int, struct sockaddr*, socklen_t) = 0;

    virtual Result<SharedPtr<OpenFileDescription>> accept(Registers*, int, struct sockaddr**, socklen_t*) = 0;

    virtual Result<void> listen(int backlog) = 0;

    // Sockets have no size that could be changed.
    Result<void> truncate(usize) override
    {
        return err(EINVAL);
    }

    usize size() const override
    {
        return 0;
    }

    mode_t mode() const override
    {
        return m_mode;
    }

    u32 uid() const override
    {
        return m_uid;
    }

    u32 gid() const override
    {
        return m_gid;
    }

    nlink_t nlinks() const override
    {
        return (nlink_t)m_nlinks;
    }

    Result<void> chmod(mode_t mode) override
    {
        m_mode = mode;
        return {};
    }

    Result<void> chown(u32 uid, u32 gid) override
    {
        m_uid = uid;
        m_gid = gid;
        return {};
    }

    void did_link() override
    {
        m_nlinks++;
    }

    void did_unlink() override
    {
        m_nlinks--;
    }

    virtual ~Socket() = default;

  protected:
    VFS::FileSystem* m_fs { nullptr };
    usize m_inode_number { 0 };
    // Initialized like the other metadata fields, so that mode() never returns an
    // indeterminate value for a socket on which chmod() was never called.
    mode_t m_mode { 0 };
    u32 m_uid { 0 };
    u32 m_gid { 0 };
    u32 m_nlinks { 0 };
};

View File

@ -0,0 +1,204 @@
#include "net/UnixSocket.h"
#include <bits/open-flags.h>
#include <luna/PathParser.h>
#include <thread/Scheduler.h>
// Creates a socket with no peer; every member keeps its default initializer.
UnixSocket::UnixSocket()
{
}
// Creates a socket that starts out in the Connected state with the given peer.
// The peer is stored as a raw, non-owning pointer.
UnixSocket::UnixSocket(UnixSocket* peer) : m_state(State::Connected), m_peer(peer)
{
}
// Runs the close logic on destruction so that a peer, if any, is told the connection is gone.
UnixSocket::~UnixSocket()
{
did_close();
}
// Tears down this end of the connection.
// If a peer is attached, the peer is detached from this socket and marked Reset.
// This socket then forgets the peer and returns to the Inactive state.
void UnixSocket::did_close()
{
    if (m_peer)
    {
        m_peer->m_peer = nullptr;
        m_peer->m_state = State::Reset;

        // Drop our own reference as well. This function is also called from the destructor,
        // and the peer no longer points back at us, so it cannot clear this pointer when it
        // is destroyed. Keeping it would make a later call write through a stale pointer.
        m_peer = nullptr;
    }

    m_state = State::Inactive;
}
// Marks this socket as connected and remembers the other end of the connection.
// The peer is stored as a raw, non-owning pointer.
void UnixSocket::connect_to_peer(UnixSocket* peer)
{
    m_state = State::Connected;
    m_peer = peer;
}
// Appends `length` bytes from `buf` to the peer's receive buffer.
// Fails with ECONNRESET if the connection was reset, and with ENOTCONN in any
// other state that is not Connected. The flags argument is ignored.
// Returns `length` on success.
Result<usize> UnixSocket::send(const u8* buf, usize length, int)
{
    switch (m_state)
    {
    case State::Reset: return err(ECONNRESET);
    case State::Connected: break;
    default: return err(ENOTCONN);
    }

    // A connected socket always has a peer.
    check(m_peer);

    auto& peer_buffer = m_peer->m_data;
    TRY(peer_buffer.append_data(buf, length));

    return length;
}
// Dequeues up to `length` bytes from this socket's receive buffer into `buf`.
// A reset socket can still be drained; ECONNRESET is only reported once its
// buffer is empty. Any state other than Connected or Reset yields ENOTCONN.
// The flags argument is ignored.
Result<usize> UnixSocket::recv(u8* buf, usize length, int) const
{
    const bool was_reset = (m_state == State::Reset);

    if (was_reset && m_data.size() == 0) return err(ECONNRESET);

    if (!was_reset && m_state != State::Connected) return err(ENOTCONN);

    return m_data.dequeue_data(buf, length);
}
// Links `socket` into the file system under `path`.
// The parent directory is resolved relative to `working_directory` using the
// credentials in `auth`, and must be writable by them (EACCES otherwise).
// The socket receives a fresh inode number from the parent's file system and
// is then added to the parent directory under the last path component.
static Result<void> bind_socket_to_fs(const char* path, Credentials auth, SharedPtr<VFS::Inode> working_directory,
                                      SharedPtr<UnixSocket> socket)
{
    auto directory_path = TRY(PathParser::dirname(path));
    auto directory = TRY(VFS::resolve_path(directory_path.chars(), auth, working_directory));

    if (!VFS::can_write(directory, auth)) return err(EACCES);

    auto entry_name = TRY(PathParser::basename(path));
    TRY(VFS::validate_filename(entry_name.view()));

    auto inode_number = TRY(directory->fs()->allocate_inode_number());
    socket->set_inode_number(inode_number);
    socket->set_fs(directory->fs());

    return directory->add_entry(socket, entry_name.chars());
}
// Binds this socket to the file system path stored in `addr`.
//
// `socket` is the shared pointer that owns this object; it is the one linked into the
// file system. `addr` must be a sockaddr_un of `addrlen` bytes.
//
// Errors:
//   EDESTADDRREQ - `addr` is null.
//   EINVAL       - `addrlen` is too short to contain the address family, is larger than a
//                  sockaddr_un, or the socket is in a state other than Inactive or Connected.
//   EAFNOSUPPORT - the address family is not AF_UNIX.
//   EISCONN      - the socket is already connected.
//   EADDRINUSE   - an entry already exists at the requested path.
//   Any other error from path resolution or directory insertion is passed through.
Result<void> UnixSocket::bind(SharedPtr<Socket> socket, struct sockaddr* addr, socklen_t addrlen)
{
    if (!addr) return err(EDESTADDRREQ);

    // The family field must be fully present before it is read. Without this check,
    // `addrlen - sizeof(sa_family_t)` below would also wrap around to a huge length.
    if ((usize)addrlen < sizeof(sa_family_t)) return err(EINVAL);

    if (addr->sa_family != AF_UNIX) return err(EAFNOSUPPORT);
    if ((usize)addrlen > sizeof(sockaddr_un)) return err(EINVAL);

    if (m_state == State::Connected) return err(EISCONN);
    if (m_state != State::Inactive) return err(EINVAL);

    struct sockaddr_un* un_address = (struct sockaddr_un*)addr;

    // The path occupies whatever follows the family field and need not be NUL-terminated.
    String path = TRY(String::from_string_view(
        StringView::from_fixed_size_cstring(un_address->sun_path, addrlen - sizeof(sa_family_t))));

    auto* current = Scheduler::current();

    // The new file system entry is owned by the calling thread and honors its umask.
    socket->chmod(0777 & ~current->umask);
    socket->chown(current->auth.euid, current->auth.egid);

    auto rc = bind_socket_to_fs(path.chars(), current->auth, current->current_directory, socket);
    if (rc.has_error())
    {
        // An existing entry at this path means the address is taken.
        if (rc.error() == EEXIST) return err(EADDRINUSE);
        return rc.release_error();
    }

    memcpy(&m_addr, un_address, addrlen);
    m_addrlen = addrlen;
    m_state = State::Bound;

    return {};
}
// Connects this socket to the listening socket bound at the path stored in `addr`.
//
// The socket is queued on the listener and, unless O_NONBLOCK is set in `flags`, the calling
// thread waits until the listener accepts the connection. `regs` is used to process pending
// signals when the wait is interrupted without a signal handler being invoked.
//
// Errors:
//   EINVAL       - `addr` is null, `addrlen` is too short to contain the address family or
//                  larger than a sockaddr_un, or the socket is in an unexpected state.
//   EAFNOSUPPORT - the address family is not AF_UNIX.
//   EISCONN      - the socket is already connected.
//   EALREADY     - a connection attempt is already in progress.
//   ENOTSOCK     - the path does not refer to a socket.
//   EACCES       - the caller may not write to the target socket.
//   ECONNREFUSED - the target is not listening, or its queue rejected the connection.
//   EINPROGRESS  - O_NONBLOCK is set; the connection was queued but is not yet accepted.
//   EINTR        - the wait was interrupted by a signal whose handler will be invoked.
//   Errors from path resolution are passed through.
Result<void> UnixSocket::connect(Registers* regs, int flags, struct sockaddr* addr, socklen_t addrlen)
{
    if (!addr) return err(EINVAL);

    // The family field must be fully present before it is read. Without this check,
    // `addrlen - sizeof(sa_family_t)` below would also wrap around to a huge length.
    if ((usize)addrlen < sizeof(sa_family_t)) return err(EINVAL);

    if (addr->sa_family != AF_UNIX) return err(EAFNOSUPPORT);
    if ((usize)addrlen > sizeof(sockaddr_un)) return err(EINVAL);

    if (m_state == State::Connected) return err(EISCONN);
    if (m_state == State::Connecting) return err(EALREADY);
    if (m_state != State::Inactive) return err(EINVAL);

    struct sockaddr_un* un_address = (struct sockaddr_un*)addr;

    // The path occupies whatever follows the family field and need not be NUL-terminated.
    String path = TRY(String::from_string_view(
        StringView::from_fixed_size_cstring(un_address->sun_path, addrlen - sizeof(sa_family_t))));

    auto* current = Scheduler::current();

    auto inode = TRY(VFS::resolve_path(path.chars(), current->auth, current->current_directory));
    if (inode->type() != VFS::InodeType::Socket)
        return err(ENOTSOCK); // FIXME: POSIX doesn't say what error to return here?

    if (!VFS::can_write(inode, current->auth)) return err(EACCES);

    auto socket = (SharedPtr<UnixSocket>)inode;
    if (socket->m_state != State::Listening) return err(ECONNREFUSED);

    // Queue ourselves on the listener and wake a thread that may be blocked in accept().
    if (!socket->m_listen_queue.try_push(this)) return err(ECONNREFUSED);
    if (socket->m_blocked_thread) socket->m_blocked_thread->wake_up();

    m_state = Connecting;

    if (flags & O_NONBLOCK) return err(EINPROGRESS);

    while (1)
    {
        // Register as the thread to wake once the listener has accepted us.
        m_blocked_thread = current;
        kernel_wait_for_event();
        m_blocked_thread = nullptr;

        if (current->interrupted)
        {
            if (current->will_invoke_signal_handler()) return err(EINTR);
            current->process_pending_signals(regs);
            continue;
        }

        break;
    }

    // accept() sets both of these before waking us up.
    check(m_state == Connected);
    check(m_peer);

    return {};
}
// Starts listening for incoming connections on a bound socket.
// A negative backlog is treated as 0; the listen queue is resized to the backlog.
// Fails with EINVAL if the socket is already listening or connected, and with
// EDESTADDRREQ if it is in any other state than Bound.
Result<void> UnixSocket::listen(int backlog)
{
    const int queue_size = (backlog < 0) ? 0 : backlog;

    switch (m_state)
    {
    case State::Listening:
    case State::Connected: return err(EINVAL);
    case State::Bound: break;
    default: return err(EDESTADDRREQ);
    }

    TRY(m_listen_queue.set_size(queue_size));

    m_state = State::Listening;
    return {};
}
// Accepts one pending connection from the listen queue.
//
// Fails with EINVAL unless the socket is in the Listening state. While the queue is empty,
// the call fails with EAGAIN if O_NONBLOCK is set in `flags`; otherwise the calling thread
// waits for an event. An interrupted wait returns EINTR if a signal handler will be invoked;
// otherwise pending signals are processed using `regs` and the queue is checked again.
//
// On success a new UnixSocket connected to the queued peer is created and wrapped in an
// O_RDWR open file description, which is returned. `*addr` and `*addrlen` receive a pointer
// to, and the length of, the address stored in the peer socket.
Result<SharedPtr<OpenFileDescription>> UnixSocket::accept(Registers* regs, int flags, struct sockaddr** addr,
socklen_t* addrlen)
{
if (m_state != State::Listening) return err(EINVAL);
auto* current = Scheduler::current();
UnixSocket* peer = nullptr;
while (!m_listen_queue.try_pop(peer))
{
if (flags & O_NONBLOCK) return err(EAGAIN);
// Register as the thread that connect() should wake when it queues a new peer.
m_blocked_thread = current;
kernel_wait_for_event();
m_blocked_thread = nullptr;
if (current->interrupted)
{
if (current->will_invoke_signal_handler()) return err(EINTR);
current->process_pending_signals(regs);
continue;
}
}
check(peer);
// The new socket starts out Connected with `peer` as its other end.
auto socket = TRY(make_shared<UnixSocket>(peer));
auto description = TRY(make_shared<OpenFileDescription>(socket, O_RDWR));
// Complete the connection on the peer's side and wake the thread blocked in connect(), if any.
peer->m_peer = socket.ptr();
peer->m_state = State::Connected;
if (peer->m_blocked_thread) peer->m_blocked_thread->wake_up();
// The reported address points into the peer socket object; no copy is made here.
*addr = (struct sockaddr*)&peer->m_addr;
*addrlen = peer->m_addrlen;
return description;
}

View File

@ -0,0 +1,57 @@
#pragma once
#include "net/Socket.h"
#include "thread/Thread.h"
#include <luna/Buffer.h>
#include <luna/CircularQueue.h>
#include <luna/String.h>
#include <sys/un.h>
/*
 * A UNIX domain (AF_UNIX) stream socket.
 *
 * Each connected socket holds a raw pointer to its peer and a buffer of data
 * that the peer has sent and that has not been received yet. A listening
 * socket holds a queue of sockets waiting to be accepted.
 */
class UnixSocket : public Socket
{
  public:
    UnixSocket();
    UnixSocket(UnixSocket* peer);

    // A read only has to wait while the connection is alive and nothing is buffered.
    // A reset socket with an empty buffer must not block: recv() reports ECONNRESET
    // for it, and no more data can ever arrive.
    bool will_block_if_read() const override
    {
        return m_state == Connected && !m_data.size();
    }

    Result<usize> send(const u8*, usize, int) override;
    Result<usize> recv(u8*, usize, int) const override;

    Result<void> bind(SharedPtr<Socket>, struct sockaddr*, socklen_t) override;
    Result<void> connect(Registers*, int, struct sockaddr*, socklen_t) override;

    Result<SharedPtr<OpenFileDescription>> accept(Registers*, int, struct sockaddr**, socklen_t*) override;

    Result<void> listen(int backlog) override;

    void did_close() override;

    void connect_to_peer(UnixSocket* peer);

    virtual ~UnixSocket();

  private:
    // Lifecycle of a socket, from creation to a closed connection.
    enum State
    {
        Inactive,   // Neither bound nor connected (initial state).
        Bound,      // Bound to a path, not yet listening.
        Listening,  // Accepting incoming connections.
        Connecting, // Queued on a listener, waiting to be accepted.
        Connected,  // Connected to a peer.
        Reset,      // The peer closed its end of the connection.
    };

    State m_state = State::Inactive;
    UnixSocket* m_peer = nullptr; // Non-owning.

    // Mutable because recv() is const but dequeues data.
    mutable Buffer m_data;

    // Thread waiting in connect() or accept() on this socket, if any.
    Thread* m_blocked_thread { nullptr };

    DynamicCircularQueue<UnixSocket*> m_listen_queue;

    struct sockaddr_un m_addr = { .sun_family = AF_UNIX, .sun_path = {} };
    socklen_t m_addrlen = sizeof(sa_family_t);
};

View File

@ -92,11 +92,7 @@ Result<u64> sys_execve(Registers* regs, SyscallArgs args)
{
auto& descriptor = current->fd_table[i];
if (!descriptor.has_value()) continue;
if (descriptor->flags & O_CLOEXEC)
{
descriptor->inode->remove_handle();
descriptor = {};
}
if (descriptor->description->flags & O_CLOEXEC) { descriptor = {}; }
}
if (VFS::is_setuid(inode)) current->auth.euid = current->auth.suid = inode->uid();
@ -148,11 +144,7 @@ Result<u64> sys_fork(Registers* regs, SyscallArgs)
thread->umask = current->umask;
thread->parent = current;
for (int i = 0; i < FD_MAX; i++)
{
thread->fd_table[i] = current->fd_table[i];
if (current->fd_table[i].has_value()) current->fd_table[i]->inode->add_handle();
}
for (int i = 0; i < FD_MAX; i++) { thread->fd_table[i] = current->fd_table[i]; }
image->apply(thread);

View File

@ -26,7 +26,7 @@ Result<u64> sys_read(Registers* regs, SyscallArgs args)
if (!descriptor.is_readable()) return err(EBADF);
while (descriptor.inode->blocking())
while (descriptor.inode()->will_block_if_read())
{
if (descriptor.should_block()) kernel_sleep(10);
else
@ -40,9 +40,9 @@ Result<u64> sys_read(Registers* regs, SyscallArgs args)
}
}
usize nread = TRY(descriptor.inode->read(buf, descriptor.offset, size));
usize nread = TRY(descriptor.inode()->read(buf, descriptor.offset, size));
if (VFS::is_seekable(descriptor.inode)) descriptor.offset += nread;
if (VFS::is_seekable(descriptor.inode())) descriptor.offset += nread;
return nread;
}
@ -63,11 +63,12 @@ Result<u64> sys_write(Registers*, SyscallArgs args)
if (!descriptor.is_writable()) return err(EBADF);
if (descriptor.should_append() && VFS::is_seekable(descriptor.inode)) descriptor.offset = descriptor.inode->size();
if (descriptor.should_append() && VFS::is_seekable(descriptor.inode()))
descriptor.offset = descriptor.inode()->size();
usize nwritten = TRY(descriptor.inode->write(buf, descriptor.offset, size));
usize nwritten = TRY(descriptor.inode()->write(buf, descriptor.offset, size));
if (VFS::is_seekable(descriptor.inode)) descriptor.offset += nwritten;
if (VFS::is_seekable(descriptor.inode())) descriptor.offset += nwritten;
return nwritten;
}
@ -82,9 +83,9 @@ Result<u64> sys_lseek(Registers*, SyscallArgs args)
auto& descriptor = *TRY(current->resolve_fd(fd));
if (descriptor.inode->type() == VFS::InodeType::FIFO) return err(ESPIPE);
if (descriptor.inode()->type() == VFS::InodeType::FIFO) return err(ESPIPE);
if (!VFS::is_seekable(descriptor.inode)) return descriptor.offset;
if (!VFS::is_seekable(descriptor.inode())) return descriptor.offset;
off_t new_offset;
@ -92,7 +93,7 @@ Result<u64> sys_lseek(Registers*, SyscallArgs args)
{
case SEEK_SET: new_offset = offset; break;
case SEEK_CUR: new_offset = TRY(safe_add((long)descriptor.offset, offset)); break;
case SEEK_END: new_offset = TRY(safe_add((long)descriptor.inode->size(), offset)); break;
case SEEK_END: new_offset = TRY(safe_add((long)descriptor.inode()->size(), offset)); break;
default: return err(EINVAL);
}
@ -123,28 +124,28 @@ Result<u64> sys_fcntl(Registers*, SyscallArgs args)
current->fd_table[new_fd] = descriptor;
if (is_cloexec) current->fd_table[new_fd]->flags |= O_CLOEXEC;
if (is_cloexec) current->fd_table[new_fd]->flags() |= O_CLOEXEC;
else
current->fd_table[new_fd]->flags &= ~O_CLOEXEC;
current->fd_table[new_fd]->flags() &= ~O_CLOEXEC;
return (u64)new_fd;
}
case F_GETFD: return (u64) !!(descriptor.flags & O_CLOEXEC);
case F_GETFD: return (u64) !!(descriptor.flags() & O_CLOEXEC);
case F_SETFD: {
int arg = (int)args[2];
if (arg == FD_CLOEXEC) descriptor.flags |= O_CLOEXEC;
if (arg == FD_CLOEXEC) descriptor.flags() |= O_CLOEXEC;
else
descriptor.flags &= ~O_CLOEXEC;
descriptor.flags() &= ~O_CLOEXEC;
return 0;
}
case F_GETFL: return (u64)(descriptor.flags & ~O_CLOEXEC);
case F_GETFL: return (u64)(descriptor.flags() & ~O_CLOEXEC);
case F_SETFL: {
int arg = (int)args[2];
descriptor.flags &= ~(O_APPEND | O_NONBLOCK);
descriptor.flags() &= ~(O_APPEND | O_NONBLOCK);
arg &= (O_APPEND | O_NONBLOCK);
descriptor.flags |= arg;
descriptor.flags() |= arg;
return 0;
}
@ -161,7 +162,7 @@ Result<u64> sys_ioctl(Registers*, SyscallArgs args)
Thread* current = Scheduler::current();
auto& descriptor = *TRY(current->resolve_fd(fd));
return descriptor.inode->ioctl(request, arg);
return descriptor.inode()->ioctl(request, arg);
}
Result<u64> sys_isatty(Registers*, SyscallArgs args)
@ -171,7 +172,7 @@ Result<u64> sys_isatty(Registers*, SyscallArgs args)
Thread* current = Scheduler::current();
auto& descriptor = *TRY(current->resolve_fd(fd));
return descriptor.inode->isatty();
return descriptor.inode()->isatty();
}
Result<u64> sys_dup2(Registers*, SyscallArgs args)
@ -188,7 +189,7 @@ Result<u64> sys_dup2(Registers*, SyscallArgs args)
if (newfd == oldfd) return (u64)newfd;
current->fd_table[newfd] = descriptor;
current->fd_table[newfd]->flags &= ~O_CLOEXEC;
current->fd_table[newfd]->flags() &= ~O_CLOEXEC;
return (u64)newfd;
}
@ -210,8 +211,8 @@ Result<u64> sys_pipe(Registers*, SyscallArgs args)
TRY(Pipe::create(rpipe, wpipe));
current->fd_table[rfd] = FileDescriptor { rpipe, 0, O_RDONLY };
current->fd_table[wfd] = FileDescriptor { wpipe, 0, O_WRONLY };
current->fd_table[rfd] = FileDescriptor { TRY(make_shared<OpenFileDescription>(rpipe, O_RDONLY)), 0 };
current->fd_table[wfd] = FileDescriptor { TRY(make_shared<OpenFileDescription>(wpipe, O_WRONLY)), 0 };
return 0;
}

View File

@ -14,13 +14,13 @@ Result<u64> sys_getdents(Registers*, SyscallArgs args)
auto& descriptor = *TRY(current->resolve_fd(fd));
if (descriptor.inode->type() != VFS::InodeType::Directory) return err(ENOTDIR);
if (descriptor.inode()->type() != VFS::InodeType::Directory) return err(ENOTDIR);
usize nwrite = 0;
while (nwrite < count)
{
VFS::DirectoryEntry entry;
bool ok = descriptor.inode->get(descriptor.offset).try_set_value(entry);
bool ok = descriptor.inode()->get(descriptor.offset).try_set_value(entry);
if (!ok) break;
descriptor.offset++;

View File

@ -55,6 +55,8 @@ Result<u64> sys_openat(Registers*, SyscallArgs args)
// This should only be possible if O_NOFOLLOW was in flags.
if (inode->type() == VFS::InodeType::Symlink) return err(ELOOP);
if (inode->type() == VFS::InodeType::Socket) return err(ENXIO);
if (flags & O_TMPFILE)
{
if (inode->type() != VFS::InodeType::Directory) return err(EINVAL);
@ -80,9 +82,7 @@ Result<u64> sys_openat(Registers*, SyscallArgs args)
kdbgln("openat: opening file %s from dirfd %d, flags %d, mode %#o = fd %d", path.chars(), dirfd, flags, mode, fd);
#endif
inode->add_handle();
current->fd_table[fd] = FileDescriptor { inode, 0, flags & FLAGS_TO_KEEP };
current->fd_table[fd] = FileDescriptor { TRY(make_shared<OpenFileDescription>(inode, flags & FLAGS_TO_KEEP)), 0 };
return (u64)fd;
}
@ -98,8 +98,6 @@ Result<u64> sys_close(Registers*, SyscallArgs args)
if (!descriptor.has_value()) return err(EBADF);
descriptor->inode->remove_handle();
descriptor = {};
return 0;

129
kernel/src/sys/socket.cpp Normal file
View File

@ -0,0 +1,129 @@
#include "net/Socket.h"
#include "memory/MemoryManager.h"
#include "net/UnixSocket.h"
#include "sys/Syscall.h"
#include "thread/Scheduler.h"
#include <bits/open-flags.h>
// socket(domain, type, protocol): creates a new socket and returns a file descriptor for it.
// Only SOCK_STREAM sockets (EPROTOTYPE otherwise) in the AF_UNIX domain
// (EAFNOSUPPORT otherwise) are supported. The descriptor is opened O_RDWR.
Result<u64> sys_socket(Registers*, SyscallArgs args)
{
    const int family = (int)args[0];
    const int kind = (int)args[1];
    // The third argument (protocol) is not used for now.

    if (kind != SOCK_STREAM) return err(EPROTOTYPE);
    if (family != AF_UNIX) return err(EAFNOSUPPORT);

    auto unix_socket = TRY(make_shared<UnixSocket>());

    auto* thread = Scheduler::current();

    int new_fd = TRY(thread->allocate_fd(0));

    auto open_file = TRY(make_shared<OpenFileDescription>(unix_socket, O_RDWR));
    thread->fd_table[new_fd] = FileDescriptor { open_file, 0 };

    return new_fd;
}
// bind(sockfd, addr, addrlen): binds the socket behind `sockfd` to an address.
// The address is copied from user memory into a kernel-side sockaddr_storage
// (EINVAL if it does not fit, EFAULT if the copy fails) before being handed
// to the socket. ENOTSOCK is returned if the descriptor is not a socket.
Result<u64> sys_bind(Registers*, SyscallArgs args)
{
    const int fd = (int)args[0];
    auto* user_address = (struct sockaddr*)args[1];
    const socklen_t length = (socklen_t)args[2];

    struct sockaddr_storage kernel_address;
    if ((usize)length > sizeof(kernel_address)) return err(EINVAL);
    if (!MemoryManager::copy_from_user(user_address, &kernel_address, length)) return err(EFAULT);

    auto* thread = Scheduler::current();

    auto file = TRY(thread->resolve_fd(fd))->inode();
    if (file->type() != VFS::InodeType::Socket) return err(ENOTSOCK);

    auto sock = (SharedPtr<Socket>)file;
    TRY(sock->bind(sock, (struct sockaddr*)&kernel_address, length));

    return 0;
}
// connect(sockfd, addr, addrlen): connects the socket behind `sockfd` to an address.
// The address is copied from user memory into a kernel-side sockaddr_storage
// (EINVAL if it does not fit, EFAULT if the copy fails). The flags of the open
// file description are passed on to the socket's connect().
// ENOTSOCK is returned if the descriptor is not a socket.
Result<u64> sys_connect(Registers* regs, SyscallArgs args)
{
    const int fd = (int)args[0];
    auto* user_address = (struct sockaddr*)args[1];
    const socklen_t length = (socklen_t)args[2];

    struct sockaddr_storage kernel_address;
    if ((usize)length > sizeof(kernel_address)) return err(EINVAL);
    if (!MemoryManager::copy_from_user(user_address, &kernel_address, length)) return err(EFAULT);

    auto* thread = Scheduler::current();

    auto open_file = TRY(thread->resolve_fd(fd))->description;
    if (open_file->inode->type() != VFS::InodeType::Socket) return err(ENOTSOCK);

    auto sock = (SharedPtr<Socket>)open_file->inode;
    TRY(sock->connect(regs, open_file->flags, (struct sockaddr*)&kernel_address, length));

    return 0;
}
// listen(sockfd, backlog): puts the socket behind `sockfd` into the listening state.
// ENOTSOCK is returned if the descriptor is not a socket.
Result<u64> sys_listen(Registers*, SyscallArgs args)
{
    const int fd = (int)args[0];
    const int queue_length = (int)args[1];

    auto* thread = Scheduler::current();

    auto file = TRY(thread->resolve_fd(fd))->inode();
    if (file->type() != VFS::InodeType::Socket) return err(ENOTSOCK);

    auto sock = (SharedPtr<Socket>)file;
    TRY(sock->listen(queue_length));

    return 0;
}
// accept(sockfd, addr, addrlen): accepts a pending connection on a listening socket and
// returns a new file descriptor for it.
//
// If `addr` is non-null, `addrlen` must point to the size of the buffer at `addr`. At most
// that many bytes of the client's address are copied out, and `*addrlen` is updated with the
// full length of the client's address.
//
// Errors:
//   EINVAL   - `addr` is given without `addrlen`, or `*addrlen` is negative.
//   EFAULT   - `*addrlen` could not be read from user memory.
//   ENOTSOCK - the descriptor is not a socket.
//   Errors from descriptor lookup, the socket's accept() and descriptor allocation are
//   passed through.
Result<u64> sys_accept(Registers* regs, SyscallArgs args)
{
    int sockfd = (int)args[0];
    struct sockaddr* addr = (struct sockaddr*)args[1];
    socklen_t* addrlen = (socklen_t*)args[2];

    if (addr && !addrlen) return err(EINVAL);

    // Only meaningful when `addr` is given; initialized so it is never read indeterminate.
    socklen_t len = 0;
    if (addr)
    {
        if (!MemoryManager::copy_from_user_typed(addrlen, &len)) return err(EFAULT);

        // socklen_t is signed. A negative length would not be clamped below and would
        // reach copy_to_user() as the copy size.
        if (len < 0) return err(EINVAL);
    }

    auto* current = Scheduler::current();

    auto description = TRY(current->resolve_fd(sockfd))->description;
    if (description->inode->type() != VFS::InodeType::Socket) return err(ENOTSOCK);

    auto socket = (SharedPtr<Socket>)description->inode;

    struct sockaddr* client;
    socklen_t client_len;

    auto new_description = TRY(socket->accept(regs, description->flags, &client, &client_len));

    int fd = TRY(current->allocate_fd(0));
    current->fd_table[fd] = FileDescriptor { new_description, 0 };

    if (addr)
    {
        // Never copy more than the client's address actually contains.
        if (client_len < len) len = client_len;

        MemoryManager::copy_to_user(addr, client, len);
        MemoryManager::copy_to_user_typed(addrlen, &client_len);
    }

    return fd;
}

View File

@ -18,6 +18,7 @@ static mode_t make_mode(mode_t mode, VFS::InodeType type)
case VFS::InodeType::BlockDevice: result |= S_IFBLK; break;
case VFS::InodeType::Symlink: result |= S_IFLNK; break;
case VFS::InodeType::FIFO: result |= S_IFIFO; break;
case VFS::InodeType::Socket: result |= S_IFSOCK; break;
default: break;
}

View File

@ -192,11 +192,6 @@ namespace Scheduler
{
auto stack = thread->kernel_stack;
MemoryManager::unmap_owned_and_free_vm(stack.bottom(), stack.bytes() / ARCH_PAGE_SIZE).release_value();
for (int i = 0; i < FD_MAX; i++)
{
if (thread->fd_table[i].has_value()) thread->fd_table[i]->inode->remove_handle();
}
}
delete thread;

View File

@ -71,11 +71,11 @@ Result<SharedPtr<VFS::Inode>> Thread::resolve_atfile(int dirfd, const String& pa
auto descriptor = TRY(resolve_fd(dirfd));
if (parent_inode) *parent_inode = descriptor->inode;
if (parent_inode) *parent_inode = descriptor->inode();
if (path.is_empty() && allow_empty_path) return descriptor->inode;
if (path.is_empty() && allow_empty_path) return descriptor->inode();
return VFS::resolve_path(path.chars(), this->auth, descriptor->inode, follow_last_symlink);
return VFS::resolve_path(path.chars(), this->auth, descriptor->inode(), follow_last_symlink);
}
[[noreturn]] void Thread::exit_and_signal_parent(int _status)
@ -215,22 +215,33 @@ void Thread::send_signal(int signo)
}
}
// Creates an open file description for `ino` with the given flags and
// registers one handle on the inode for as long as the description exists.
OpenFileDescription::OpenFileDescription(SharedPtr<VFS::Inode> ino, int fl) : inode(ino), flags(fl)
{
inode->add_handle();
}
// Releases the handle taken in the constructor, then tells the inode that it was closed.
OpenFileDescription::~OpenFileDescription()
{
inode->remove_handle();
inode->did_close();
}
bool FileDescriptor::should_append()
{
return flags & O_APPEND;
return description->flags & O_APPEND;
}
bool FileDescriptor::should_block()
{
return !(flags & O_NONBLOCK);
return !(description->flags & O_NONBLOCK);
}
bool FileDescriptor::is_readable()
{
return flags & O_RDONLY;
return description->flags & O_RDONLY;
}
bool FileDescriptor::is_writable()
{
return flags & O_WRONLY;
return description->flags & O_WRONLY;
}

View File

@ -28,16 +28,34 @@ enum class ThreadState
Dying
};
struct FileDescriptor
struct OpenFileDescription : public Shareable
{
SharedPtr<VFS::Inode> inode;
usize offset { 0 };
int flags { 0 };
OpenFileDescription(SharedPtr<VFS::Inode>, int);
~OpenFileDescription();
};
struct FileDescriptor
{
SharedPtr<OpenFileDescription> description;
usize offset { 0 };
bool should_append();
bool should_block();
bool is_writable();
bool is_readable();
SharedPtr<VFS::Inode> inode()
{
return description->inode;
}
int& flags()
{
return description->flags;
}
};
static constexpr int FD_MAX = 64;

View File

@ -31,6 +31,7 @@ set(SOURCES
src/sys/mount.cpp
src/sys/pstat.cpp
src/sys/resource.cpp
src/sys/socket.cpp
)
if(${LUNA_ARCH} STREQUAL "x86_64")

View File

@ -10,6 +10,7 @@
#define S_IFBLK 030000
#define S_IFDIR 040000
#define S_IFCHR 050000
#define S_IFSOCK 060000
#define __CHECK_TYPE(mode, type) (((mode)&S_IFMT) == type)
@ -19,6 +20,7 @@
#define S_ISBLK(mode) __CHECK_TYPE(mode, S_IFBLK)
#define S_ISLNK(mode) __CHECK_TYPE(mode, S_IFLNK)
#define S_ISFIFO(mode) __CHECK_TYPE(mode, S_IFIFO)
#define S_ISSOCK(mode) __CHECK_TYPE(mode, S_IFSOCK)
#define S_IRWXU 0700
#define S_IRUSR 0400

View File

@ -0,0 +1,30 @@
/* bits/socket.h: Socket-related types and structures. */
#ifndef _BITS_SOCKET_H
#define _BITS_SOCKET_H
typedef int socklen_t;
typedef unsigned sa_family_t;
/* Generic socket address: an address family followed by family-specific data. */
struct sockaddr
{
sa_family_t sa_family;
char sa_data[4];
};
/* Socket address storage: an address family followed by a union with one member per
 * supported address type (currently only the AF_UNIX path). */
struct sockaddr_storage
{
sa_family_t ss_family;
union {
char _sun_path[108]; // AF_UNIX
};
};
#define SOCK_STREAM 0
#define AF_UNSPEC 0
#define AF_UNIX 1
#define AF_INET 2
#define AF_INET6 3
#endif

32
libc/include/sys/socket.h Normal file
View File

@ -0,0 +1,32 @@
/* sys/socket.h: Communication sockets. */
#ifndef _SYS_SOCKET_H
#define _SYS_SOCKET_H
#include <bits/socket.h>
#ifdef __cplusplus
extern "C"
{
#endif
/* Create a new socket and return a file descriptor pointing to it.
   The callers added in this change pass AF_UNIX / SOCK_STREAM / 0 and treat a negative
   return value as failure, reporting it with perror(). */
int socket(int domain, int type, int protocol);
/* Bind a socket to an address. addr points to addrlen bytes of address data
   (e.g. a struct sockaddr_un cast to struct sockaddr*). */
int bind(int sockfd, struct sockaddr* addr, socklen_t addrlen);
/* Connect a socket to a remote address, given in the same form as for bind(). */
int connect(int sockfd, struct sockaddr* addr, socklen_t addrlen);
/* Start listening on a socket. backlog is forwarded unchanged to the kernel. */
int listen(int sockfd, int backlog);
/* Wait for an incoming connection on a socket. addr and addrlen are forwarded unchanged
   to the kernel. NOTE(review): presumably they receive the peer address and its length — confirm. */
int accept(int sockfd, struct sockaddr* addr, socklen_t* addrlen);
#ifdef __cplusplus
}
#endif
#endif

14
libc/include/sys/un.h Normal file
View File

@ -0,0 +1,14 @@
/* sys/un.h: The sockaddr_un structure for AF_UNIX sockets. */
#ifndef _SYS_UN_H
#define _SYS_UN_H
#include <bits/socket.h>
/* Address of an AF_UNIX socket: a family tag followed by a filesystem path.
   The test programs in this change set sun_family to AF_UNIX and strncpy() a path into
   sun_path before passing the structure to bind()/connect(). */
struct sockaddr_un
{
/* Address family tag. */
sa_family_t sun_family;
/* Path of the socket; fixed-size 108-byte buffer. */
char sun_path[108];
};
#endif

37
libc/src/sys/socket.cpp Normal file
View File

@ -0,0 +1,37 @@
#include <bits/errno-return.h>
#include <sys/socket.h>
#include <sys/syscall.h>
#include <unistd.h>
extern "C"
{
/* socket(): issue the SYS_socket system call with the given domain, type and protocol,
   then hand the raw result to __errno_return to produce the int return value. */
int socket(int domain, int type, int protocol)
{
    long result = syscall(SYS_socket, domain, type, protocol);

    __errno_return(result, int);
}
/* bind(): issue the SYS_bind system call for sockfd with the given address and length,
   then hand the raw result to __errno_return to produce the int return value. */
int bind(int sockfd, struct sockaddr* addr, socklen_t addrlen)
{
    long result = syscall(SYS_bind, sockfd, addr, addrlen);

    __errno_return(result, int);
}
/* connect(): issue the SYS_connect system call for sockfd with the given address and length,
   then hand the raw result to __errno_return to produce the int return value. */
int connect(int sockfd, struct sockaddr* addr, socklen_t addrlen)
{
    long result = syscall(SYS_connect, sockfd, addr, addrlen);

    __errno_return(result, int);
}
/* listen(): issue the SYS_listen system call for sockfd with the requested backlog,
   then hand the raw result to __errno_return to produce the int return value. */
int listen(int sockfd, int backlog)
{
    long result = syscall(SYS_listen, sockfd, backlog);

    __errno_return(result, int);
}
/* accept(): issue the SYS_accept system call for sockfd, forwarding the address and length
   pointers unchanged, then hand the raw result to __errno_return to produce the int return value. */
int accept(int sockfd, struct sockaddr* addr, socklen_t* addrlen)
{
    long result = syscall(SYS_accept, sockfd, addr, addrlen);

    __errno_return(result, int);
}
}

View File

@ -21,6 +21,8 @@ class Buffer
Result<void> append_data(const u8* data, usize size);
usize dequeue_data(u8* data, usize size);
u8* data()
{
return m_data;

View File

@ -1,5 +1,7 @@
#pragma once
#include <luna/Atomic.h>
#include <luna/Heap.h>
#include <luna/Result.h>
#include <luna/Types.h>
template <typename T, usize Size> class CircularQueue
@ -55,3 +57,68 @@ template <typename T, usize Size> class CircularQueue
Atomic<usize> m_head = 0;
Atomic<usize> m_tail = 0;
};
// A circular queue whose backing buffer is allocated at runtime by set_size().
//
// The buffer has one more slot than the requested size: one slot always stays unused so that
// "full" (advancing the tail would reach the head) can be told apart from "empty" (head == tail).
//
// set_size() must have succeeded before try_push()/try_pop() are used; both start with
// check(m_capacity). The queue owns its buffer and releases it in the destructor.
template <typename T> class DynamicCircularQueue
{
  public:
    DynamicCircularQueue()
    {
    }

    ~DynamicCircularQueue()
    {
        if (m_data) free_impl(m_data);
    }

    // Allocates storage for up to `size` queued elements.
    //
    // May be called again to change the size: the previous buffer is released instead of being
    // leaked, and the queue restarts empty (elements that were still queued are discarded).
    // If the allocation fails, TRY is expected to propagate the error from here, leaving the
    // queue exactly as it was.
    Result<void> set_size(usize size)
    {
        // Allocate before touching any member, so a failure cannot leave the queue half-updated.
        T* const new_data = (T*)TRY(calloc_impl(size + 1, sizeof(T), false));

        // Release the buffer from an earlier call; previously it was simply overwritten and leaked.
        if (m_data) free_impl(m_data);

        m_data = new_data;
        m_capacity = size + 1;

        // The new buffer contains none of the old elements, and the old indices may be out of
        // range for the new capacity, so start over from an empty queue.
        m_head = 0;
        m_tail = 0;

        return {};
    }

    // Tries to append `value`. Returns false if the queue is full or if the tail index was
    // changed by someone else before this push could be published.
    bool try_push(const T& value)
    {
        check(m_capacity);

        usize current_tail = m_tail.load(MemoryOrder::Relaxed);
        const usize new_tail = (current_tail + 1) % m_capacity;
        if (new_tail == m_head.load(MemoryOrder::Acquire))
        {
            // Queue is full
            return false;
        }

        // The slot is written first; the element only becomes visible to try_pop() once the
        // tail has been advanced below.
        m_data[current_tail] = value;

        if (!m_tail.compare_exchange_strong(current_tail, new_tail, MemoryOrder::Release, MemoryOrder::Relaxed))
        {
            // Someone else updated the tail
            return false;
        }

        return true;
    }

    // Tries to remove the oldest element into `value`. Returns false if the queue is empty or if
    // the head index was changed by someone else before this pop could be published; in the
    // latter case `value` has already been overwritten with the element that was read.
    bool try_pop(T& value)
    {
        check(m_capacity);

        usize current_head = m_head.load(MemoryOrder::Relaxed);
        if (current_head == m_tail.load(MemoryOrder::Acquire))
        {
            // Queue is empty
            return false;
        }

        value = m_data[current_head];

        const usize new_head = (current_head + 1) % m_capacity;
        if (!m_head.compare_exchange_strong(current_head, new_head, MemoryOrder::Release, MemoryOrder::Relaxed))
        {
            // Someone else updated the head
            return false;
        }

        return true;
    }

  private:
    T* m_data = nullptr;       // Backing buffer of m_capacity slots; nullptr until set_size() succeeds.
    usize m_capacity = 0;      // Number of slots in m_data (requested size + 1).
    Atomic<usize> m_head = 0;  // Index of the next element to pop.
    Atomic<usize> m_tail = 0;  // Index of the next slot to push into.
};

View File

@ -7,7 +7,7 @@
_e(fstatat) _e(chdir) _e(getcwd) _e(unlinkat) _e(uname) _e(sethostname) _e(dup2) _e(pipe) _e(mount) \
_e(umount) _e(pstat) _e(getrusage) _e(symlinkat) _e(readlinkat) _e(umask) _e(linkat) _e(faccessat) \
_e(pivot_root) _e(sigreturn) _e(sigaction) _e(kill) _e(sigprocmask) _e(setpgid) _e(isatty) \
_e(getpgid)
_e(getpgid) _e(socket) _e(bind) _e(connect) _e(listen) _e(accept)
enum Syscalls
{

View File

@ -45,11 +45,18 @@
#define EOVERFLOW 75 // Value too large for defined data type
#define EILSEQ 84 // Invalid or incomplete multibyte or wide character
#define ENOTSOCK 88 // Socket operation on non-socket
#define EDESTADDRREQ 89 // Destination address required
#define EPROTOTYPE 91 // Protocol wrong type for socket
#define ENOTSUP 95 // Operation not supported
#define EOPNOTSUPP 95 // Operation not supported
#define EAFNOSUPPORT 97 // Address family not supported by protocol
#define EADDRINUSE 98 // Address already in use
#define EADDRNOTAVAIL 99 // Cannot assign requested address
#define ENETRESET 102 // Network dropped connection on reset
#define ECONNRESET 104 // Connection reset by peer
#define EISCONN 106 // Transport endpoint is already connected
#define ENOTCONN 107 // Transport endpoint is not connected
#define ETIMEDOUT 110 // Connection timed out
#define ECONNREFUSED 111 // Connection refused
#define EALREADY 114 // Operation already in progress
#define EINPROGRESS 115 // Operation now in progress

View File

@ -53,6 +53,20 @@ Result<void> Buffer::append_data(const u8* data, usize size)
return {};
}
// Moves up to `size` bytes from the front of the buffer into `data`.
// The request is clamped to the number of bytes currently stored; the bytes that remain are
// shifted to the start of the buffer. Returns how many bytes were copied (0 if there were none).
usize Buffer::dequeue_data(u8* data, usize size)
{
    const usize count = (size > m_size) ? m_size : size;
    if (count == 0) return 0;

    memcpy(data, m_data, count);

    // Close the gap left at the front; memmove is used because the ranges may overlap.
    const usize remaining = m_size - count;
    memmove(m_data, m_data + count, remaining);
    m_size = remaining;

    return count;
}
u8* Buffer::release_data()
{
u8* data = m_data;

View File

@ -56,6 +56,13 @@ const char* error_string(int error)
case EISCONN: return "Transport endpoint is already connected";
case ETIMEDOUT: return "Connection timed out";
case EALREADY: return "Operation already in progress";
case EDESTADDRREQ: return "Destination address required";
case EPROTOTYPE: return "Protocol wrong type for socket";
case EAFNOSUPPORT: return "Address family not supported by protocol";
case ENOTCONN: return "Transport endpoint is not connected";
case EADDRNOTAVAIL: return "Cannot assign requested address";
case ECONNREFUSED: return "Connection refused";
case EINPROGRESS: return "Operation now in progress";
default: return "Unknown error";
}
}
@ -118,6 +125,13 @@ const char* error_name(int error)
ERROR(EISCONN);
ERROR(ETIMEDOUT);
ERROR(EALREADY);
ERROR(EDESTADDRREQ);
ERROR(EPROTOTYPE);
ERROR(EAFNOSUPPORT);
ERROR(ENOTCONN);
ERROR(EADDRNOTAVAIL);
ERROR(ECONNREFUSED);
ERROR(EINPROGRESS);
default: return nullptr;
}

View File

@ -21,6 +21,7 @@ namespace os
if (S_ISBLK(mode)) return 'b';
if (S_ISLNK(mode)) return 'l';
if (S_ISFIFO(mode)) return 'p';
if (S_ISSOCK(mode)) return 's';
return '?';
}