sortix-mirror/kernel/fcache.cpp

523 lines
14 KiB
C++
Raw Normal View History

/*
* Copyright (c) 2013, 2014, 2017 Jonas 'Sortie' Termansen.
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*
* fcache.cpp
* Cache mechanism for file contents.
*/
2013-03-12 13:24:31 +00:00
#include <sys/types.h>
#include <sys/uio.h>
2013-03-12 13:24:31 +00:00
#include <assert.h>
#include <errno.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include <sortix/mman.h>
#include <sortix/seek.h>
#include <sortix/kernel/addralloc.h>
#include <sortix/kernel/fcache.h>
#include <sortix/kernel/ioctx.h>
2013-10-27 00:42:10 +00:00
#include <sortix/kernel/kernel.h>
2013-03-12 13:24:31 +00:00
#include <sortix/kernel/kthread.h>
#include <sortix/kernel/memorymanagement.h>
namespace Sortix {
__attribute__((unused))
static uintptr_t MakeBlockId(size_t area, size_t blocks_per_area, size_t block)
{
return (uintptr_t) area * (uintptr_t) blocks_per_area + (uintptr_t) block;
}
__attribute__((unused))
static uintptr_t MakeBlockInformation(uintptr_t blockid, uintptr_t flags)
{
return (blockid << 12) | (flags & ((1 << (12+1)) - 1));
}
__attribute__((unused))
static uintptr_t FlagsOfBlockInformation(uintptr_t info)
{
return info & ((1 << (12+1)) - 1);
}
__attribute__((unused))
static uintptr_t BlockIdOfBlockInformation(uintptr_t info)
{
return info >> 12;
}
__attribute__((unused))
static size_t BlockOfBlockId(uintptr_t blockid, size_t blocks_per_area)
{
return (size_t) (blockid % blocks_per_area);
}
__attribute__((unused))
static size_t AreaOfBlockId(uintptr_t blockid, size_t blocks_per_area)
{
return (size_t) (blockid / blocks_per_area);
}
BlockCache::BlockCache()
{
areas = NULL;
areas_used = 0;
areas_length = 0;
blocks_per_area = 1024UL;
2013-03-12 16:41:25 +00:00
unused_block_count = 0;
blocks_used = 0;
blocks_allocated = 0;
2013-03-12 13:24:31 +00:00
mru_block = NULL;
lru_block = NULL;
unused_block = NULL;
bcache_mutex = KTHREAD_MUTEX_INITIALIZER;
}
BlockCache::~BlockCache()
{
// TODO: Clean up everything here!
}
BlockCacheBlock* BlockCache::AcquireBlock()
{
ScopedLock lock(&bcache_mutex);
if ( !unused_block && !AddArea() )
return NULL;
BlockCacheBlock* ret = unused_block;
assert(ret);
2013-03-12 16:41:25 +00:00
unused_block_count--;
2013-03-12 13:24:31 +00:00
unused_block = unused_block->next_block;
ret->prev_block = ret->next_block = NULL;
if ( unused_block )
unused_block->prev_block = NULL;
ret->information |= BCACHE_USED;
LinkBlock(ret);
blocks_used++;
2013-03-12 13:24:31 +00:00
return ret;
}
void BlockCache::ReleaseBlock(BlockCacheBlock* block)
{
ScopedLock lock(&bcache_mutex);
2013-03-12 16:41:25 +00:00
assert(block->information & BCACHE_PRESENT);
assert(block->information & BCACHE_USED);
blocks_used--;
2013-03-12 16:41:25 +00:00
if ( blocks_per_area < unused_block_count )
{
blocks_allocated--;
2013-03-12 16:41:25 +00:00
uint8_t* block_data = BlockDataUnlocked(block);
addr_t block_data_addr = Memory::Unmap((addr_t) block_data);
Page::Put(block_data_addr, PAGE_USAGE_FILESYSTEM_CACHE);
2013-03-12 16:41:25 +00:00
// TODO: We leak this block's meta information here. Rather, we should
// put this block into a list of non-present blocks so we can reuse it
// later and reallocate a physical frame for it - then we will just
// reuse the block's meta information.
block->information &= ~(BCACHE_USED | BCACHE_PRESENT);
return;
}
2013-03-12 13:24:31 +00:00
UnlinkBlock(block);
2013-03-12 16:41:25 +00:00
unused_block_count++;
2013-03-12 13:24:31 +00:00
if ( unused_block )
unused_block->prev_block = block;
block->next_block = unused_block;
block->prev_block = NULL;
block->information &= ~BCACHE_USED;
unused_block = block;
}
uint8_t* BlockCache::BlockData(BlockCacheBlock* block)
{
ScopedLock lock(&bcache_mutex);
2013-03-12 16:41:25 +00:00
return BlockDataUnlocked(block);
}
uint8_t* BlockCache::BlockDataUnlocked(BlockCacheBlock* block)
{
2013-03-12 13:24:31 +00:00
uintptr_t block_id = BlockIdOfBlockInformation(block->information);
uintptr_t area_num = AreaOfBlockId(block_id, blocks_per_area);
uintptr_t area_off = BlockOfBlockId(block_id, blocks_per_area);
return areas[area_num].data + area_off * Page::Size();
}
void BlockCache::MarkUsed(BlockCacheBlock* block)
{
ScopedLock lock(&bcache_mutex);
UnlinkBlock(block);
LinkBlock(block);
}
void BlockCache::MarkModified(BlockCacheBlock* block)
{
ScopedLock lock(&bcache_mutex);
UnlinkBlock(block);
LinkBlock(block);
block->information |= BCACHE_MODIFIED;
}
void BlockCache::UnlinkBlock(BlockCacheBlock* block)
{
(block->prev_block ? block->prev_block->next_block : mru_block) = block->next_block;
(block->next_block ? block->next_block->prev_block : lru_block) = block->prev_block;
block->next_block = block->prev_block = NULL;
}
void BlockCache::LinkBlock(BlockCacheBlock* block)
{
assert(!(block->next_block || block == lru_block));
assert(!(block->prev_block || block == mru_block));
block->prev_block = NULL;
block->next_block = mru_block;
if ( mru_block )
mru_block->prev_block = block;
mru_block = block;
if ( !lru_block )
lru_block = block;
}
bool BlockCache::AddArea()
{
if ( areas_used == areas_length )
{
size_t new_length = areas_length ? 2 * areas_length : 8UL;
size_t new_size = new_length * sizeof(BlockCacheArea);
BlockCacheArea* new_areas = (BlockCacheArea*) realloc(areas, new_size);
if ( !new_areas )
return false;
areas = new_areas;
areas_length = new_length;
}
size_t area_memory_size = Page::Size() * blocks_per_area;
int prot = PROT_KREAD | PROT_KWRITE;
BlockCacheArea* area = &areas[areas_used];
if ( !AllocateKernelAddress(&area->addralloc, area_memory_size) )
goto cleanup_done;
if ( !(area->blocks = new BlockCacheBlock[blocks_per_area]) )
goto cleanup_addralloc;
if ( !Memory::MapRange(area->addralloc.from, area->addralloc.size, prot, PAGE_USAGE_FILESYSTEM_CACHE) )
2013-03-12 13:24:31 +00:00
goto cleanup_blocks;
Memory::Flush();
blocks_allocated += blocks_per_area;
2013-03-12 13:24:31 +00:00
// Add all our new blocks into the unused block linked list.
for ( size_t i = blocks_per_area; i != 0; i-- )
{
size_t index = i - 1;
BlockCacheBlock* block = &area->blocks[index];
uintptr_t blockid = MakeBlockId(areas_used, blocks_per_area, index);
block->information = MakeBlockInformation(blockid, BCACHE_PRESENT);
block->fcache = NULL;
block->next_block = unused_block;
block->prev_block = NULL;
if ( unused_block )
unused_block->prev_block = block;
unused_block = block;
2013-03-12 16:41:25 +00:00
unused_block_count++;
2013-03-12 13:24:31 +00:00
}
area->data = (uint8_t*) area->addralloc.from;
for ( size_t i = 0; i < area_memory_size; i++ )
area->data[i] = 0xCC;
return areas_used++, true;
cleanup_blocks:
delete[] area->blocks;
cleanup_addralloc:
FreeKernelAddress(&area->addralloc);
cleanup_done:
return false;
}
static BlockCache* kernel_block_cache = NULL;
/*static*/ void FileCache::Init()
{
if ( !(kernel_block_cache = new BlockCache()) )
Panic("Unable to allocate kernel block cache");
}
FileCache::FileCache(/*FileCacheBackend* backend*/)
{
assert(kernel_block_cache);
file_size = 0;
file_written = 0;
blocks = NULL;
blocks_used = 0;
blocks_length = 0;
fcache_mutex = KTHREAD_MUTEX_INITIALIZER;
modified = false;
modified_size = false;
}
FileCache::~FileCache()
{
for ( size_t i = 0; i < blocks_used; i++ )
kernel_block_cache->ReleaseBlock(blocks[i]);
free(blocks);
}
int FileCache::sync(ioctx_t* /*ctx*/)
{
ScopedLock lock(&fcache_mutex);
return Synchronize() ? 0 : -1;
}
bool FileCache::Synchronize()
{
//if ( !backend )
if ( true )
return true;
if ( !(modified || modified_size ) )
return true;
// TODO: Write out all dirty blocks and ask the next level to sync as well.
return true;
}
void FileCache::InitializeFileData(off_t to_where)
{
while ( file_written < to_where )
{
off_t left = to_where - file_written;
size_t block_off = (size_t) (file_written % Page::Size());
size_t block_num = (size_t) (file_written / Page::Size());
size_t block_left = Page::Size() - block_off;
size_t amount_to_set = (uintmax_t) left < (uintmax_t) block_left ?
(size_t) left : block_left;
assert(block_num < blocks_used);
BlockCacheBlock* block = blocks[block_num];
uint8_t* block_data = kernel_block_cache->BlockData(block);
uint8_t* data = block_data + block_off;
memset(data, 0, amount_to_set);
file_written += (off_t) amount_to_set;
kernel_block_cache->MarkModified(block);
}
}
ssize_t FileCache::preadv(ioctx_t* ctx, const struct iovec* iovs, int iovcnt,
off_t off)
2013-03-12 13:24:31 +00:00
{
ScopedLock lock(&fcache_mutex);
ssize_t so_far = 0;
int iov_i = 0;
size_t iov_offset = 0;
while ( iov_i < iovcnt && so_far < SSIZE_MAX )
2013-03-12 13:24:31 +00:00
{
off_t current_off = off + (off_t) so_far;
if ( file_size <= current_off )
break;
size_t maxcount = SSIZE_MAX - so_far;
if ( (uintmax_t) (file_size - current_off) < maxcount )
maxcount = file_size - current_off;
if ( maxcount == 0 )
break;
const struct iovec* iov = &iovs[iov_i];
uint8_t* buf = (uint8_t*) iov->iov_base + iov_offset;
size_t count = iov->iov_len - iov_offset;
if ( maxcount < count )
count = maxcount;
if ( count == 0 )
{
iov_i++;
iov_offset = 0;
continue;
}
2013-03-12 13:24:31 +00:00
size_t block_off = (size_t) (current_off % Page::Size());
size_t block_num = (size_t) (current_off / Page::Size());
size_t block_left = Page::Size() - block_off;
size_t amount = count < block_left ? count : block_left;
2013-03-12 13:24:31 +00:00
assert(block_num < blocks_used);
BlockCacheBlock* block = blocks[block_num];
const uint8_t* block_data = kernel_block_cache->BlockData(block);
const uint8_t* src_data = block_data + block_off;
if ( file_written < current_off + (off_t) amount )
InitializeFileData(current_off + (off_t) amount);
if ( !ctx->copy_to_dest(buf, src_data, amount) )
return so_far ? (ssize_t) so_far : -1;
so_far += amount;
2013-03-12 13:24:31 +00:00
kernel_block_cache->MarkUsed(block);
iov_offset += amount;
if ( iov_offset == iov->iov_len )
{
iov_i++;
iov_offset = 0;
}
2013-03-12 13:24:31 +00:00
}
return so_far;
2013-03-12 13:24:31 +00:00
}
ssize_t FileCache::pwritev(ioctx_t* ctx, const struct iovec* iovs, int iovcnt,
off_t off)
2013-03-12 13:24:31 +00:00
{
ScopedLock lock(&fcache_mutex);
ssize_t so_far = 0;
int iov_i = 0;
size_t iov_offset = 0;
while ( iov_i < iovcnt && so_far < SSIZE_MAX )
2013-03-12 13:24:31 +00:00
{
off_t current_off = off + (off_t) so_far;
size_t maxcount = SSIZE_MAX - so_far;
if ( (uintmax_t) (OFF_MAX - current_off) < maxcount )
maxcount = OFF_MAX - current_off;
const struct iovec* iov = &iovs[iov_i];
uint8_t* buf = (uint8_t*) iov->iov_base + iov_offset;
size_t count = iov->iov_len - iov_offset;
if ( maxcount < count )
count = maxcount;
if ( count == 0 )
{
if ( so_far == 0 && maxcount == 0 && iov->iov_len != 0 )
return errno = ENOSPC, -1;
iov_i++;
iov_offset = 0;
continue;
}
off_t write_end = current_off + count;
if ( file_size < write_end && !ChangeSize(write_end, false) )
{
if ( file_size <= current_off )
return -1;
if ( (uintmax_t) (file_size - current_off) < count )
count = file_size - current_off;
}
2013-03-12 13:24:31 +00:00
size_t block_off = (size_t) (current_off % Page::Size());
size_t block_num = (size_t) (current_off / Page::Size());
size_t block_left = Page::Size() - block_off;
size_t amount = count < block_left ? count : block_left;
2013-03-12 13:24:31 +00:00
assert(block_num < blocks_used);
BlockCacheBlock* block = blocks[block_num];
uint8_t* block_data = kernel_block_cache->BlockData(block);
uint8_t* data = block_data + block_off;
if ( file_written < current_off )
InitializeFileData(current_off);
assert(amount);
2013-03-12 13:24:31 +00:00
modified = true; /* Unconditionally - copy_from_src can fail midway. */
if ( !ctx->copy_from_src(data, buf, amount) )
return so_far ? (ssize_t) so_far : -1;
if ( file_written < current_off + (off_t) amount )
file_written = current_off + (off_t) amount;
so_far += amount;
2013-03-12 13:24:31 +00:00
kernel_block_cache->MarkModified(block);
iov_offset += amount;
if ( iov_offset == iov->iov_len )
{
iov_i++;
iov_offset = 0;
}
2013-03-12 13:24:31 +00:00
}
return so_far;
2013-03-12 13:24:31 +00:00
}
int FileCache::truncate(ioctx_t* /*ctx*/, off_t length)
{
ScopedLock lock(&fcache_mutex);
return ChangeSize(length, true) ? 0 : -1;
}
off_t FileCache::GetFileSize()
{
ScopedLock lock(&fcache_mutex);
return file_size;
}
off_t FileCache::lseek(ioctx_t* /*ctx*/, off_t offset, int whence)
{
ScopedLock lock(&fcache_mutex);
Fix SEEK_END, file offset overflow, and read/write/mkpartition syscall bugs. Fix SEEK_END seeking twice as far as requested. Centralize lseek handling in one place and avoid overflow bugs. Inode lseek handlers now only need to handle SEEK_END with offset 0. Prevent the file offset from ever going below zero or overflowing. Character devices are now not seekable, but lseek will pretend they are, yet always stay at the file offset 0. pread/pwrite on character devices will now ignore the file offset and call read/write. This change prevents character devices from being memory mapped, notably /dev/zero can no longer be memory mapped. None of the current ports seem to rely on this behavior and will work with just MAP_ANONYMOUS. Refactor read and write system calls to have a shared return statement for both seekable and non-seekable IO. Fix file offset overflow bugs in read and write system calls. Fix system calls returning EPERM instead of properly returning EBADF when the file has not been opened in the right mode. Truncate IO counts and total vector IO length so the IO operation does not do any IO beyond OFF_MAX. Truncate also total vector IO length for recvmsg and sendmsg. Fail with EINVAL if total vector IO length exceeds SSIZE_MAX. Don't stop early if the total IO length is zero, so zero length IO now block on any locks internal to the inode. Handle reads at the maximum file offset with an end of file condition and handle writes of at least one byte at the maximum file offset by failing with EFBIG. Refactor UtilMemoryBuffer to store the file size using off_t instead of size_t to avoid casts and keep file sizes in the off_t type. Properly handle errors in the code, such as failing with EROFS instead of EBADF if the backing memory is not writeable, and failing with EFBIG if writing beyond the end of the file. Fix mkpartition not rejecting invalid partition start offsets and lengths. Strictly enforce partition start and length checks in the partition code. Enforce partitions exist within regular files or block devices. Fix a few indention issues.
2017-10-26 15:12:07 +00:00
if ( whence == SEEK_END && offset == 0 )
return (off_t) file_size;
2013-03-12 13:24:31 +00:00
return errno = EINVAL, -1;
}
//bool FileCache::ChangeBackend(FileCacheBackend* backend, bool sync_old)
//{
//}
bool FileCache::ChangeSize(off_t new_size, bool exact)
{
if ( file_size == new_size && !exact )
return true;
off_t numblocks_off_t = (new_size + Page::Size() - 1) / Page::Size();
// TODO: An alternative datastructure (perhaps tree like) will decrease the
// memory consumption of this pointer list as well as avoid this filesize
// restriction on 32-bit platforms.
if ( (uintmax_t) SIZE_MAX < (uintmax_t) numblocks_off_t )
return errno = EFBIG, false;
size_t numblocks = (size_t) numblocks_off_t;
if ( !ChangeNumBlocks(numblocks, exact) )
return false;
if ( new_size < file_written )
file_written = new_size;
file_size = new_size;
modified_size = true;
return true;
}
bool FileCache::ChangeNumBlocks(size_t new_numblocks, bool exact)
{
if ( new_numblocks == blocks_used && !exact )
return true;
// Release blocks if the file has decreased in size.
if ( new_numblocks < blocks_used )
{
for ( size_t i = new_numblocks; i < blocks_used; i++ )
kernel_block_cache->ReleaseBlock(blocks[i]);
blocks_used = new_numblocks;
}
2013-03-12 13:24:31 +00:00
// If needed, adapt the length of the array containing block pointers.
if ( new_numblocks < blocks_length )
exact = true;
size_t new_blocks_length = 2 * blocks_length;
if ( exact || new_blocks_length < new_numblocks )
new_blocks_length = new_numblocks;
size_t size = sizeof(BlockCacheBlock*) * new_blocks_length;
BlockCacheBlock** new_blocks = (BlockCacheBlock**) realloc(blocks, size);
if ( !new_blocks )
{
if ( blocks_length < new_blocks_length )
2013-03-12 13:24:31 +00:00
return false;
}
else
{
blocks = new_blocks;
2013-03-12 13:24:31 +00:00
blocks_length = new_blocks_length;
}
2013-03-12 13:24:31 +00:00
assert(new_numblocks <= blocks_length);
2013-03-12 13:24:31 +00:00
assert(!blocks_length || blocks);
// Acquire more blocks if the file has increased in size.
for ( size_t i = blocks_used; i < new_numblocks; i++ )
{
if ( !(blocks[i] = kernel_block_cache->AcquireBlock()) )
{
for ( size_t n = blocks_used; n < i; n++ )
kernel_block_cache->ReleaseBlock(blocks[n]);
return false;
}
blocks_used++;
2013-03-12 13:24:31 +00:00
}
return true;
}
} // namespace Sortix