Add paging no-execute and write protection support.

Enable the NX bit on x86_64, setting it on every page mapped without
PROT_EXEC, and enable write protection (CR0.WP), which disables the default
behavior where the kernel can write to read-only memory. Fix the kernel's
broken assumptions that it can write to read-only memory, and take care to
never set PROT_KWRITE on user-space pages unless PROT_WRITE is also set;
otherwise user-space would be able to write to read-only memory, since a
page-table entry has only a single writable bit shared by kernel and user.

This achieves W^X for the whole system, except for the core kernel itself,
as the boot code currently doesn't know the purpose of pages when it
identity maps the first 4 MiB.
Jonas 'Sortie' Termansen 2015-08-27 21:39:35 +02:00
parent c8bbd6e0aa
commit 2e3d7c45af
8 changed files with 80 additions and 41 deletions
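
The recurring pattern in the hunks below: map a segment kernel-writable
first, populate it, then switch to the final protections, deriving the
kernel bits from the user bits. A condensed sketch, pieced together from
the ELF loader hunks below (sys_mmap and Process::Execute follow the same
shape; this is illustrative, not a verbatim excerpt):

    int kprot = PROT_KWRITE | PROT_FORK;  // kernel-writable while populating
    int prot = PROT_FORK;                 // final user protections
    if ( pheader->p_flags & PF_X )
        prot |= PROT_EXEC;
    if ( pheader->p_flags & PF_R )
        prot |= PROT_READ | PROT_KREAD;
    if ( pheader->p_flags & PF_W )
        prot |= PROT_WRITE | PROT_KWRITE; // PROT_KWRITE only alongside PROT_WRITE
    if ( !Memory::MapRange(segment.addr, segment.size, kprot, PAGE_USAGE_USER_SPACE) )
        return errno = ENOMEM, 0;
    memset((void*) segment.addr, 0, segment.size); // safe: still kernel-writable
    memcpy((void*) pheader->p_vaddr, file + pheader->p_offset, pheader->p_filesz);
    Memory::ProtectMemory(CurrentProcess(), segment.addr, segment.size, prot);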

View File

@@ -262,13 +262,14 @@ uintptr_t Load(const void* file_ptr, size_t file_size, Auxiliary* aux)
          pheader->p_vaddr % pheader->p_align !=
          pheader->p_offset % pheader->p_align )
         return errno = EINVAL, 0;
-    int prot = PROT_FORK | PROT_KREAD | PROT_KWRITE;
+    int kprot = PROT_KWRITE | PROT_FORK;
+    int prot = PROT_FORK;
     if ( pheader->p_flags & PF_X )
         prot |= PROT_EXEC;
     if ( pheader->p_flags & PF_R )
-        prot |= PROT_READ;
+        prot |= PROT_READ | PROT_KREAD;
     if ( pheader->p_flags & PF_W )
-        prot |= PROT_WRITE;
+        prot |= PROT_WRITE | PROT_KWRITE;
     if ( pheader->p_vaddr < userspace_addr )
         return errno = EINVAL, 0;
@@ -284,7 +285,7 @@ uintptr_t Load(const void* file_ptr, size_t file_size, Auxiliary* aux)
     struct segment segment;
     segment.addr = map_start;
     segment.size = map_size;
-    segment.prot = prot;
+    segment.prot = kprot;
     assert(IsUserspaceSegment(&segment));
@@ -298,7 +299,7 @@ uintptr_t Load(const void* file_ptr, size_t file_size, Auxiliary* aux)
         return errno = EINVAL, 0;
     }
-    if ( !Memory::MapRange(segment.addr, segment.size, prot, PAGE_USAGE_USER_SPACE) )
+    if ( !Memory::MapRange(segment.addr, segment.size, kprot, PAGE_USAGE_USER_SPACE) )
     {
         kthread_mutex_unlock(&process->segment_lock);
         kthread_mutex_unlock(&process->segment_write_lock);
@@ -313,11 +314,12 @@ uintptr_t Load(const void* file_ptr, size_t file_size, Auxiliary* aux)
         return errno = EINVAL, 0;
     }
-    kthread_mutex_unlock(&process->segment_lock);
-    kthread_mutex_unlock(&process->segment_write_lock);
     memset((void*) segment.addr, 0, segment.size);
     memcpy((void*) pheader->p_vaddr, file + pheader->p_offset, pheader->p_filesz);
+    Memory::ProtectMemory(CurrentProcess(), segment.addr, segment.size, prot);
+    kthread_mutex_unlock(&process->segment_lock);
+    kthread_mutex_unlock(&process->segment_write_lock);
     }
 }

View File

@@ -318,7 +318,7 @@ bool ExtractFromPhysicalInto(addr_t physaddr, size_t size, Ref<Descriptor> desc)
     // Map the physical frames onto our address space.
     addr_t mapat = initrd_addr_alloc.from;
     for ( size_t i = 0; i < size; i += Page::Size() )
-        if ( !Memory::Map(physaddr + i, mapat + i, PROT_KREAD) )
+        if ( !Memory::Map(physaddr + i, mapat + i, PROT_KREAD | PROT_KWRITE) )
             PanicF("Unable to map the init ramdisk into virtual memory");
     Memory::Flush();

View File

@@ -148,7 +148,7 @@ bool ProtectMemory(Process* process, uintptr_t addr, size_t size, int prot)
     // First split the segments overlapping with [addr, addr + size) into
     // smaller segments that doesn't cross addr and addr+size, while verifying
     // there are no gaps in that region. This is where the operation can fail as
-    // the AddSegtment call can run out of memory. There is no harm in splitting
+    // the AddSegment call can run out of memory. There is no harm in splitting
     // the segments into smaller chunks.
     for ( size_t offset = 0; offset < size; )
     {
@@ -353,7 +353,7 @@ void* sys_mmap(void* addr_ptr, size_t size, int prot, int flags, int fd,
         new_segment.size = aligned_size;
     else if ( !PlaceSegment(&new_segment, process, (void*) addr, aligned_size, flags) )
         return errno = ENOMEM, MAP_FAILED;
-    new_segment.prot = prot | PROT_KREAD | PROT_KWRITE | PROT_FORK;
+    new_segment.prot = PROT_KWRITE | PROT_FORK;
     // Allocate a memory segment with the desired properties.
     if ( !Memory::MapMemory(process, new_segment.addr, new_segment.size, new_segment.prot) )
@@ -390,6 +390,16 @@ void* sys_mmap(void* addr_ptr, size_t size, int prot, int flags, int fd,
         }
     }
+    // Finally switch to the desired page protections.
+    kthread_mutex_lock(&process->segment_lock);
+    if ( prot & PROT_READ )
+        prot |= PROT_KREAD;
+    if ( prot & PROT_WRITE )
+        prot |= PROT_KWRITE;
+    prot |= PROT_FORK;
+    Memory::ProtectMemory(CurrentProcess(), new_segment.addr, new_segment.size, prot);
+    kthread_mutex_unlock(&process->segment_lock);
     lock1.Reset();
     return (void*) new_segment.addr;

View File

@@ -851,7 +851,8 @@ int Process::Execute(const char* programname, const uint8_t* program,
     size_t raw_tls_size_aligned = -(-raw_tls_size & ~(aux.tls_mem_align-1));
     if ( raw_tls_size && raw_tls_size_aligned == 0 /* overflow */ )
         return errno = EINVAL, -1;
-    int raw_tls_prot = PROT_READ | PROT_KREAD | PROT_KWRITE | PROT_FORK;
+    int raw_tls_kprot = PROT_KWRITE | PROT_FORK;
+    int raw_tls_prot = PROT_READ | PROT_KREAD | PROT_FORK;
     void* raw_tls_hint = stack_hint;
     size_t tls_size = raw_tls_size_aligned + aux.uthread_size;
@@ -873,7 +874,8 @@ int Process::Execute(const char* programname, const uint8_t* program,
     void* tls_hint = stack_hint;
     size_t auxcode_size = Page::Size();
-    int auxcode_prot = PROT_EXEC | PROT_READ | PROT_KREAD | PROT_KWRITE | PROT_FORK;
+    int auxcode_kprot = PROT_KWRITE | PROT_FORK;
+    int auxcode_prot = PROT_EXEC | PROT_READ | PROT_KREAD | PROT_FORK;
     void* auxcode_hint = stack_hint;
     size_t arg_size = 0;
@@ -900,9 +902,9 @@ int Process::Execute(const char* programname, const uint8_t* program,
     if ( !(MapSegment(&arg_segment, stack_hint, arg_size, 0, stack_prot) &&
            MapSegment(&stack_segment, stack_hint, stack_size, 0, stack_prot) &&
-           MapSegment(&raw_tls_segment, raw_tls_hint, raw_tls_size, 0, raw_tls_prot) &&
+           MapSegment(&raw_tls_segment, raw_tls_hint, raw_tls_size, 0, raw_tls_kprot) &&
            MapSegment(&tls_segment, tls_hint, tls_size, 0, tls_prot) &&
-           MapSegment(&auxcode_segment, auxcode_hint, auxcode_size, 0, auxcode_prot)) )
+           MapSegment(&auxcode_segment, auxcode_hint, auxcode_size, 0, auxcode_kprot)) )
     {
         kthread_mutex_unlock(&segment_lock);
         kthread_mutex_unlock(&segment_write_lock);
@@ -910,9 +912,6 @@ int Process::Execute(const char* programname, const uint8_t* program,
         return errno = ENOMEM, -1;
     }
-    kthread_mutex_unlock(&segment_lock);
-    kthread_mutex_unlock(&segment_write_lock);
     char** target_argv = (char**) ((char*) arg_segment.addr + 0);
     char** target_envp = (char**) ((char*) arg_segment.addr + argv_size);
     char* target_strings = (char*) ((char*) arg_segment.addr + argv_size + envp_size);
@@ -945,6 +944,7 @@ int Process::Execute(const char* programname, const uint8_t* program,
     uint8_t* target_raw_tls = (uint8_t*) raw_tls_segment.addr;
     memcpy(target_raw_tls, file_raw_tls, aux.tls_file_size);
     memset(target_raw_tls + aux.tls_file_size, 0, aux.tls_mem_size - aux.tls_file_size);
+    Memory::ProtectMemory(this, raw_tls_segment.addr, raw_tls_segment.size, raw_tls_prot);
     uint8_t* target_tls = (uint8_t*) (tls_segment.addr + tls_offset_tls);
     assert((((uintptr_t) target_tls) & (aux.tls_mem_align-1)) == 0);
@@ -1020,6 +1020,10 @@ int Process::Execute(const char* programname, const uint8_t* program,
     (void) auxcode;
 #warning "You need to initialize auxcode with a sigreturn routine"
 #endif
+    Memory::ProtectMemory(this, auxcode_segment.addr, auxcode_segment.size, auxcode_prot);
+    kthread_mutex_unlock(&segment_lock);
+    kthread_mutex_unlock(&segment_write_lock);
     dtable->OnExecute();

View File

@@ -114,6 +114,8 @@ __start:
     movl $(bootpml1_b + 0x003), bootpml2 + 1 * 8
     # Page Table (identity map the first 4 MiB, except NULL).
+    # TODO: This is insecure as it doesn't restrict write & execute access to
+    # the kernel code & variables appropriately.
     movl $(bootpml1_a + 8), %edi
     movl $0x1003, %esi
     movl $1023, %ecx
@@ -152,15 +154,15 @@ __start:
     orl $0x20, %eax
     movl %eax, %cr4
-    # Enable long mode.
+    # Enable long mode and the No-Execute bit.
     movl $0xC0000080, %ecx
     rdmsr
-    orl $0x100, %eax
+    orl $0x900, %eax
     wrmsr
-    # Enable paging and enter long mode (still 32-bit)
+    # Enable paging (with write protection) and enter long mode (still 32-bit)
     movl %cr0, %eax
-    orl $0x80000000, %eax
+    orl $0x80010000, %eax
     movl %eax, %cr0
     # Load the Global Descriptor Table pointer register.
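
For reference, the magic numbers in this hunk decode as follows (the
constant names here are descriptive, not from the source):

    const uint32_t EFER_LME = 1 << 8;   // long mode enable: the old 0x100
    const uint32_t EFER_NXE = 1 << 11;  // no-execute enable: 0x100 | 0x800 == 0x900
    const uint32_t CR0_WP   = 1 << 16;  // write protect: supervisor honors read-only pages
    const uint32_t CR0_PG   = 1U << 31; // paging: 0x80000000 | 0x10000 == 0x80010000

The same CR0 change appears in the x86 boot code below, which doesn't
touch EFER.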

View File

@@ -414,26 +414,40 @@ namespace Memory {
 addr_t ProtectionToPMLFlags(int prot)
 {
-    addr_t result = 0;
-    if ( prot & PROT_EXEC ) { result |= PML_USERSPACE; }
-    if ( prot & PROT_READ ) { result |= PML_USERSPACE; }
-    if ( prot & PROT_WRITE ) { result |= PML_USERSPACE | PML_WRITABLE; }
-    if ( prot & PROT_KEXEC ) { result |= 0; }
-    if ( prot & PROT_KREAD ) { result |= 0; }
-    if ( prot & PROT_KWRITE ) { result |= 0; }
-    if ( prot & PROT_FORK ) { result |= PML_FORK; }
+    addr_t result = PML_NX;
+    if ( prot & PROT_EXEC )
+    {
+        result |= PML_USERSPACE;
+        result &= ~PML_NX;
+    }
+    if ( prot & PROT_READ )
+        result |= PML_USERSPACE;
+    if ( prot & PROT_WRITE )
+        result |= PML_USERSPACE | PML_WRITABLE;
+    if ( prot & PROT_KEXEC )
+        result &= ~PML_NX;
+    if ( prot & PROT_KREAD )
+        result |= 0;
+    if ( prot & PROT_KWRITE )
+        result |= PML_WRITABLE;
+    if ( prot & PROT_FORK )
+        result |= PML_FORK;
     return result;
 }
 
 int PMLFlagsToProtection(addr_t flags)
 {
-    int prot = PROT_KREAD | PROT_KWRITE | PROT_KEXEC;
-    bool user = flags & PML_USERSPACE;
-    bool write = flags & PML_WRITABLE;
-    if ( user )
-        prot |= PROT_EXEC | PROT_READ;
-    if ( user && write )
+    int prot = PROT_KREAD;
+    if ( (flags & PML_USERSPACE) && !(flags & PML_NX) )
+        prot |= PROT_EXEC;
+    if ( (flags & PML_USERSPACE) )
+        prot |= PROT_READ;
+    if ( (flags & PML_USERSPACE) && (flags & PML_WRITABLE) )
         prot |= PROT_WRITE;
+    if ( !(flags & PML_NX) )
+        prot |= PROT_KEXEC;
+    if ( flags & PML_WRITABLE )
+        prot |= PROT_KWRITE;
     if ( flags & PML_FORK )
         prot |= PROT_FORK;
     return prot;
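
A round trip through the two converters illustrates the invariants
(hypothetical usage on x86_64, not part of the commit):

    int prot = PROT_READ | PROT_WRITE | PROT_FORK;
    addr_t flags = ProtectionToPMLFlags(prot);
    // flags == PML_NX | PML_USERSPACE | PML_WRITABLE | PML_FORK
    int back = PMLFlagsToProtection(flags);
    // back == PROT_READ | PROT_KREAD | PROT_WRITE | PROT_KWRITE | PROT_FORK

Because an entry has a single writable bit and a single NX bit, a
user-writable page is necessarily kernel-writable, and a page mapped
without PROT_EXEC or PROT_KEXEC is non-executable for both. (On x86,
where PML_NX is 0, the execute bits are always granted back.)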

View File

@@ -1,6 +1,6 @@
 /*******************************************************************************
 
-    Copyright(C) Jonas 'Sortie' Termansen 2011, 2012, 2014.
+    Copyright(C) Jonas 'Sortie' Termansen 2011, 2012, 2014, 2015.
 
     This file is part of Sortix.
@@ -47,8 +47,13 @@ const addr_t PML_AVAILABLE1 = 1 << 9;
 const addr_t PML_AVAILABLE2 = 1 << 10;
 const addr_t PML_AVAILABLE3 = 1 << 11;
 const addr_t PML_FORK = PML_AVAILABLE1;
-const addr_t PML_FLAGS = 0xFFFUL; // Bits used for the flags.
-const addr_t PML_ADDRESS = ~0xFFFUL; // Bits used for the address.
+#ifdef __x86_64__
+const addr_t PML_NX = 1UL << 63;
+#else
+const addr_t PML_NX = 0;
+#endif
+const addr_t PML_FLAGS = 0xFFFUL | PML_NX; // Bits used for the flags.
+const addr_t PML_ADDRESS = ~PML_FLAGS; // Bits used for the address.
 const addr_t PAT_UC = 0x00; // Uncacheable
 const addr_t PAT_WC = 0x01; // Write-Combine
 const addr_t PAT_WT = 0x04; // Writethrough
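
Since PML_NX lives in bit 63 on x86_64, code splitting an entry must use
the widened masks instead of the old 0xFFFUL literals. A pair of
hypothetical helpers showing the intended use of these constants:

    static inline addr_t EntryAddress(addr_t entry) { return entry & PML_ADDRESS; }
    static inline addr_t EntryFlags(addr_t entry) { return entry & PML_FLAGS; }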

View File

@@ -80,6 +80,8 @@ __start:
     movl $(bootpml1 + 0x003), bootpml2 + 0 * 4
     # Page Table (identity map the first 4 MiB, except NULL).
+    # TODO: This is insecure as it doesn't restrict write & execute access to
+    # the kernel code & variables appropriately.
     movl $(bootpml1 + 4), %edi
     movl $0x1003, %esi
     movl $1023, %ecx
@@ -103,9 +105,9 @@ __start:
     movl $(physpml1 + 0x003), bootpml2 + 1021 * 4
     movl $(physpml0 + 0x003), physpml1 + 0 * 4
-    # Enable paging.
+    # Enable paging (with write protection).
     movl %cr0, %edi
-    orl $0x80000000, %edi
+    orl $0x80010000, %edi
     movl %edi, %cr0
     # Load the Global Descriptor Table pointer register.