From 8c824d3caa361f28bc80deba1e7a65ffc69be717 Mon Sep 17 00:00:00 2001
From: Jonas 'Sortie' Termansen
Date: Sun, 19 Mar 2023 15:02:18 +0100
Subject: [PATCH] Add strptime(3).

---
 libc/Makefile                |   1 +
 libc/time/strptime.c         | 310 +++++++++++++++++++++++++++++++++++
 share/man/man7/portability.7 |   3 -
 3 files changed, 311 insertions(+), 3 deletions(-)
 create mode 100644 libc/time/strptime.c

diff --git a/libc/Makefile b/libc/Makefile
index 48f0f2ba..76b75862 100644
--- a/libc/Makefile
+++ b/libc/Makefile
@@ -258,6 +258,7 @@ time/mktime.o \
 timespec/timespec.o \
 time/strftime_l.o \
 time/strftime.o \
+time/strptime.o \
 time/timegm.o \
 ubsan/ubsan.o \
 wchar/btowc.o \
diff --git a/libc/time/strptime.c b/libc/time/strptime.c
new file mode 100644
index 00000000..c42d3e2a
--- /dev/null
+++ b/libc/time/strptime.c
@@ -0,0 +1,310 @@
+/*
+ * Copyright (c) 2023 Jonas 'Sortie' Termansen.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ * time/strptime.c
+ * Parse date and time.
+ */
+
+#include <ctype.h>
+#include <limits.h>
+#include <stdbool.h>
+#include <string.h>
+#include <time.h>
+
+#ifdef TEST
+#include <strings.h>
+#define strptime mystrptime
+#endif
+
+static const char* wdays[] = {"sunday", "monday", "tuesday", "wednesday",
+                              "thursday", "friday", "saturday", NULL};
+static const char* months[] = {"january", "february", "march", "april", "may",
+                               "june", "july", "august", "september", "october",
+                               "november", "december", NULL};
+
+// Match the start of str, ignoring case, against each name in the
+// NULL-terminated list, accepting the full name or its first three letters.
+// Stores the index of the matching name in *output and returns a pointer past
+// the matched text, or returns NULL if no name matches.
+// TODO: Maximum width.
+static const char* strptime_match(const char* str,
+                                  int* output,
+                                  const char* const* list)
+{
+	for ( int i = 0; list[i]; i++ )
+	{
+		size_t len = strlen(list[i]);
+		if ( !strncasecmp(str, list[i], len) )
+			return *output = i, str + len;
+		if ( !strncasecmp(str, list[i], 3) )
+			return *output = i, str + 3;
+	}
+	return NULL;
+}
+
+// Parse the run of decimal digits at the start of str and require the value to
+// be within minimum and maximum (inclusive). Stores the value plus offset in
+// *output and returns a pointer past the digits, or returns NULL if str does
+// not start with a digit, the value is out of range, or it overflows an int.
+// TODO: Maximum width.
+static const char* strptime_digits(const char* str,
+                                   int* output,
+                                   int minimum,
+                                   int maximum,
+                                   int offset)
+{
+	if ( *str < '0' || '9' < *str )
+		return NULL;
+	// TODO: More exact with number of digits.
+	int value = 0;
+	while ( '0' <= *str && *str <= '9' )
+		if ( __builtin_mul_overflow(value, 10, &value) ||
+		     __builtin_add_overflow(value, *str++ - '0', &value) )
+			return NULL;
+	if ( value < minimum || maximum < value )
+		return NULL;
+	if ( __builtin_add_overflow(value, offset, &value) )
+		return NULL;
+	return *output = value, str;
+}
+
+// Parse str according to format and store the parsed fields in tm. Returns a
+// pointer to the first character of str that was not consumed, or NULL if str
+// does not match format or format contains an unsupported conversion.
+char* strptime(const char* restrict str,
+               const char* restrict format,
+               struct tm* restrict tm)
+{
+	bool pm = false;
+	bool need_mktime = false;
+	for ( size_t i = 0; format[i]; )
+	{
+		if ( isspace((unsigned char) format[i]) )
+		{
+			do i++;
+			while ( isspace((unsigned char) format[i]) );
+			// Whitespace in the format matches zero or more
+			// whitespace characters in the input.
+			while ( isspace((unsigned char) *str) )
+				str++;
+			continue;
+		}
+		else if ( format[i] != '%' )
+		{
+			if ( format[i++] != *str++ )
+				return NULL;
+			continue;
+		}
+		i++;
+		if ( format[i] == '0' || format[i] == '+' )
+			i++;
+		size_t width = 0;
+		while ( '0' <= format[i] && format[i] <= '9' )
+			width = width * 10 + (format[i++] - '0');
+		// TODO: Maximum width.
+		bool modifier_E = false, modifier_O = false;
+		if ( format[i] == 'E' )
+			modifier_E = true, i++;
+		else if ( format[i] == 'O' )
+			modifier_O = true, i++;
+		(void) modifier_E, (void) modifier_O;
+		switch ( format[i] )
+		{
+		case 'a':
+		case 'A': str = strptime_match(str, &tm->tm_wday, wdays); break;
+		case 'b':
+		case 'B':
+		case 'h':
+			str = strptime_match(str, &tm->tm_mon, months);
+			need_mktime = true;
+			break;
+		// TODO: %c locale time and date
+		// TODO: %C century
+		case 'd':
+		case 'e':
+			str = strptime_digits(str, &tm->tm_mday, 1, 31, 0);
+			need_mktime = true;
+			break;
+		case 'D': str = strptime(str, "%m/%d/%y", tm); break;
+		case 'H': str = strptime_digits(str, &tm->tm_hour, 0, 23, 0); break;
+		case 'I':
+			str = strptime_digits(str, &tm->tm_hour, 1, 12, 0);
+			if ( str && tm->tm_hour == 12 )
+				tm->tm_hour = 0;
+			break;
+		case 'j': str = strptime_digits(str, &tm->tm_yday, 1, 366, -1); break;
+		case 'm':
+			str = strptime_digits(str, &tm->tm_mon, 1, 12, -1);
+			need_mktime = true;
+			break;
+		case 'M': str = strptime_digits(str, &tm->tm_min, 0, 59, 0); break;
+		case 'n':
+		case 't':
+			while ( isspace((unsigned char) *str) )
+				str++;
+			break;
+		case 'p':
+			if ( !strncasecmp(str, "am", 2) )
+				str += 2, pm = false;
+			else if ( !strncasecmp(str, "pm", 2) )
+				str += 2, pm = true;
+			else
+				return NULL;
+			break;
+		case 'r': str = strptime(str, "%I:%M:%S %p", tm); break;
+		case 'R': str = strptime(str, "%H:%M", tm); break;
+		case 'S': str = strptime_digits(str, &tm->tm_sec, 0, 60, 0); break;
+		case 'T': str = strptime(str, "%H:%M:%S", tm); break;
+		// TODO: %U week number
+		case 'w': str = strptime_digits(str, &tm->tm_wday, 0, 6, 0); break;
+		// TODO: %W week number
+		case 'x': str = strptime(str, "%m/%d/%Y", tm); break;
+		case 'X': str = strptime(str, "%H:%M:%S", tm); break;
+		case 'y':
+			// 69-99 means 1969-1999 and 00-68 means 2000-2068.
+			str = strptime_digits(str, &tm->tm_year, 0, 99, 0);
+			if ( str && tm->tm_year < 69 )
+				tm->tm_year += 100;
+			need_mktime = true;
+			break;
+		case 'Y':
+			// TODO: Minimum yield width.
+			str = strptime_digits(str, &tm->tm_year, INT_MIN, INT_MAX, -1900);
+			need_mktime = true;
+			break;
+		case 'z':
+			// TODO: More exact.
+			// The offset is a sign followed by exactly hhmm.
+			if ( *str != '-' && *str != '+' )
+				return NULL;
+			str++;
+			for ( int n = 0; n < 4; n++ )
+				if ( str[n] < '0' || '9' < str[n] )
+					return NULL;
+			if ( 12 < (str[0] - '0') * 10 + (str[1] - '0') ||
+			     59 < (str[2] - '0') * 10 + (str[3] - '0') )
+				return NULL;
+			str += 4;
+			tm->tm_isdst = 0;
+			// TODO: What is done with this timezone information?
+			break;
+		case 'Z':
+			// TODO: Other timezones.
+			if ( strncmp(str, "UTC", 3) != 0 )
+				return NULL;
+			str += 3;
+			tm->tm_isdst = 0;
+			// TODO: What is done with this timezone information?
+			break;
+		case '%':
+			if ( *str++ != '%' )
+				return NULL;
+			break;
+		// Reject unsupported conversions and a format ending after '%'
+		// instead of silently skipping them.
+		default: return NULL;
+		}
+		if ( !str )
+			return NULL;
+		i++;
+	}
+	if ( str )
+	{
+		if ( pm )
+			tm->tm_hour += 12;
+		// NOTE(review): mktime normalizes all of tm as local time, so
+		// fields not set by format must already be valid - confirm.
+		if ( need_mktime )
+			mktime(tm);
+	}
+	return (char*) str;
+}
+
+#ifdef TEST
+#undef strptime
+
+#include <err.h>
+#include <stdio.h>
+
+// Test driver: parse argv[1] with the format argv[2] using both mystrptime and
+// the host strptime, then report whether the results and tm fields agree.
+int main(int argc, char* argv[])
+{
+	if ( argc < 3 )
+		err(1, "usage");
+	const char* str = argv[1];
+	const char* format = argv[2];
+	struct tm my_tm = {0};
+	char* my_end = mystrptime(str, format, &my_tm);
+	struct tm c_tm = {0};
+	char* c_end = strptime(str, format, &c_tm);
+	if ( !my_end && c_end )
+		errx(1, "rejected but c allowed it");
+	else if ( !my_end )
+		errx(1, "rejected correctly");
+	else if ( !c_end )
+		printf("allowed but c rejected\n");
+	else if ( my_end != c_end )
+		errx(1, "mismatch my end \"%s\" vs c end \"%s\"", my_end, c_end);
+
+	if ( my_tm.tm_sec == c_tm.tm_sec )
+		printf("tm_sec=%i\n", my_tm.tm_sec);
+	else
+		printf("tm_sec=%i but C is %i\n", my_tm.tm_sec, c_tm.tm_sec);
+
+	if ( my_tm.tm_min == c_tm.tm_min )
+		printf("tm_min=%i\n", my_tm.tm_min);
+	else
+		printf("tm_min=%i but C is %i\n", my_tm.tm_min, c_tm.tm_min);
+
+	if ( my_tm.tm_hour == c_tm.tm_hour )
+		printf("tm_hour=%i\n", my_tm.tm_hour);
+	else
+		printf("tm_hour=%i but C is %i\n", my_tm.tm_hour, c_tm.tm_hour);
+
+	if ( my_tm.tm_mday == c_tm.tm_mday )
+		printf("tm_mday=%i\n", my_tm.tm_mday);
+	else
+		printf("tm_mday=%i but C is %i\n", my_tm.tm_mday, c_tm.tm_mday);
+
+	if ( my_tm.tm_mon == c_tm.tm_mon )
+		printf("tm_mon=%i\n", my_tm.tm_mon);
+	else
+		printf("tm_mon=%i but C is %i\n", my_tm.tm_mon, c_tm.tm_mon);
+
+	if ( my_tm.tm_year == c_tm.tm_year )
+		printf("tm_year=%i\n", my_tm.tm_year);
+	else
+		printf("tm_year=%i but C is %i\n", my_tm.tm_year, c_tm.tm_year);
+
+	if ( my_tm.tm_wday == c_tm.tm_wday )
+		printf("tm_wday=%i\n", my_tm.tm_wday);
+	else
+		printf("tm_wday=%i but C is %i\n", my_tm.tm_wday, c_tm.tm_wday);
+
+	if ( my_tm.tm_yday == c_tm.tm_yday )
+		printf("tm_yday=%i\n", my_tm.tm_yday);
+	else
+		printf("tm_yday=%i but C is %i\n", my_tm.tm_yday, c_tm.tm_yday);
+
+	if ( my_tm.tm_isdst == c_tm.tm_isdst )
+		printf("tm_isdst=%i\n", my_tm.tm_isdst);
+	else
+		printf("tm_isdst=%i but C is %i\n", my_tm.tm_isdst, c_tm.tm_isdst);
+
+	return 0;
+}
+
+#endif
diff --git a/share/man/man7/portability.7 b/share/man/man7/portability.7
index 323d09e0..aa6d5cee 100644
--- a/share/man/man7/portability.7
+++ b/share/man/man7/portability.7
@@ -243,9 +243,6 @@ should be use instead as the destination buffer size should always be known,
 otherwise the invocation is suspicious.
 The superior alternative is to combine allocation and initialization using
 .Xr asprintf 3 .
-.Ss strptime
-.Xr strptime 3
-is not currently implemented.
 .Ss
 .In sys/param.h
 is not implemented as there is little agreement on what it's supposed to contain