Rewrite scanf(3) to be standards compliant.
This change adds missing features:
* %[ for pattern matching with a scanset.
* %m for allocation of strings.
* %p for pointers.
* Field width for integers.
The following features remain unimplemented, as is also the case for printf(3):
* Floating point support.
* Wide character support.
* %n$ positional parameters.
The code has been completely refactored to be much more maintainable. The implemented features should now be standards compliant. A large number of edge cases have been fixed. The vscanf_callback(3) function has been renamed to vcbscanf(3) and a new cbscanf(3) function has been added.
This commit is contained in:
parent
976d686779
commit
221fa7d954
|
@ -75,6 +75,7 @@ signal/sigorset.o \
|
|||
ssp/__stack_chk_fail.o \
|
||||
stdio/asprintf.o \
|
||||
stdio/cbprintf.o \
|
||||
stdio/cbscanf.o \
|
||||
stdio/clearerr.o \
|
||||
stdio/clearerr_unlocked.o \
|
||||
stdio/dprintf.o \
|
||||
|
@ -148,7 +149,7 @@ stdio/vdprintf.o \
|
|||
stdio/vfprintf_unlocked.o \
|
||||
stdio/vfscanf.o \
|
||||
stdio/vfscanf_unlocked.o \
|
||||
stdio/vscanf_callback.o \
|
||||
stdio/vcbscanf.o \
|
||||
stdio/vsnprintf.o \
|
||||
stdio/vsprintf.o \
|
||||
stdio/vsscanf.o \
|
||||
|
|
|
@ -298,13 +298,22 @@ int fshutdown(FILE* fp);
|
|||
#if __USE_SORTIX
|
||||
int cbprintf(void*, size_t (*)(void*, const char*, size_t), const char*, ...)
|
||||
__attribute__((__format__ (printf, 3, 4)));
|
||||
int vcbprintf(void*, size_t (*)(void*, const char*, size_t), const char*, __gnuc_va_list ap)
|
||||
int cbscanf(void*,
|
||||
int (*)(void*),
|
||||
int (*)(int, void*),
|
||||
const char* __restrict,
|
||||
...)
|
||||
__attribute__((__format__ (scanf, 4, 5)));
|
||||
int vcbprintf(void*,
|
||||
size_t (*)(void*, const char*, size_t),
|
||||
const char*,
|
||||
__gnuc_va_list)
|
||||
__attribute__((__format__ (printf, 3, 0)));
|
||||
int vscanf_callback(void* fp,
|
||||
int (*fgetc)(void*),
|
||||
int (*ungetc)(int, void*),
|
||||
const char* __restrict format,
|
||||
__gnuc_va_list ap)
|
||||
int vcbscanf(void*,
|
||||
int (*)(void*),
|
||||
int (*)(int, void*),
|
||||
const char* __restrict,
|
||||
__gnuc_va_list)
|
||||
__attribute__((__format__ (scanf, 4, 0)));
|
||||
#endif
|
||||
|
||||
|
|
|
@ -0,0 +1,34 @@
|
|||
/*
|
||||
* Copyright (c) 2020 Jonas 'Sortie' Termansen.
|
||||
*
|
||||
* Permission to use, copy, modify, and distribute this software for any
|
||||
* purpose with or without fee is hereby granted, provided that the above
|
||||
* copyright notice and this permission notice appear in all copies.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
||||
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||||
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
*
|
||||
* stdio/cbscanf.c
|
||||
* Scans formatted input that is read via callback functions.
|
||||
*/
|
||||
|
||||
#include <stdarg.h>
|
||||
#include <stdio.h>
|
||||
|
||||
/*
 * Variadic front end to vcbscanf: packs the trailing arguments into a va_list
 * and forwards everything unchanged.
 *
 * fp      opaque cookie handed to both callbacks.
 * fgetc   callback used to read the next input byte.
 * ungetc  callback used to push one byte back.
 * format  scanf-style format string.
 *
 * Returns whatever vcbscanf returns.
 */
int cbscanf(void* fp,
            int (*fgetc)(void*),
            int (*ungetc)(int, void*),
            const char* restrict format,
            ...)
{
	va_list args;
	int count;

	va_start(args, format);
	count = vcbscanf(fp, fgetc, ungetc, format, args);
	va_end(args);

	return count;
}
|
|
@ -0,0 +1,469 @@
|
|||
/*
|
||||
* Copyright (c) 2012, 2014, 2016, 2020 Jonas 'Sortie' Termansen.
|
||||
*
|
||||
* Permission to use, copy, modify, and distribute this software for any
|
||||
* purpose with or without fee is hereby granted, provided that the above
|
||||
* copyright notice and this permission notice appear in all copies.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
||||
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||||
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
*
|
||||
* stdio/vcbscanf.c
|
||||
* Input format conversion.
|
||||
*/
|
||||
|
||||
#include <ctype.h>
|
||||
#include <errno.h>
|
||||
#include <limits.h>
|
||||
#include <stdarg.h>
|
||||
#include <stdbool.h>
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
// Selects the C type that an integer conversion or %n stores through.
// vcbscanf derives it from the length modifier of the conversion specifier.
enum scan_type
|
||||
{
|
||||
TYPE_SHORT, // "h"
|
||||
TYPE_SHORTSHORT, // "hh"
|
||||
TYPE_INT, // no length modifier (the default)
|
||||
TYPE_LONG, // "l"
|
||||
TYPE_LONGLONG, // "ll" or "L"
|
||||
TYPE_SIZE, // "z"
|
||||
TYPE_PTRDIFF, // "t"
|
||||
TYPE_MAX, // "j"
|
||||
TYPE_PTR, // set internally for %p, never by a length modifier
|
||||
};
|
||||
|
||||
/*
 * Converts the digit character c to its numeric value in the given base.
 *
 * Accepts '0'-'9', 'a'-'f' and 'A'-'F'. Returns the digit value, or -1 if c is
 * not a digit or its value is not below base. The character '0' always yields
 * 0, whatever base is.
 */
static int debase(unsigned char c, int base)
{
	int digit;

	if ( c >= '0' && c <= '9' )
		digit = c - '0';
	else if ( c >= 'a' && c <= 'f' )
		digit = 10 + (c - 'a');
	else if ( c >= 'A' && c <= 'F' )
		digit = 10 + (c - 'A');
	else
		return -1;

	// Only '0' maps to zero, and it is accepted regardless of the base.
	if ( digit == 0 )
		return 0;

	return digit < base ? digit : -1;
}
|
||||
|
||||
/*
 * Parses the body of a %[...] scanset.
 *
 * spec points just past the '[' of the conversion specifier. On return,
 * scanset[b] is true for every byte value b that the scanset accepts.
 *
 * Syntax handled: a leading '^' negates the set; a ']' directly after the
 * optional '^' is a literal member; "a-b" denotes an inclusive range unless
 * the '-' is the last character before the closing ']'.
 *
 * Returns the offset into spec where parsing stopped. The scanset is well
 * formed only if spec[offset] is the closing ']'; any other character there
 * (the terminating NUL or a rejected byte) means the specifier is invalid.
 */
static size_t parse_scanset(bool scanset[256], const char* spec)
{
	bool negate = spec[0] == '^';
	size_t offset = negate ? 1 : 0;
	for ( size_t i = 0; i < 256; i++ )
		scanset[i] = negate;
	if ( spec[offset] == ']' )
	{
		offset++;
		scanset[(unsigned char) ']'] = !negate;
	}
	for ( ; spec[offset] && spec[offset] != ']'; offset++ )
	{
		unsigned char c = (unsigned char) spec[offset];
		// Only allow ASCII (0-127) in the scanset besides negation.
		if ( 128 <= c )
			return offset;
		if ( spec[offset + 1] == '-' &&
		     spec[offset + 2] &&
		     spec[offset + 2] != ']' )
		{
			unsigned char to = (unsigned char) spec[offset + 2];
			// The upper end of a range must be ASCII as well, otherwise the
			// range would mark non-ASCII bytes as members.
			if ( 128 <= to )
				return offset;
			for ( int i = c; i <= to; i++ )
				scanset[i] = !negate;
			offset += 2;
		}
		else
			scanset[c] = !negate;
	}
	return offset;
}
|
||||
|
||||
int vcbscanf(void* fp,
|
||||
int (*fgetc)(void*),
|
||||
int (*ungetc)(int, void*),
|
||||
const char* restrict format,
|
||||
va_list ap)
|
||||
{
|
||||
int matched_items = 0;
|
||||
uintmax_t bytes_parsed = 0;
|
||||
int ic = 0;
|
||||
while ( *format )
|
||||
{
|
||||
if ( isspace((unsigned char) *format) )
|
||||
{
|
||||
do format++;
|
||||
while ( isspace((unsigned char) *format) );
|
||||
while ( true )
|
||||
{
|
||||
ic = fgetc(fp);
|
||||
if ( ic == EOF )
|
||||
break;
|
||||
bytes_parsed++;
|
||||
if ( !isspace(ic) )
|
||||
{
|
||||
ungetc(ic, fp);
|
||||
bytes_parsed--;
|
||||
break;
|
||||
}
|
||||
}
|
||||
continue;
|
||||
}
|
||||
else if ( format[0] != '%' || format[1] == '%' )
|
||||
{
|
||||
if ( *format == '%' )
|
||||
format++;
|
||||
unsigned char c = *format++;
|
||||
ic = fgetc(fp);
|
||||
if ( ic == EOF )
|
||||
return matched_items ? matched_items : EOF;
|
||||
bytes_parsed++;
|
||||
if ( ic != c )
|
||||
{
|
||||
ungetc(ic, fp);
|
||||
bytes_parsed--;
|
||||
break;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
format++;
|
||||
bool discard = format[0] == '*';
|
||||
if ( discard )
|
||||
format++;
|
||||
size_t field_width = 0;
|
||||
while ( '0'<= *format && *format <= '9' )
|
||||
field_width = field_width * 10 + *format++ - '0';
|
||||
bool allocate = false;
|
||||
if ( *format == 'm' )
|
||||
{
|
||||
allocate = true;
|
||||
format++;
|
||||
}
|
||||
enum scan_type scan_type = TYPE_INT;
|
||||
switch ( *format++ )
|
||||
{
|
||||
case 'h':
|
||||
scan_type = *format == 'h' ?
|
||||
(format++, TYPE_SHORTSHORT) :
|
||||
TYPE_SHORT;
|
||||
break;
|
||||
case 'j': scan_type = TYPE_MAX; break;
|
||||
case 'l':
|
||||
scan_type = *format == 'l' ?
|
||||
(format++, TYPE_LONGLONG) :
|
||||
TYPE_LONG;
|
||||
break;
|
||||
case 'L': scan_type = TYPE_LONGLONG; break;
|
||||
case 't': scan_type = TYPE_PTRDIFF; break;
|
||||
case 'z': scan_type = TYPE_SIZE; break;
|
||||
default: format--;
|
||||
}
|
||||
if ( *format == 'd' || *format == 'i' || *format == 'o' ||
|
||||
*format == 'u' || *format == 'x' || *format == 'X' ||
|
||||
*format == 'p' )
|
||||
{
|
||||
int base;
|
||||
bool is_unsigned;
|
||||
switch ( *format++ )
|
||||
{
|
||||
case 'd': base = 10; is_unsigned = false; break;
|
||||
case 'i': base = 0; is_unsigned = false; break;
|
||||
case 'o': base = 8; is_unsigned = true; break;
|
||||
case 'u': base = 10; is_unsigned = true; break;
|
||||
case 'p':
|
||||
if ( scan_type != TYPE_INT )
|
||||
return errno = EINVAL, matched_items ? matched_items : EOF;
|
||||
scan_type = TYPE_PTR;
|
||||
case 'X':
|
||||
case 'x': base = 16; is_unsigned = true; break;
|
||||
default: __builtin_unreachable();
|
||||
}
|
||||
if ( allocate )
|
||||
return errno = EINVAL, matched_items ? matched_items : EOF;
|
||||
bool parsed_int = false;
|
||||
uintmax_t int_value = 0;
|
||||
bool negative = false;
|
||||
bool has_prefix = false;
|
||||
bool maybe_base16 = false;
|
||||
if ( !field_width )
|
||||
field_width = SIZE_MAX;
|
||||
size_t i = 0;
|
||||
for ( ; i < field_width; i++ )
|
||||
{
|
||||
ic = fgetc(fp);
|
||||
if ( ic == EOF )
|
||||
break;
|
||||
bytes_parsed++;
|
||||
if ( i == 0 && isspace(ic) )
|
||||
{
|
||||
i--;
|
||||
continue;
|
||||
}
|
||||
if ( ic == '-' && i == 0 && !has_prefix )
|
||||
{
|
||||
negative = true;
|
||||
has_prefix = true;
|
||||
}
|
||||
else if ( ic == '+' && i == 0 && !has_prefix )
|
||||
{
|
||||
negative = false;
|
||||
has_prefix = true;
|
||||
}
|
||||
else if ( (ic == 'x' || ic == 'X') &&
|
||||
(base == 0 || base == 16) &&
|
||||
(has_prefix ? i == 2 : i == 1) &&
|
||||
int_value == 0 )
|
||||
{
|
||||
maybe_base16 = true;
|
||||
parsed_int = false;
|
||||
}
|
||||
else if ( ic == '0' && (has_prefix ? i == 1 : i == 0) )
|
||||
{
|
||||
int_value = 0;
|
||||
parsed_int = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
if ( base == 0 )
|
||||
{
|
||||
if ( maybe_base16 )
|
||||
{
|
||||
if ( debase(ic, 16) < 0 )
|
||||
{
|
||||
ungetc(ic, fp);
|
||||
bytes_parsed--;
|
||||
break;
|
||||
}
|
||||
base = 16;
|
||||
}
|
||||
else if ( parsed_int )
|
||||
base = 8;
|
||||
else
|
||||
base = 10;
|
||||
}
|
||||
int cval = debase(ic, base);
|
||||
if ( cval < 0 )
|
||||
{
|
||||
ungetc(ic, fp);
|
||||
bytes_parsed--;
|
||||
break;
|
||||
}
|
||||
if ( __builtin_mul_overflow(int_value, base, &int_value) )
|
||||
int_value = UINTMAX_MAX;
|
||||
if ( __builtin_add_overflow(int_value, cval, &int_value) )
|
||||
int_value = UINTMAX_MAX;
|
||||
parsed_int = true;
|
||||
}
|
||||
}
|
||||
if ( !parsed_int )
|
||||
return matched_items || i || ic != EOF ? matched_items : EOF;
|
||||
if ( discard )
|
||||
continue;
|
||||
uintmax_t uintmaxval = int_value;
|
||||
if ( negative )
|
||||
uintmaxval = -uintmaxval;
|
||||
intmax_t intmaxval = uintmaxval;
|
||||
bool un = is_unsigned;
|
||||
switch ( scan_type )
|
||||
{
|
||||
case TYPE_SHORTSHORT:
|
||||
if ( un ) *va_arg(ap, unsigned char*) = uintmaxval;
|
||||
else *va_arg(ap, signed char*) = intmaxval;
|
||||
break;
|
||||
case TYPE_SHORT:
|
||||
if ( un ) *va_arg(ap, unsigned short*) = uintmaxval;
|
||||
else *va_arg(ap, signed short*) = intmaxval;
|
||||
break;
|
||||
case TYPE_INT:
|
||||
if ( un ) *va_arg(ap, unsigned int*) = uintmaxval;
|
||||
else *va_arg(ap, signed int*) = intmaxval;
|
||||
break;
|
||||
case TYPE_LONG:
|
||||
if ( un ) *va_arg(ap, unsigned long*) = uintmaxval;
|
||||
else *va_arg(ap, signed long*) = intmaxval;
|
||||
break;
|
||||
case TYPE_LONGLONG:
|
||||
if ( un ) *va_arg(ap, unsigned long long*) = uintmaxval;
|
||||
else *va_arg(ap, signed long long*) = intmaxval;
|
||||
break;
|
||||
case TYPE_PTRDIFF:
|
||||
*va_arg(ap, ptrdiff_t*) = intmaxval;
|
||||
break;
|
||||
case TYPE_SIZE:
|
||||
if ( un ) *va_arg(ap, size_t*) = uintmaxval;
|
||||
else *va_arg(ap, ssize_t*) = intmaxval;
|
||||
break;
|
||||
case TYPE_MAX:
|
||||
if ( un ) *va_arg(ap, uintmax_t*) = uintmaxval;
|
||||
else *va_arg(ap, intmax_t*) = intmaxval;
|
||||
break;
|
||||
case TYPE_PTR:
|
||||
*va_arg(ap, void**) = (void*) (uintptr_t) uintmaxval;
|
||||
break;
|
||||
}
|
||||
matched_items++;
|
||||
}
|
||||
else if ( *format == 's' || *format == '[' || *format == 'c' ||
|
||||
*format == 'C' || *format == 'S' )
|
||||
{
|
||||
bool scanset[256];
|
||||
bool string;
|
||||
bool use_scanset;
|
||||
switch ( *format++ )
|
||||
{
|
||||
case 'S':
|
||||
if ( scan_type != TYPE_INT )
|
||||
return errno = EINVAL, matched_items ? matched_items : EOF;
|
||||
scan_type = TYPE_LONG;
|
||||
case 's': string = true; use_scanset = false; break;
|
||||
case '[': string = true; use_scanset = true; break;
|
||||
case 'C':
|
||||
if ( scan_type != TYPE_INT )
|
||||
return errno = EINVAL, matched_items ? matched_items : EOF;
|
||||
scan_type = TYPE_LONG;
|
||||
case 'c': string = false; use_scanset = false; break;
|
||||
default: __builtin_unreachable();
|
||||
}
|
||||
if ( use_scanset )
|
||||
{
|
||||
size_t offset = parse_scanset(scanset, format);
|
||||
if ( format[offset] != ']' )
|
||||
return errno = EINVAL, matched_items ? matched_items : EOF;
|
||||
format += offset + 1;
|
||||
}
|
||||
if ( scan_type != TYPE_INT )
|
||||
{
|
||||
#ifndef __is_sortix_libk
|
||||
fprintf(stderr,
|
||||
"%s:%u: error: scanf does not support wide strings\n",
|
||||
__FILE__, __LINE__);
|
||||
#endif
|
||||
return errno = EINVAL, matched_items ? matched_items : EOF;
|
||||
}
|
||||
if ( !field_width )
|
||||
field_width = string ? SIZE_MAX : 1;
|
||||
char** strptr = NULL;
|
||||
char* str = NULL;
|
||||
size_t strsize = 0;
|
||||
if ( !discard )
|
||||
{
|
||||
if ( allocate )
|
||||
{
|
||||
strptr = va_arg(ap, char**);
|
||||
strsize = 16;
|
||||
str = (char*) malloc(strsize);
|
||||
if ( !str )
|
||||
return matched_items ? matched_items : EOF;
|
||||
}
|
||||
else
|
||||
str = va_arg(ap, char*);
|
||||
}
|
||||
size_t i = 0;
|
||||
while ( i < field_width )
|
||||
{
|
||||
ic = fgetc(fp);
|
||||
if ( ic == EOF )
|
||||
break;
|
||||
bytes_parsed++;
|
||||
if ( string && (use_scanset ? !scanset[ic] : isspace(ic)) )
|
||||
{
|
||||
if ( !use_scanset && !i )
|
||||
continue;
|
||||
ungetc(ic, fp);
|
||||
bytes_parsed--;
|
||||
break;
|
||||
}
|
||||
if ( !discard )
|
||||
{
|
||||
if ( allocate && i + string == strsize )
|
||||
{
|
||||
char* newstr = (char*) reallocarray(str, strsize, 2);
|
||||
if ( !newstr )
|
||||
{
|
||||
free(str);
|
||||
return matched_items ? matched_items : EOF;
|
||||
}
|
||||
str = newstr;
|
||||
strsize *= 2;
|
||||
}
|
||||
str[i++] = (char) ic;
|
||||
}
|
||||
else
|
||||
i++;
|
||||
}
|
||||
if ( string ? !i : i < field_width )
|
||||
{
|
||||
if ( !discard && allocate )
|
||||
free(str);
|
||||
return matched_items || i || ic != EOF ? matched_items : EOF;
|
||||
}
|
||||
if ( !discard )
|
||||
{
|
||||
if ( string )
|
||||
str[i] = '\0';
|
||||
if ( allocate )
|
||||
{
|
||||
char* newstr = realloc(str, i + string);
|
||||
str = newstr ? newstr : str;
|
||||
*strptr = str;
|
||||
}
|
||||
if ( string || i == field_width )
|
||||
matched_items++;
|
||||
}
|
||||
}
|
||||
else if ( *format == 'n' )
|
||||
{
|
||||
format++;
|
||||
if ( allocate )
|
||||
return errno = EINVAL, matched_items ? matched_items : EOF;
|
||||
switch ( scan_type )
|
||||
{
|
||||
case TYPE_SHORTSHORT:
|
||||
*va_arg(ap, signed char*) = bytes_parsed;
|
||||
break;
|
||||
case TYPE_SHORT:
|
||||
*va_arg(ap, signed short*) = bytes_parsed;
|
||||
break;
|
||||
case TYPE_INT:
|
||||
*va_arg(ap, signed int*) = bytes_parsed;
|
||||
break;
|
||||
case TYPE_LONG:
|
||||
*va_arg(ap, signed long*) = bytes_parsed;
|
||||
break;
|
||||
case TYPE_LONGLONG:
|
||||
*va_arg(ap, signed long long*) = bytes_parsed;
|
||||
break;
|
||||
case TYPE_PTRDIFF:
|
||||
*va_arg(ap, ptrdiff_t*) = bytes_parsed;
|
||||
break;
|
||||
case TYPE_SIZE:
|
||||
*va_arg(ap, ssize_t*) = bytes_parsed;
|
||||
break;
|
||||
case TYPE_MAX:
|
||||
*va_arg(ap, intmax_t*) = bytes_parsed;
|
||||
break;
|
||||
case TYPE_PTR: __builtin_unreachable();
|
||||
}
|
||||
}
|
||||
else if ( *format == 'a' || *format == 'A' ||
|
||||
*format == 'e' || *format == 'E' ||
|
||||
*format == 'f' || *format == 'F' ||
|
||||
*format == 'g' || *format == 'G' )
|
||||
{
|
||||
#ifndef __is_sortix_libk
|
||||
fprintf(stderr, "%s:%u: error: scanf does not support \"%%%c\")\n",
|
||||
__FILE__, __LINE__, *format);
|
||||
#endif
|
||||
return errno = EINVAL, matched_items ? matched_items : EOF;
|
||||
}
|
||||
else
|
||||
return errno = EINVAL, matched_items ? matched_items : EOF;
|
||||
}
|
||||
return matched_items;
|
||||
}
|
|
@ -35,5 +35,5 @@ int vfscanf_unlocked(FILE* fp, const char* format, va_list ap)
|
|||
if ( !(fp->flags & _FILE_READABLE) )
|
||||
return errno = EBADF, fp->flags |= _FILE_STATUS_ERROR, EOF;
|
||||
|
||||
return vscanf_callback(fp, wrap_fgetc, wrap_ungetc, format, ap);
|
||||
return vcbscanf(fp, wrap_fgetc, wrap_ungetc, format, ap);
|
||||
}
|
||||
|
|
|
@ -1,354 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2012, 2014, 2016 Jonas 'Sortie' Termansen.
|
||||
*
|
||||
* Permission to use, copy, modify, and distribute this software for any
|
||||
* purpose with or without fee is hereby granted, provided that the above
|
||||
* copyright notice and this permission notice appear in all copies.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
||||
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||||
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
*
|
||||
* stdio/vscanf_callback.c
|
||||
* Input format conversion.
|
||||
*/
|
||||
|
||||
#include <ctype.h>
|
||||
#include <errno.h>
|
||||
#include <limits.h>
|
||||
#include <stdarg.h>
|
||||
#include <stdbool.h>
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
|
||||
enum scanmode
|
||||
{
|
||||
MODE_INIT,
|
||||
MODE_CONVSPEC,
|
||||
MODE_SCANINT,
|
||||
MODE_SCANINT_REAL,
|
||||
MODE_SCANSTRING,
|
||||
MODE_SCANSTRING_REAL,
|
||||
MODE_SCANREPORT,
|
||||
};
|
||||
|
||||
enum scantype
|
||||
{
|
||||
TYPE_SHORT,
|
||||
TYPE_SHORTSHORT,
|
||||
TYPE_INT,
|
||||
TYPE_LONG,
|
||||
TYPE_LONGLONG,
|
||||
TYPE_SIZE,
|
||||
TYPE_PTRDIFF,
|
||||
TYPE_MAX,
|
||||
};
|
||||
|
||||
static bool IsTypeModifier(char c)
|
||||
{
|
||||
return c == 'h' || c == 'j' || c == 'l' || c == 'L' || c == 't' || c == 'z';
|
||||
}
|
||||
|
||||
static int debase(char c, int base)
|
||||
{
|
||||
if ( c == '0' )
|
||||
return 0;
|
||||
int ret = -1;
|
||||
if ( '0' <= c && c <= '9' ) { ret = c - '0' + 0; }
|
||||
if ( 'a' <= c && c <= 'f' ) { ret = c - 'a' + 10; }
|
||||
if ( 'A' <= c && c <= 'F' ) { ret = c - 'A' + 10; }
|
||||
if ( base <= ret )
|
||||
return -1;
|
||||
return ret;
|
||||
}
|
||||
|
||||
int vscanf_callback(void* fp,
|
||||
int (*fgetc)(void*),
|
||||
int (*ungetc)(int, void*),
|
||||
const char* restrict format,
|
||||
va_list ap)
|
||||
{
|
||||
int matcheditems = 0;
|
||||
size_t fieldwidth = 0;
|
||||
bool escaped = false;
|
||||
bool discard = false;
|
||||
bool negint = false;
|
||||
bool intunsigned = false;
|
||||
bool leadingzero = false;
|
||||
bool hasprefix = false;
|
||||
bool string = false;
|
||||
size_t intparsed = 0;
|
||||
uintmax_t intvalue = 0;
|
||||
int ic;
|
||||
int base = 0;
|
||||
int cval;
|
||||
const size_t UNDO_MAX = 4;
|
||||
int undodata[UNDO_MAX];
|
||||
size_t undoable = 0;
|
||||
size_t strwritten = 0;
|
||||
char* strdest = NULL;
|
||||
char convc;
|
||||
int bytesparsed = 0;
|
||||
enum scantype scantype = TYPE_INT;
|
||||
enum scanmode scanmode = MODE_INIT;
|
||||
while ( true )
|
||||
{
|
||||
ic = fgetc(fp);
|
||||
if ( ic != EOF && bytesparsed != INT_MAX )
|
||||
bytesparsed++;
|
||||
unsigned char uc = ic; char c = uc;
|
||||
switch (scanmode)
|
||||
{
|
||||
case MODE_INIT:
|
||||
if ( !*format )
|
||||
{
|
||||
ungetc(ic, fp);
|
||||
if ( ic != EOF )
|
||||
bytesparsed--;
|
||||
goto break_loop;
|
||||
}
|
||||
if ( isspace((unsigned char) *format) )
|
||||
{
|
||||
if ( isspace(ic) )
|
||||
continue;
|
||||
else
|
||||
do format++;
|
||||
while ( isspace((unsigned char) *format) );
|
||||
}
|
||||
if ( *format == '%' && !escaped )
|
||||
{
|
||||
format++;
|
||||
scanmode = MODE_CONVSPEC;
|
||||
ungetc(ic, fp);
|
||||
if ( ic != EOF )
|
||||
bytesparsed--;
|
||||
continue;
|
||||
}
|
||||
escaped = false;
|
||||
if ( *format != c )
|
||||
{
|
||||
ungetc(ic, fp);
|
||||
if ( ic != EOF )
|
||||
bytesparsed--;
|
||||
goto break_loop;
|
||||
}
|
||||
format++;
|
||||
break;
|
||||
case MODE_CONVSPEC:
|
||||
discard = false;
|
||||
if ( *format == '*' ) { discard = true; format++; }
|
||||
fieldwidth = 0;
|
||||
while ( '0'<= *format && *format <= '9' )
|
||||
fieldwidth = fieldwidth * 10 + *format++ - '0';
|
||||
scantype = TYPE_INT;
|
||||
while ( IsTypeModifier(*format) )
|
||||
switch ( *format++ )
|
||||
{
|
||||
case 'h': scantype = scantype == TYPE_SHORT ? TYPE_SHORTSHORT
|
||||
: TYPE_SHORT; break;
|
||||
case 'j': scantype = TYPE_MAX; break;
|
||||
case 'l': scantype = scantype == TYPE_LONG ? TYPE_LONGLONG
|
||||
: TYPE_LONG; break;
|
||||
case 'L': scantype = TYPE_LONGLONG; break;
|
||||
case 't': scantype = TYPE_PTRDIFF; break;
|
||||
case 'z': scantype = TYPE_SIZE; break;
|
||||
}
|
||||
|
||||
switch ( (convc = *format++) )
|
||||
{
|
||||
case '%':
|
||||
escaped = true;
|
||||
default:
|
||||
fprintf(stderr, "Warning: scanf does not support %c (%i)\n",
|
||||
convc, convc);
|
||||
fprintf(stderr, "Bailing out to prevent problems.\n");
|
||||
errno = ENOTSUP;
|
||||
return -1;
|
||||
continue;
|
||||
case 'd':
|
||||
base = 10; scanmode = MODE_SCANINT; intunsigned = false; break;
|
||||
case 'i':
|
||||
base = 0; scanmode = MODE_SCANINT; intunsigned = false; break;
|
||||
case 'o':
|
||||
base = 0; scanmode = MODE_SCANINT; intunsigned = true; break;
|
||||
case 'u':
|
||||
base = 10; scanmode = MODE_SCANINT; intunsigned = true; break;
|
||||
case 'x':
|
||||
case 'X':
|
||||
base = 16; scanmode = MODE_SCANINT; intunsigned = true; break;
|
||||
case 'c':
|
||||
string = false; scanmode = MODE_SCANSTRING; break;
|
||||
case 's':
|
||||
string = true; scanmode = MODE_SCANSTRING; break;
|
||||
case 'n':
|
||||
scanmode = MODE_SCANREPORT; break;
|
||||
}
|
||||
ungetc(ic, fp);
|
||||
if ( ic != EOF )
|
||||
bytesparsed--;
|
||||
continue;
|
||||
case MODE_SCANINT:
|
||||
intparsed = 0;
|
||||
intvalue = 0;
|
||||
leadingzero = false;
|
||||
negint = false;
|
||||
hasprefix = false;
|
||||
undoable = 0;
|
||||
scanmode = MODE_SCANINT_REAL;
|
||||
case MODE_SCANINT_REAL:
|
||||
if ( fieldwidth )
|
||||
{
|
||||
fprintf(stderr, "Error: field width not supported for integers in scanf.\n");
|
||||
errno = ENOTSUP;
|
||||
return -1;
|
||||
}
|
||||
if ( !undoable && isspace(ic) )
|
||||
continue;
|
||||
if ( undoable < UNDO_MAX )
|
||||
undodata[undoable++] = ic;
|
||||
if ( !intparsed && c == '-' && !intunsigned && !negint )
|
||||
{
|
||||
negint = true;
|
||||
continue;
|
||||
}
|
||||
if ( !intparsed && c == '0' && !hasprefix &&
|
||||
(!base || base == 8 || base == 16) && !leadingzero )
|
||||
leadingzero = true;
|
||||
if ( intparsed == 1 && (c == 'x' || c == 'X') && !hasprefix &&
|
||||
(!base || base == 16) && leadingzero )
|
||||
{
|
||||
base = 16;
|
||||
leadingzero = false;
|
||||
hasprefix = true;
|
||||
intparsed = 0;
|
||||
continue;
|
||||
}
|
||||
else if ( intparsed == 1 && '1' <= c && c <= '7' && !hasprefix &&
|
||||
(!base || base == 8) && leadingzero )
|
||||
{
|
||||
base = 8;
|
||||
hasprefix = true;
|
||||
leadingzero = false;
|
||||
}
|
||||
else if ( !intparsed && '0' <= c && c <= '9' && !hasprefix &&
|
||||
(!base || base == 10) && !leadingzero )
|
||||
{
|
||||
base = 10;
|
||||
leadingzero = false;
|
||||
hasprefix = true;
|
||||
}
|
||||
cval = debase(c, base);
|
||||
if ( cval < 0 )
|
||||
{
|
||||
if ( !intparsed )
|
||||
{
|
||||
while ( undoable )
|
||||
{
|
||||
ungetc(undodata[--undoable], fp);
|
||||
bytesparsed--;
|
||||
}
|
||||
goto break_loop;
|
||||
}
|
||||
scanmode = MODE_INIT;
|
||||
undoable = 0;
|
||||
ungetc(ic, fp);
|
||||
if ( ic != EOF )
|
||||
bytesparsed--;
|
||||
if ( discard ) { discard = false; continue; }
|
||||
uintmax_t uintmaxval = intvalue;
|
||||
// TODO: Possible truncation of INTMAX_MIN!
|
||||
intmax_t intmaxval = uintmaxval;
|
||||
if ( negint ) intmaxval = -intmaxval;
|
||||
bool un = intunsigned;
|
||||
switch ( scantype )
|
||||
{
|
||||
case TYPE_SHORTSHORT:
|
||||
if ( un ) *va_arg(ap, unsigned char*) = uintmaxval;
|
||||
else *va_arg(ap, signed char*) = intmaxval;
|
||||
break;
|
||||
case TYPE_SHORT:
|
||||
if ( un ) *va_arg(ap, unsigned short*) = uintmaxval;
|
||||
else *va_arg(ap, signed short*) = intmaxval;
|
||||
break;
|
||||
case TYPE_INT:
|
||||
if ( un ) *va_arg(ap, unsigned int*) = uintmaxval;
|
||||
else *va_arg(ap, signed int*) = intmaxval;
|
||||
break;
|
||||
case TYPE_LONG:
|
||||
if ( un ) *va_arg(ap, unsigned long*) = uintmaxval;
|
||||
else *va_arg(ap, signed long*) = intmaxval;
|
||||
break;
|
||||
case TYPE_LONGLONG:
|
||||
if ( un ) *va_arg(ap, unsigned long long*) = uintmaxval;
|
||||
else *va_arg(ap, signed long long*) = intmaxval;
|
||||
break;
|
||||
case TYPE_PTRDIFF:
|
||||
*va_arg(ap, ptrdiff_t*) = intmaxval;
|
||||
break;
|
||||
case TYPE_SIZE:
|
||||
if ( un ) *va_arg(ap, size_t*) = uintmaxval;
|
||||
else *va_arg(ap, ssize_t*) = intmaxval;
|
||||
break;
|
||||
case TYPE_MAX:
|
||||
if ( un ) *va_arg(ap, uintmax_t*) = uintmaxval;
|
||||
else *va_arg(ap, intmax_t*) = intmaxval;
|
||||
break;
|
||||
}
|
||||
matcheditems++;
|
||||
continue;
|
||||
}
|
||||
intvalue = intvalue * (uintmax_t) base + (uintmax_t) cval;
|
||||
intparsed++;
|
||||
continue;
|
||||
case MODE_SCANSTRING:
|
||||
if ( !fieldwidth )
|
||||
fieldwidth = string ? SIZE_MAX : 1;
|
||||
scanmode = MODE_SCANSTRING_REAL;
|
||||
strwritten = 0;
|
||||
strdest = discard ? NULL : va_arg(ap, char*);
|
||||
case MODE_SCANSTRING_REAL:
|
||||
if ( string && !strwritten && isspace(ic) )
|
||||
continue;
|
||||
if ( string && strwritten &&
|
||||
(ic == EOF || isspace(ic) || strwritten == fieldwidth) )
|
||||
{
|
||||
ungetc(ic, fp);
|
||||
if ( ic != EOF )
|
||||
bytesparsed--;
|
||||
if ( !discard )
|
||||
strdest[strwritten] = '\0';
|
||||
matcheditems++;
|
||||
scanmode = MODE_INIT;
|
||||
continue;
|
||||
}
|
||||
if ( !string && strwritten == fieldwidth )
|
||||
{
|
||||
ungetc(ic, fp);
|
||||
if ( ic != EOF )
|
||||
bytesparsed--;
|
||||
scanmode = MODE_INIT;
|
||||
continue;
|
||||
}
|
||||
if ( ic == EOF )
|
||||
goto break_loop;
|
||||
if ( !discard )
|
||||
strdest[strwritten++] = c;
|
||||
continue;
|
||||
case MODE_SCANREPORT:
|
||||
ungetc(ic, fp);
|
||||
if ( ic != EOF )
|
||||
bytesparsed--;
|
||||
if ( !discard )
|
||||
*va_arg(ap, int*) = bytesparsed;
|
||||
scanmode = MODE_INIT;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
break_loop:
|
||||
return matcheditems;
|
||||
}
|
|
@ -54,5 +54,5 @@ int vsscanf(const char* str, const char* format, va_list ap)
|
|||
struct vsscanf_input input;
|
||||
input.str = str;
|
||||
input.offset = 0;
|
||||
return vscanf_callback(&input, vsscanf_fgetc, vsscanf_ungetc, format, ap);
|
||||
return vcbscanf(&input, vsscanf_fgetc, vsscanf_ungetc, format, ap);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue