/* Copyright (C) 2008  Emmanuel Varoquaux

   This file is part of XOS.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>. */

#include "stdio.h"
#include "ctype.h"

/* NOTE(review): the names of the five system headers below were missing from
   the file and have been reconstructed from the identifiers it uses (errno,
   LONG_MAX, va_list, size_t/ptrdiff_t, strchr) -- confirm against the
   original. */
#include <errno.h>
#include <limits.h>
#include <stdarg.h>
#include <stddef.h>
#include <string.h>

/* Input abstraction */

struct input_sequence_struct;

/* Backend operations of an input sequence.
   get:   read the next character, or return EOF (and update the error/eof
          flags of the sequence).
   unget: push back the character most recently returned by get. */
struct input_sequence_vtbl_struct {
  int (*get)(struct input_sequence_struct *);
  void (*unget)(struct input_sequence_struct *);
};

/* A source of characters for the scanner, backed either by a FILE stream or
   by a NUL-terminated string. */
struct input_sequence_struct {
  const struct input_sequence_vtbl_struct *vptr;
  unsigned error : 1; /* set by the stream backend when __ferror() reports an error */
  unsigned eof : 1;   /* set by a backend when the end of the input is reached */
  int nread;          /* characters consumed so far (get increments, unget decrements) */
  union {
    struct {
      FILE *stream;
      int last_read;  /* last character read, needed by unget */
    } stream_input_sequence;
    struct {
      const char *str; /* next character to read */
    } string_input_sequence;
  };
};

/* Resets the backend-independent state (flags and character counter). */
static void input_sequence_init(struct input_sequence_struct *input_sequence)
{
  input_sequence->error = 0;
  input_sequence->eof = 0;
  input_sequence->nread = 0;
}

/* Reads one character from the sequence.
   Returns the character, or EOF once the end of input has been seen or when
   the backend fails; nread only counts characters actually delivered. */
static int input_sequence_get(struct input_sequence_struct *input_sequence)
{
  int c;

  if (input_sequence->eof)
    return EOF;
  if ((c = input_sequence->vptr->get(input_sequence)) == EOF)
    return EOF;
  input_sequence->nread++;
  return c;
}

/* Pushes back the character returned by the last successful
   input_sequence_get(); also clears the eof flag and un-counts the
   character. */
static void input_sequence_unget(struct input_sequence_struct *input_sequence)
{
  input_sequence->vptr->unget(input_sequence);
  input_sequence->eof = 0;
  input_sequence->nread--;
}

/* Reading from a stream */

/* get backend for FILE streams: records the error and/or eof condition
   reported by the stream when __fgetc() returns EOF, otherwise remembers the
   character so that it can be pushed back later. */
static int stream_input_sequence_get(struct input_sequence_struct *input_sequence)
{
  int c;

  if ((c = __fgetc(input_sequence->stream_input_sequence.stream)) == EOF) {
    if (__ferror(input_sequence->stream_input_sequence.stream))
      input_sequence->error = 1;
    if (__feof(input_sequence->stream_input_sequence.stream))
      input_sequence->eof = 1;
    return EOF;
  }
  input_sequence->stream_input_sequence.last_read = c;
  return c;
}

/* unget backend for FILE streams: hands the last character read back to the
   stream with __ungetc(). */
static void stream_input_sequence_unget(struct input_sequence_struct *input_sequence)
{
  __ungetc(input_sequence->stream_input_sequence.last_read, input_sequence->stream_input_sequence.stream);
}

static const struct input_sequence_vtbl_struct stream_input_sequence_vtbl = {
  .get = stream_input_sequence_get,
  .unget = stream_input_sequence_unget
};

/* Initializes input_sequence so that it reads from stream. */
static void stream_input_sequence_init(struct input_sequence_struct *input_sequence, FILE *stream)
{
  input_sequence->vptr = &stream_input_sequence_vtbl;
  input_sequence_init(input_sequence);
  input_sequence->stream_input_sequence.stream = stream;
}

/* Reading from a character string */

/* get backend for strings: returns the next character as an unsigned char
   value; the terminating NUL is reported as EOF and sets the eof flag.
   Note that the cursor is advanced in both cases. */
static int string_input_sequence_get(struct input_sequence_struct *input_sequence)
{
  char c;

  if (!(c = *input_sequence->string_input_sequence.str++)) {
    input_sequence->eof = 1;
    return EOF;
  }
  return (unsigned char)c;
}

/* unget backend for strings: steps the cursor back by one character. */
static void string_input_sequence_unget(struct input_sequence_struct *input_sequence)
{
  input_sequence->string_input_sequence.str--;
}

static const struct input_sequence_vtbl_struct string_input_sequence_vtbl = {
  .get = string_input_sequence_get,
  .unget = string_input_sequence_unget
};

/* Initializes input_sequence so that it reads from the NUL-terminated
   string str. */
static void string_input_sequence_init(struct input_sequence_struct *input_sequence, const char *str)
{
  input_sequence->vptr = &string_input_sequence_vtbl;
  input_sequence_init(input_sequence);
  input_sequence->string_input_sequence.str = str;
}

/* Conversion options */

/* Size of the object an integer conversion stores into. */
enum {LM_DEFAULT, LM_SHORT, LM_LONG};

/* Options parsed from a single conversion specification. */
struct conversion_options_struct {
  unsigned suppress_assignment : 1; /* the '*' flag was given */
  unsigned int field_width;         /* maximum field width, 0 if none was given */
  int length_modifier;              /* one of the LM_* constants */
};

static const struct conversion_options_struct default_conversion_options = {
  .suppress_assignment = 0,
  .field_width = 0,
  .length_modifier = LM_DEFAULT
};

/* Returns 1 if c is one of the supported length modifiers ('h', 'l', 'z',
   't'), 0 otherwise.
   The result of strchr() is compared with NULL rather than cast to int (a
   pointer does not necessarily fit in an int), and NUL is rejected
   explicitly because strchr() would otherwise match the terminator of the
   modifier list. */
static inline int is_length_modifier(char c)
{
  return c != '\0' && strchr("hlzt", c) != NULL;
}
static void set_length_modifier(struct conversion_options_struct *conversion_options, char c) { switch (c) { case 'h': conversion_options->length_modifier = LM_SHORT; break; case 'l': conversion_options->length_modifier = LM_LONG; break; case 'z': if (sizeof (size_t) > sizeof (unsigned int)) conversion_options->length_modifier = LM_LONG; break; case 't': if (sizeof (ptrdiff_t) > sizeof (unsigned int)) conversion_options->length_modifier = LM_LONG; break; } } /* Construction d'un entier decimal */ static inline int is_digit(char c) { return __isdigit((unsigned char)c); } /* Le comportement est indefini si add_digit() conduit a un debordement. */ static void add_digit(unsigned int *n, char c) { *n = *n * 10 + (c - '0'); } /* Chaine de format */ enum {ST_INPUT_FAILURE, ST_MATCHING_FAILURE}; static int skip_whitespaces(struct input_sequence_struct *input_sequence) { int c; while (1) { if ((c = input_sequence_get(input_sequence)) == EOF) return EOF; if (!__isspace(c)) { input_sequence_unget(input_sequence); return 0; } } } static const char digits_lc[] = "0123456789abcdef"; static const char digits_uc[] = "0123456789ABCDEF"; static int convert_integer(int base, unsigned int maximum_field_width, struct input_sequence_struct *input_sequence, int uconv, long *result, int *status) { unsigned int field_width; int negative; long val; int state; char *p; int c, i; field_width = 0; negative = 0; val = 0; state = 0; while (1) { if (maximum_field_width && field_width == maximum_field_width) break; if ((c = input_sequence_get(input_sequence)) == EOF) { if (input_sequence->error) { *status = ST_INPUT_FAILURE; return EOF; } break; } switch (state) { case 0: if (c == '+') { field_width++; negative = 0; state = 1; continue; } else if (c == '-') { field_width++; negative = 1; state = 1; continue; } case 1: if (base == 0) { if (c == '0') { field_width++; val = 0; state = 3; continue; } base = 10; } else if (base == 16 && c == '0') { field_width++; val = 0; state = 4; continue; } if ((p 
= strchr(digits_lc, c))) i = p - digits_lc; else if ((p = strchr(digits_uc, c))) i = p - digits_uc; else { input_sequence_unget(input_sequence); *status = ST_MATCHING_FAILURE; return EOF; } if (i >= base) { input_sequence_unget(input_sequence); *status = ST_MATCHING_FAILURE; return EOF; } field_width++; val = uconv || !negative ? i : -i; state = 2; continue; case 2: _2: if ((p = strchr(digits_lc, c))) i = p - digits_lc; else if ((p = strchr(digits_uc, c))) i = p - digits_uc; else { input_sequence_unget(input_sequence); goto end; } if (i >= base) { input_sequence_unget(input_sequence); goto end; } field_width++; if (uconv) { if ((unsigned long)val > (ULONG_MAX - i) / base) { errno = ERANGE; val = ULONG_MAX; } else val = val * base + i; } else { if (!negative) { if (val > (LONG_MAX - i) / base) { errno = ERANGE; val = LONG_MAX; } else val = val * base + i; } else { if (val < (LONG_MIN + i) / base) { errno = ERANGE; val = LONG_MIN; } else val = val * base - i; } } state = 2; continue; case 3: if (c == 'x' || c == 'X') { field_width++; base = 16; state = 2; continue; } base = 8; goto _2; case 4: if (c == 'x' || c == 'X') { field_width++; state = 2; continue; } goto _2; } } if (state == 1) { *status = ST_MATCHING_FAILURE; return EOF; } end: *result = uconv && negative ? 
-val : val; return 0; } struct arguments_struct { va_list ap; int count; }; static void arguments_init(struct arguments_struct *arguments, va_list ap) { arguments->ap = ap; arguments->count = 0; } static void arguments_assign_signed(struct arguments_struct *arguments, int length_modifier, long val) { switch (length_modifier) { case LM_DEFAULT: *va_arg(arguments->ap, int *) = val; break; case LM_SHORT: *va_arg(arguments->ap, short *) = val; break; case LM_LONG: *va_arg(arguments->ap, long *) = val; break; } arguments->count++; } static void arguments_assign_unsigned(struct arguments_struct *arguments, int length_modifier, long val) { switch (length_modifier) { case LM_DEFAULT: *va_arg(arguments->ap, unsigned int *) = val; break; case LM_SHORT: *va_arg(arguments->ap, unsigned short *) = val; break; case LM_LONG: *va_arg(arguments->ap, unsigned long *) = val; break; } arguments->count++; } static char *arguments_pop_array(struct arguments_struct *arguments) { char *buf; buf = va_arg(arguments->ap, char *); arguments->count++; return buf; } static void arguments_assign_pointer(struct arguments_struct *arguments, void *p) { *va_arg(arguments->ap, void **) = p; arguments->count++; } static void arguments_store_integer(struct arguments_struct *arguments, int val) { *va_arg(arguments->ap, int *) = val; } static int apply_whitespaces(struct input_sequence_struct *input_sequence, int *status) { if (skip_whitespaces(input_sequence) == EOF && input_sequence->error) { *status = ST_INPUT_FAILURE; return EOF; } return 0; } static int apply_ordinary_character(char c, struct input_sequence_struct *input_sequence, int *status) { int d; if ((d = input_sequence_get(input_sequence)) == EOF) { *status = ST_INPUT_FAILURE; return EOF; } if (d != c) { input_sequence_unget(input_sequence); *status = ST_MATCHING_FAILURE; return EOF; } return 0; } static int apply_signed_integer(int base, const struct conversion_options_struct *conversion_options, struct input_sequence_struct *input_sequence, 
struct arguments_struct *arguments, int *status) { long val; if (skip_whitespaces(input_sequence) == EOF) { *status = ST_INPUT_FAILURE; return EOF; } if (convert_integer(base, conversion_options->field_width, input_sequence, 0, &val, status) == EOF) return EOF; if (!conversion_options->suppress_assignment) arguments_assign_signed(arguments, conversion_options->length_modifier, val); return 0; } static int apply_unsigned_integer(int base, const struct conversion_options_struct *conversion_options, struct input_sequence_struct *input_sequence, struct arguments_struct *arguments, int *status) { unsigned long val; if (skip_whitespaces(input_sequence) == EOF) { *status = ST_INPUT_FAILURE; return EOF; } if (convert_integer(base, conversion_options->field_width, input_sequence, 1, (long *)&val, status) == EOF) return EOF; if (!conversion_options->suppress_assignment) arguments_assign_unsigned(arguments, conversion_options->length_modifier, val); return 0; } static int apply_string(const struct conversion_options_struct *conversion_options, struct input_sequence_struct *input_sequence, struct arguments_struct *arguments, int *status) { char *s; unsigned int field_width; int c; if (skip_whitespaces(input_sequence) == EOF) { *status = ST_INPUT_FAILURE; return EOF; } if (!conversion_options->suppress_assignment) s = arguments_pop_array(arguments); else s = NULL; /* evite un warning de compilation */ field_width = 0; while (1) { if (conversion_options->field_width && field_width == conversion_options->field_width) break; if ((c = input_sequence_get(input_sequence)) == EOF) { if (input_sequence->error) { *status = ST_INPUT_FAILURE; return EOF; } break; } if (__isspace(c)) { input_sequence_unget(input_sequence); break; } field_width++; if (!conversion_options->suppress_assignment) *s++ = c; } if (!conversion_options->suppress_assignment) *s = '\0'; return 0; } static int scanlist_match(char c, const char *scanlist) { int state; char d; int belong; char begin, end; state = 0; 
belong = 1; end = begin = '\0'; while (1) { d = *scanlist++; switch (state) { case 0: if (d == '^') { belong = 0; state = 2; continue; } goto _2; case 1: _1: if (d == ']') return !belong; case 2: _2: begin = d; state = 3; continue; case 3: if (d == '-') { state = 4; continue; } if (c == begin) return belong; goto _1; case 4: if (d == ']') { if (c == begin) return belong; if (c == '-') return belong; return !belong; } end = d; if (c >= begin && c <= end) return belong; state = 1; continue; } } } static int apply_scanset(const char *scanlist, const struct conversion_options_struct *conversion_options, struct input_sequence_struct *input_sequence, struct arguments_struct *arguments, int *status) { unsigned int field_width; char *s; int c; if ((c = input_sequence_get(input_sequence)) == EOF) { *status = ST_INPUT_FAILURE; return EOF; } if (!scanlist_match(c, scanlist)) { input_sequence_unget(input_sequence); *status = ST_MATCHING_FAILURE; return EOF; } if (!conversion_options->suppress_assignment) s = arguments_pop_array(arguments); else s = NULL; /* evite un warning de compilation */ field_width = 0; while (1) { field_width++; if (!conversion_options->suppress_assignment) *s++ = c; if (conversion_options->field_width && field_width == conversion_options->field_width) break; if ((c = input_sequence_get(input_sequence)) == EOF) { if (input_sequence->error) { *status = ST_INPUT_FAILURE; return EOF; } break; } if (!scanlist_match(c, scanlist)) { input_sequence_unget(input_sequence); break; } } if (!conversion_options->suppress_assignment) *s = '\0'; return 0; } static int apply_byte_sequence(const struct conversion_options_struct *conversion_options, struct input_sequence_struct *input_sequence, struct arguments_struct *arguments, int *status) { unsigned int count; char *s; int c; if ((c = input_sequence_get(input_sequence)) == EOF) { *status = ST_INPUT_FAILURE; return EOF; } if (!conversion_options->suppress_assignment) s = arguments_pop_array(arguments); else s = NULL; 
/* evite un warning de compilation */ count = conversion_options->field_width ? : 1; while (1) { if (!conversion_options->suppress_assignment) *s++ = c; if (!--count) break; if ((c = input_sequence_get(input_sequence)) == EOF) { if (input_sequence->error) { *status = ST_INPUT_FAILURE; return EOF; } break; } } return 0; } static int apply_pointer(const struct conversion_options_struct *conversion_options, struct input_sequence_struct *input_sequence, struct arguments_struct *arguments, int *status) { unsigned long val; char c; const char *s; void *p; if (skip_whitespaces(input_sequence) == EOF) { *status = ST_INPUT_FAILURE; return EOF; } if ((c = input_sequence_get(input_sequence)) == EOF) { *status = ST_INPUT_FAILURE; return EOF; } s = "(nil)"; if (c == *s) { while (*++s) { if ((c = input_sequence_get(input_sequence)) == EOF) { *status = input_sequence->error ? ST_INPUT_FAILURE : ST_MATCHING_FAILURE; return EOF; } if (c != *s) { input_sequence_unget(input_sequence); *status = ST_MATCHING_FAILURE; return EOF; } } p = NULL; } else { input_sequence_unget(input_sequence); if (convert_integer(16, conversion_options->field_width, input_sequence, 1, (long *)&val, status) == EOF) return EOF; p = (void *)val; } if (!conversion_options->suppress_assignment) arguments_assign_pointer(arguments, p); return 0; } static void apply_count(const struct conversion_options_struct *conversion_options, struct input_sequence_struct *input_sequence, struct arguments_struct *arguments) { if (!conversion_options->suppress_assignment) arguments_store_integer(arguments, input_sequence->nread); } static int apply_percent(struct input_sequence_struct *input_sequence, int *status) { int c; if (skip_whitespaces(input_sequence) == EOF) { *status = ST_INPUT_FAILURE; return EOF; } if ((c = input_sequence_get(input_sequence)) == EOF) { *status = ST_INPUT_FAILURE; return EOF; } if (c != '%') { input_sequence_unget(input_sequence); *status = ST_MATCHING_FAILURE; return EOF; } return 0; } enum { 
ST_READ, ST_READ_SPACE_1, ST_READ_FLAG, ST_READ_FIELD_WIDTH, ST_READ_FIELD_WIDTH_1, ST_READ_SPECIFIER, ST_SCANLIST_1, ST_SCANLIST_2, ST_SCANLIST_3 }; static int scan_format(const char *format, va_list ap, struct input_sequence_struct *input_sequence) { struct arguments_struct arguments; int state; char c; struct conversion_options_struct conversion_options; unsigned int n; const char *scanlist; int status; if (!format) { errno = EINVAL; return EOF; } arguments_init(&arguments, ap); state = ST_READ; conversion_options = default_conversion_options; n = 0; scanlist = NULL; status = ST_INPUT_FAILURE; while ((c = *format++)) { switch (state) { case ST_READ: if (__isspace(c)) { /* espace */ state = ST_READ_SPACE_1; continue; } not_whitespace: if (c != '%') { /* caractere ordinaire */ if (apply_ordinary_character(c, input_sequence, &status) == EOF) goto failure; state = ST_READ; continue; } /* specification de conversion */ conversion_options = default_conversion_options; state = ST_READ_FLAG; continue; case ST_READ_FLAG: if (c == '*') { conversion_options.suppress_assignment = 1; state = ST_READ_FIELD_WIDTH; continue; } case ST_READ_FIELD_WIDTH: if (is_digit(c)) { n = 0; add_digit(&n, c); state = ST_READ_FIELD_WIDTH_1; continue; } read_length_modifier: if (is_length_modifier(c)) { set_length_modifier(&conversion_options, c); state = ST_READ_SPECIFIER; continue; } case ST_READ_SPECIFIER: switch (c) { case 'd': if (apply_signed_integer(10, &conversion_options, input_sequence, &arguments, &status) == EOF) goto failure; state = ST_READ; continue; case 'i': if (apply_signed_integer(0, &conversion_options, input_sequence, &arguments, &status) == EOF) goto failure; state = ST_READ; continue; case 'o': if (apply_unsigned_integer(8, &conversion_options, input_sequence, &arguments, &status) == EOF) goto failure; state = ST_READ; continue; case 'u': if (apply_unsigned_integer(10, &conversion_options, input_sequence, &arguments, &status) == EOF) goto failure; state = ST_READ; 
continue; case 'x': case 'X': if (apply_unsigned_integer(16, &conversion_options, input_sequence, &arguments, &status) == EOF) goto failure; state = ST_READ; continue; case 's': if (apply_string(&conversion_options, input_sequence, &arguments, &status) == EOF) goto failure; state = ST_READ; continue; case '[': scanlist = format; state = ST_SCANLIST_1; continue; case 'c': if (apply_byte_sequence(&conversion_options, input_sequence, &arguments, &status) == EOF) goto failure; state = ST_READ; continue; case 'p': if (apply_pointer(&conversion_options, input_sequence, &arguments, &status) == EOF) goto failure; state = ST_READ; continue; case 'n': apply_count(&conversion_options, input_sequence, &arguments); state = ST_READ; continue; case '%': if (apply_percent(input_sequence, &status) == EOF) goto failure; state = ST_READ; continue; default: goto end; } case ST_READ_SPACE_1: if (__isspace(c)) { state = ST_READ_SPACE_1; continue; } if (apply_whitespaces(input_sequence, &status) == EOF) goto failure; state = ST_READ; goto not_whitespace; case ST_READ_FIELD_WIDTH_1: if (is_digit(c)) { add_digit(&n, c); state = ST_READ_FIELD_WIDTH_1; continue; } conversion_options.field_width = n; goto read_length_modifier; case ST_SCANLIST_1: if (c == '^') { state = ST_SCANLIST_2; continue; } case ST_SCANLIST_2: state = ST_SCANLIST_3; continue; case ST_SCANLIST_3: if (c != ']') { state = ST_SCANLIST_3; continue; } if (apply_scanset(scanlist, &conversion_options, input_sequence, &arguments, &status) == EOF) goto failure; state = ST_READ; continue; } } if (state == ST_READ_SPACE_1) if (apply_whitespaces(input_sequence, &status) == EOF) goto failure; end: return arguments.count; failure: switch (status) { case ST_INPUT_FAILURE: if (input_sequence->error) return EOF; return arguments.count ? : EOF; case ST_MATCHING_FAILURE: return arguments.count; } return n; } /* Fonctions exportees */ int __attribute__ ((weak, visibility ("default"))) scanf(const char *format, ...) 
{ struct input_sequence_struct input_sequence; va_list ap; int retval; stream_input_sequence_init(&input_sequence, stdin); va_start(ap, format); retval = scan_format(format, ap, &input_sequence); va_end(ap); return retval; } int __attribute__ ((weak, visibility ("default"))) fscanf(FILE *stream, const char *format, ...) { struct input_sequence_struct input_sequence; va_list ap; int retval; stream_input_sequence_init(&input_sequence, stream); va_start(ap, format); retval = scan_format(format, ap, &input_sequence); va_end(ap); return retval; } int __attribute__ ((weak, visibility ("default"))) sscanf(const char *s, const char *format, ...) { struct input_sequence_struct input_sequence; va_list ap; int retval; string_input_sequence_init(&input_sequence, s); va_start(ap, format); retval = scan_format(format, ap, &input_sequence); va_end(ap); return retval; } int __attribute__ ((weak, visibility ("default"))) vscanf(const char *format, va_list ap) { struct input_sequence_struct input_sequence; stream_input_sequence_init(&input_sequence, stdin); return scan_format(format, ap, &input_sequence); } int __attribute__ ((weak, visibility ("default"))) vfscanf(FILE *stream, const char *format, va_list ap) { struct input_sequence_struct input_sequence; stream_input_sequence_init(&input_sequence, stream); return scan_format(format, ap, &input_sequence); } int __attribute__ ((weak, visibility ("default"))) vsscanf(const char *s, const char *format, va_list ap) { struct input_sequence_struct input_sequence; string_input_sequence_init(&input_sequence, s); return scan_format(format, ap, &input_sequence); }