View of xos/usr/xsh/parse.c


XOS | Parent Directory | View | Download

/* Copyright (C) 2008  Emmanuel Varoquaux
 
   This file is part of XOS.
 
   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.
 
   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.
 
   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>. */
 
#include "parse.h"
 
#include "free_cmd.h"
#include "command.h"
#include "lex.h"
#include "safe_malloc.h"
#include "error.h"
#include "vars.h"
 
#include <stdlib.h>
#include <stdio.h>
#include <errno.h>
 
/* Token classes.  Each class is a distinct bit so that sets of classes can
   be built with '|' and tested with '&' (see the LA_* masks and
   current_token_class).  The value 0x10 is not assigned in this list. */
#define TC_AND_OR             0x01
#define TC_REDIRECT_OPERATOR  0x02
#define TC_SEPARATOR_OPERATOR 0x04
#define TC_OPEN_PARENTHESIS   0x08
#define TC_NEWLINE            0x20
#define TC_NUMBER             0x40
#define TC_WORD               0x80
 
/* Lookahead sets: for each grammar rule, the token classes that can start
   that rule.  The parser tests current_token_class against these masks to
   decide which rule to apply.  Some masks refer to macros that are defined
   further down (e.g. LA_TERM uses LA_AND_OR); this works because macros are
   only expanded at their point of use. */
#define LA_SEPARATOR        (LA_SEPARATOR_OP | LA_NEWLINE_LIST)
#define LA_SEPARATOR_OP     TC_SEPARATOR_OPERATOR
#define LA_NEWLINE_LIST     TC_NEWLINE
#define LA_FILENAME         TC_WORD
#define LA_IO_FILE          TC_REDIRECT_OPERATOR
#define LA_IO_REDIRECT      (LA_IO_FILE | TC_NUMBER)
#define LA_REDIRECT_LIST    LA_IO_REDIRECT
#define LA_CMD_SUFFIX       (LA_IO_REDIRECT | TC_WORD)
#define LA_CMD_PREFIX       LA_IO_REDIRECT
#define LA_CMD_WORD         TC_WORD
#define LA_CMD_NAME         TC_WORD
#define LA_SIMPLE_COMMAND   (LA_CMD_PREFIX | LA_CMD_NAME)
#define LA_TERM             LA_AND_OR
#define LA_COMPOUND_LIST    (LA_TERM | LA_NEWLINE_LIST)
#define LA_SUBSHELL         TC_OPEN_PARENTHESIS
#define LA_COMPOUND_COMMAND LA_SUBSHELL
#define LA_COMMAND          (LA_SIMPLE_COMMAND | LA_COMPOUND_COMMAND)
#define LA_PIPE_SEQUENCE    LA_COMMAND
#define LA_PIPELINE         LA_PIPE_SEQUENCE
#define LA_AND_OR           LA_PIPELINE
#define LA_LIST             LA_AND_OR
#define LA_COMPLETE_COMMAND LA_LIST
 
/* Class (TC_* bit, or 0) of the token held in current_token; recomputed by
   get_token() each time a new token is read from the lexer. */
static int current_token_class;

/* Results of parse(): parser_eof is set to 1 when end of input is met before
   anything could be read, 0 otherwise; current_list receives the syntax tree
   of the command read, or NULL for an empty command or on error. */
int parser_eof;
struct list_struct *current_list;

/* Token reading */

/* One-token pushback flag: when non-zero, the next get_token() keeps the
   current token instead of calling the lexer. */
static int lookahead = 0;
 
/* Maps the type of current_token to its token class (one of the TC_* bits).
   Token types that belong to no class yield 0. */
static int get_current_token_class()
{
  int token_class = 0;

  switch (current_token.type) {
  case TT_WORD:
    token_class = TC_WORD;
    break;
  case TT_NUMBER:
    token_class = TC_NUMBER;
    break;
  case TT_NEWLINE:
    token_class = TC_NEWLINE;
    break;
  case TT_LEFT_PARENTHESIS:
    token_class = TC_OPEN_PARENTHESIS;
    break;
  case TT_SEMICOLON:
  case TT_AMPERSAND:
    token_class = TC_SEPARATOR_OPERATOR;
    break;
  case TT_AND_AND:
  case TT_OR_OR:
    token_class = TC_AND_OR;
    break;
  case TT_LESS:
  case TT_GREATER:
  case TT_GREATER_GREATER:
  case TT_LESS_AND:
  case TT_GREATER_AND:
  case TT_LESS_GREATER:
  case TT_GREATER_BAR:
    token_class = TC_REDIRECT_OPERATOR;
    break;
  default:
    /* no class: token_class stays 0 */
    break;
  }
  return token_class;
}
 
/* Makes the next token current.  If a token was pushed back with
   unget_token(), it is simply re-used and the pushback flag is cleared;
   otherwise the lexer is called and current_token_class is recomputed. */
static void get_token()
{
  if (lookahead) {
    /* the pushed-back token is still in current_token */
    lookahead = 0;
    return;
  }
  lex();
  current_token_class = get_current_token_class();
}
 
/* Pushes the current token back: the next call to get_token() will leave
   current_token unchanged instead of reading a new token.  Only one token
   of pushback is recorded (a single flag). */
static void unget_token()
{
  lookahead = 1;
}
 
/* Syntax analysis */
 
/* Reports a syntax error on the current token by passing a textual
   representation of that token to parse_error().  Token types not listed
   below produce no report. */
static void parser_error()
{
  char buf[256];
  char *text;

  switch (current_token.type) {
  case TT_AND_AND:           text = "&&";          break;
  case TT_OR_OR:             text = "||";          break;
  case TT_GREATER_GREATER:   text = ">>";          break;
  case TT_LESS_AND:          text = "<&";          break;
  case TT_GREATER_AND:       text = ">&";          break;
  case TT_LESS_GREATER:      text = "<>";          break;
  case TT_GREATER_BAR:       text = ">|";          break;
  case TT_GREATER:           text = ">";           break;
  case TT_LESS:              text = "<";           break;
  case TT_SEMICOLON:         text = ";";           break;
  case TT_LEFT_PARENTHESIS:  text = "(";           break;
  case TT_RIGHT_PARENTHESIS: text = ")";           break;
  case TT_BAR:               text = "|";           break;
  case TT_AMPERSAND:         text = "&";           break;
  case TT_NEWLINE:           text = "newline";     break;
  case TT_END_OF_INPUT:      text = "end of file"; break;
  case TT_NUMBER:
    /* numbers are reported in decimal */
    sprintf(buf, "%d", current_token.number);
    text = buf;
    break;
  case TT_WORD:
    text = current_token.word;
    break;
  default:
    return;
  }
  parse_error(text);
}
 
/* Forward declarations for rule functions that are used before their
   definition. */
static void read_separator_op(int *asynchronous);
static void read_linebreak();
static void read_newline_list();
static int read_and_or(struct pipeline_struct **pipeline);
 
/* separator: separator_op linebreak | newline_list
   The current token is the first token of the separator.  *asynchronous is
   set by read_separator_op() in the first form and to 0 in the second. */
static void read_separator(int *asynchronous)
{
  if (!(current_token_class & LA_SEPARATOR_OP)) {
    /* plain newline(s): a synchronous separator */
    *asynchronous = 0;
    read_newline_list();
    return;
  }
  read_separator_op(asynchronous);
  read_linebreak();
}
 
/* separator_op: ';' | '&'
   Sets *asynchronous to 0 for ';' and to 1 for '&'.  For any other current
   token, *asynchronous is left untouched. */
static void read_separator_op(int *asynchronous)
{
  if (current_token.type == TT_SEMICOLON)
    *asynchronous = 0;
  else if (current_token.type == TT_AMPERSAND)
    *asynchronous = 1;
}
 
/* linebreak: newline_list | (empty)
   Reads the next token; if it is a newline, the whole run of newlines is
   consumed, otherwise the token is pushed back. */
static void read_linebreak()
{
  get_token();
  if (!(current_token_class & LA_NEWLINE_LIST)) {
    unget_token();
    return;
  }
  read_newline_list();
}
 
/* newline_list: one or more newlines.
   Reads tokens until one that is not a newline is found, then pushes that
   token back. */
static void read_newline_list()
{
  for (;;) {
    get_token();
    if (!(current_token_class & LA_NEWLINE_LIST))
      break;
  }
  unget_token();
}
 
/* filename: WORD
   Stores the word pointer of the current token in *filename (the pointer
   itself, not a copy of the string). */
static void read_filename(char **filename)
{
  *filename = current_token.word;
}
 
/* io_file: redirect_operator filename
   The current token is the redirection operator.  Fills redirect->type
   according to the operator and redirect->fd with the operator's default
   descriptor (0 for '<', '<&', '<>'; 1 for '>', '>&', '>>', '>|'), then
   reads the file name into redirect->filename.
   Returns 0 on success, -1 (after reporting a syntax error) if the operator
   is not followed by a word. */
static int read_io_file(struct redirect_struct *redirect)
{
  /* kind of redirection */
  switch (current_token.type) {
  case TT_LESS:
    redirect->type = RT_REDIRECT_INPUT;
    break;
  case TT_LESS_AND:
    redirect->type = RT_DUPLICATE_INPUT;
    break;
  case TT_LESS_GREATER:
    redirect->type = RT_READ_WRITE;
    break;
  case TT_GREATER:
    redirect->type = RT_REDIRECT_OUTPUT;
    break;
  case TT_GREATER_AND:
    redirect->type = RT_DUPLICATE_OUTPUT;
    break;
  case TT_GREATER_GREATER:
    redirect->type = RT_REDIRECT_OUTPUT_APPEND;
    break;
  case TT_GREATER_BAR:
    redirect->type = RT_REDIRECT_OUTPUT_FORCE;
    break;
  default:
    break;
  }

  /* default file descriptor for that operator */
  switch (current_token.type) {
  case TT_LESS:
  case TT_LESS_AND:
  case TT_LESS_GREATER:
    redirect->fd = 0;
    break;
  case TT_GREATER:
  case TT_GREATER_AND:
  case TT_GREATER_GREATER:
  case TT_GREATER_BAR:
    redirect->fd = 1;
    break;
  default:
    break;
  }

  get_token();
  if (current_token_class & LA_FILENAME) {
    read_filename(&redirect->filename);
    return 0;
  }
  parser_error();
  return -1;
}
 
/* io_redirect: io_file | NUMBER io_file
   When the redirection is introduced by a number, that number overrides the
   default descriptor chosen by read_io_file().
   Returns 0 on success, -1 on syntax error. */
static int read_io_redirect(struct redirect_struct *redirect)
{
  int explicit_fd;

  if (current_token.type != TT_NUMBER)
    return read_io_file(redirect) == -1 ? -1 : 0;

  explicit_fd = current_token.number;
  get_token();
  if (!(current_token_class & LA_IO_FILE)) {
    parser_error();
    return -1;
  }
  if (read_io_file(redirect) == -1)
    return -1;
  /* applied last so that it replaces the operator's default descriptor */
  redirect->fd = explicit_fd;
  return 0;
}
 
/* redirect_list: io_redirect+
   Builds a NULL-terminated chain of redirections starting at *redirect.
   The token following the last redirection is pushed back.
   Returns 0 on success.  On error, returns -1 after freeing the node being
   built; the redirections read before it stay linked and the chain is
   NULL-terminated. */
static int read_redirect_list(struct redirect_struct **redirect)
{
  struct redirect_struct *node;

  for (;;) {
    node = safe_malloc(sizeof (struct redirect_struct));
    *redirect = node;
    if (read_io_redirect(node) == -1) {
      free(node);
      *redirect = NULL;
      return -1;
    }
    redirect = &node->next;
    get_token();
    if (!(current_token_class & LA_REDIRECT_LIST))
      break;
  }
  *redirect = NULL;
  unget_token();
  return 0;
}
 
/* cmd_suffix: (io_redirect | WORD)+
   Words are appended to the chain starting at *argument and redirections to
   the chain starting at *redirect.  Both chains are NULL-terminated on exit,
   on success as well as on error.  The token following the suffix is pushed
   back.
   Returns 0 on success, -1 on syntax error (the redirection node being
   built is freed). */
static int read_cmd_suffix(struct argument_struct **argument, struct redirect_struct **redirect)
{
  for (;;) {
    if (current_token.type != TT_WORD) {
      *redirect = safe_malloc(sizeof (struct redirect_struct));
      if (read_io_redirect(*redirect) == -1) {
        free(*redirect);
        *argument = NULL;
        *redirect = NULL;
        return -1;
      }
      redirect = &(*redirect)->next;
    }
    else {
      /* the argument node takes the token's word pointer as is */
      *argument = safe_malloc(sizeof (struct argument_struct));
      (*argument)->word = current_token.word;
      argument = &(*argument)->next;
    }
    get_token();
    if (!(current_token_class & LA_CMD_SUFFIX))
      break;
  }
  *argument = NULL;
  *redirect = NULL;
  unget_token();
  return 0;
}
 
/* cmd_prefix: io_redirect+
   Builds a NULL-terminated chain of redirections starting at *redirect and
   pushes back the token that follows the prefix.
   Returns 0 on success.  On error, returns -1 after freeing the node being
   built; earlier nodes stay linked and the chain is NULL-terminated. */
static int read_cmd_prefix(struct redirect_struct **redirect)
{
  struct redirect_struct *node;

  for (;;) {
    node = safe_malloc(sizeof (struct redirect_struct));
    *redirect = node;
    if (read_io_redirect(node) == -1) {
      free(node);
      *redirect = NULL;
      return -1;
    }
    redirect = &node->next;
    get_token();
    if (!(current_token_class & LA_CMD_PREFIX))
      break;
  }
  *redirect = NULL;
  unget_token();
  return 0;
}
 
/* cmd_word: WORD (command name found after a cmd_prefix)
   Stores the word pointer of the current token in *command_name (the
   pointer itself, not a copy of the string). */
static void read_cmd_word(char **command_name)
{
  *command_name = current_token.word;
}
 
/* cmd_name: WORD (command name found at the start of a simple command)
   Stores the word pointer of the current token in *command_name (the
   pointer itself, not a copy of the string). */
static void read_cmd_name(char **command_name)
{
  *command_name = current_token.word;
}
 
/* simple_command: cmd_prefix cmd_word cmd_suffix
                 | cmd_prefix cmd_word
                 | cmd_prefix
                 | cmd_name cmd_suffix
                 | cmd_name
   Fills command_name, first_argument and first_redirect of *simple_command.
   On success all three fields are defined: command_name is NULL when the
   command consists only of redirections, and absent lists are NULL.
   Returns 0 on success; on error, releases what was built and returns -1. */
static int read_simple_command(struct simple_command_struct *simple_command)
{
  struct redirect_struct **redirect;

  if (current_token_class & LA_CMD_PREFIX) {
    if (read_cmd_prefix(&simple_command->first_redirect) == -1)
      goto free_redirect_list;
    get_token();
    if (current_token_class & LA_CMD_WORD) {
      read_cmd_word(&simple_command->command_name);
      get_token();
      if (current_token_class & LA_CMD_SUFFIX) {
        /* redirections of the suffix are appended after those of the prefix */
        redirect = &simple_command->first_redirect;
        while (*redirect)
          redirect = &(*redirect)->next;
        if (read_cmd_suffix(&simple_command->first_argument, redirect) == -1)
          goto free_argument_list;
      }
      else {
        simple_command->first_argument = NULL;
        unget_token();
      }
    }
    else {
      /* redirections only: no command name and no arguments.  The argument
         list must be cleared too, otherwise it is left uninitialized. */
      simple_command->command_name = NULL;
      simple_command->first_argument = NULL;
      unget_token();
    }
  }
  else {
    read_cmd_name(&simple_command->command_name);
    get_token();
    if (current_token_class & LA_CMD_SUFFIX) {
      if (read_cmd_suffix(&simple_command->first_argument, &simple_command->first_redirect) == -1)
        goto free_argument_list;
    }
    else {
      simple_command->first_argument = NULL;
      simple_command->first_redirect = NULL;
      unget_token();
    }
  }
  return 0;

 free_argument_list:
  free_argument_list(simple_command->first_argument);
  free(simple_command->command_name);
 free_redirect_list:
  free_redirect_list(simple_command->first_redirect);
  return -1;
}
 
/* term: and_or (separator and_or)* [separator]
   Builds a NULL-terminated chain of and-or lists starting at *and_or_list.
   A separator following an and_or is consumed here and recorded in that
   node's asynchronous flag.  The first token that does not continue the
   term is pushed back.
   Returns 0 on success.  On error, returns -1 after freeing the node being
   built; earlier nodes stay linked and the chain is NULL-terminated. */
static int read_term(struct and_or_list_struct **and_or_list)
{
  struct and_or_list_struct *node;

  for (;;) {
    node = safe_malloc(sizeof (struct and_or_list_struct));
    *and_or_list = node;
    node->line_number = line_number;
    if (read_and_or(&node->first_pipeline) == -1) {
      free_pipeline_list(node->first_pipeline);
      free(node);
      *and_or_list = NULL;
      return -1;
    }
    get_token();
    if (!(current_token_class & LA_SEPARATOR)) {
      /* no separator: this node ends the term */
      node->asynchronous = 0;
      node->next = NULL;
      break;
    }
    read_separator(&node->asynchronous);
    and_or_list = &node->next;
    get_token();
    if (!(current_token_class & LA_TERM)) {
      /* trailing separator: nothing follows it */
      *and_or_list = NULL;
      break;
    }
  }
  unget_token();
  return 0;
}
 
/* compound_list: [newline_list] term [separator]
   Fills list->first_and_or_list.
   Returns 0 on success, -1 on syntax error.  When read_term() fails, the
   partially built chain is freed here; when the leading newlines are not
   followed by a term, nothing has been allocated. */
static int read_compound_list(struct list_struct *list)
{
  struct and_or_list_struct *and_or_list;

  if (current_token_class & LA_NEWLINE_LIST) {
    read_newline_list();
    get_token();
    if (!(current_token_class & LA_TERM)) {
      parser_error();
      return -1;
    }
  }
  if (read_term(&list->first_and_or_list) == -1)
    goto free_and_or_list_list;
  get_token();
  if (current_token_class & LA_SEPARATOR) {
    /* The separator applies to the last and-or list of the term.  After a
       successful read_term() the chain holds at least one node, so stop on
       the last node rather than running off the end of the chain (which
       would dereference a NULL pointer). */
    and_or_list = list->first_and_or_list;
    while (and_or_list->next)
      and_or_list = and_or_list->next;
    read_separator(&and_or_list->asynchronous);
  }
  else
    unget_token();
  return 0;

 free_and_or_list_list:
  free_and_or_list_list(list->first_and_or_list);
  return -1;
}
 
/* subshell: '(' compound_list ')'
   The current token is the opening parenthesis.
   Returns 0 on success, -1 on syntax error.  If the closing parenthesis is
   missing, the and-or lists already read are freed. */
static int read_subshell(struct list_struct *list)
{
  get_token();
  if (!(current_token_class & LA_COMPOUND_LIST)) {
    parser_error();
    return -1;
  }
  if (read_compound_list(list) == -1)
    return -1;

  get_token();
  if (current_token.type == TT_RIGHT_PARENTHESIS)
    return 0;

  parser_error();
  free_and_or_list_list(list->first_and_or_list);
  return -1;
}
 
static int read_compound_command(struct compound_command_struct *compound_command)
{
  compound_command->type = CC_SUBSHELL;
  compound_command->list = safe_malloc(sizeof (struct list_struct));
  compound_command->list->line_number = line_number;
  if (read_subshell(compound_command->list) == -1)
    goto free_list;
  return 0;
 
 free_list:
  free(compound_command->list);
  return -1;
}
 
/* command: simple_command | compound_command [redirect_list]
   Sets command->type and fills the matching member of *command.
   Returns 0 on success, -1 on syntax error.  On error, everything built
   inside *command by this function has been released; *command itself is
   not freed here. */
static int read_command(struct command_struct *command)
{
  if (current_token_class & LA_SIMPLE_COMMAND) {
    command->type = CT_SIMPLE;
    if (read_simple_command(&command->simple_command) == -1)
      return -1;
  }
  else {
    command->type = CT_COMPOUND;
    if (read_compound_command(&command->compound_command) == -1)
      return -1;
    get_token();
    if (current_token_class & LA_REDIRECT_LIST) {
      if (read_redirect_list(&command->compound_command.first_redirect) == -1)
        goto free_redirect_list;
    }
    else {
      command->compound_command.first_redirect = NULL;
      unget_token();
    }
  }
  return 0;

 free_redirect_list:
  free_redirect_list(command->compound_command.first_redirect);
  /* The subshell was parsed successfully before the redirections failed:
     its list must be released as well, otherwise it is leaked. */
  free_and_or_list_list(command->compound_command.list->first_and_or_list);
  free(command->compound_command.list);
  return -1;
}
 
/* pipe_sequence: command ('|' linebreak command)*
   Builds a NULL-terminated chain of commands starting at *command.  The
   first token that is not '|' after a command is pushed back.
   Returns 0 on success.  On error, returns -1 with the chain NULL-terminated
   after the last complete command; a node whose parsing failed is freed. */
static int read_pipe_sequence(struct command_struct **command)
{
  struct command_struct *node;

  for (;;) {
    node = safe_malloc(sizeof (struct command_struct));
    *command = node;
    node->line_number = line_number;
    if (read_command(node) == -1) {
      free(node);
      *command = NULL;
      return -1;
    }
    get_token();
    if (current_token.type != TT_BAR) {
      node->next = NULL;
      break;
    }
    command = &node->next;
    read_linebreak();
    get_token();
    if (!(current_token_class & LA_PIPE_SEQUENCE)) {
      /* '|' must be followed by a command */
      parser_error();
      *command = NULL;
      return -1;
    }
  }
  unget_token();
  return 0;
}
 
/* pipeline: pipe_sequence
   Fills pipeline->first_command.
   Returns 0 on success; on error, frees the commands already read and
   returns -1. */
static int read_pipeline(struct pipeline_struct *pipeline)
{
  if (read_pipe_sequence(&pipeline->first_command) != -1)
    return 0;

  free_command_list(pipeline->first_command);
  return -1;
}
 
/* and_or: pipeline (('&&' | '||') linebreak pipeline)*
   Builds a NULL-terminated chain of pipelines starting at *pipeline.  Each
   pipeline followed by an operator records it in its 'operator' field; the
   last pipeline's 'operator' field is not written.  The first token that is
   neither '&&' nor '||' after a pipeline is pushed back.
   Returns 0 on success.  On error, returns -1 with the chain NULL-terminated
   after the last complete pipeline; a node whose parsing failed is freed. */
static int read_and_or(struct pipeline_struct **pipeline)
{
  struct pipeline_struct *node;

  for (;;) {
    node = safe_malloc(sizeof (struct pipeline_struct));
    *pipeline = node;
    node->line_number = line_number;
    if (read_pipeline(node) == -1) {
      free(node);
      *pipeline = NULL;
      return -1;
    }
    get_token();
    if (current_token.type == TT_AND_AND)
      node->operator = AO_AND;
    else if (current_token.type == TT_OR_OR)
      node->operator = AO_OR;
    else {
      node->next = NULL;
      break;
    }
    pipeline = &node->next;
    read_linebreak();
    get_token();
    if (!(current_token_class & LA_AND_OR)) {
      /* the operator must be followed by a pipeline */
      parser_error();
      *pipeline = NULL;
      return -1;
    }
  }
  unget_token();
  return 0;
}
 
/* list: and_or (separator_op and_or)* [separator_op]
   Builds the NULL-terminated chain of and-or lists of *list.  A separator
   operator following an and_or is recorded in that node's asynchronous
   flag.  The first token that does not continue the list is pushed back.
   Returns 0 on success; on error, frees the whole chain and returns -1. */
static int read_list(struct list_struct *list)
{
  struct and_or_list_struct **link = &list->first_and_or_list;
  struct and_or_list_struct *node;

  for (;;) {
    node = safe_malloc(sizeof (struct and_or_list_struct));
    *link = node;
    node->line_number = line_number;
    if (read_and_or(&node->first_pipeline) == -1) {
      /* drop the node being built, then everything read before it */
      free_pipeline_list(node->first_pipeline);
      free(node);
      *link = NULL;
      free_and_or_list_list(list->first_and_or_list);
      return -1;
    }
    get_token();
    if (!(current_token_class & LA_SEPARATOR_OP)) {
      node->asynchronous = 0;
      node->next = NULL;
      break;
    }
    read_separator_op(&node->asynchronous);
    link = &node->next;
    get_token();
    if (!(current_token_class & LA_LIST)) {
      /* trailing separator operator: nothing follows it */
      *link = NULL;
      break;
    }
  }
  unget_token();
  return 0;
}
 
/* complete_command: list (NEWLINE | end of input)
   Returns 0 on success.  Returns -1 if the list is followed by any other
   token (a syntax error is reported) or if end of input carries the lexer's
   error flag; in both cases the and-or lists already read are freed.  When
   read_list() itself fails, -1 is returned directly. */
static int read_complete_command(struct list_struct *list)
{
  if (read_list(list) == -1)
    return -1;

  get_token();
  if (current_token.type == TT_NEWLINE)
    return 0;
  if (current_token.type == TT_END_OF_INPUT) {
    if (!current_token.error)
      return 0;
  }
  else
    parser_error();

  free_and_or_list_list(list->first_and_or_list);
  return -1;
}
 
/* Reads and parses one command.
 * Returns 0 on success.  If end of input is met before anything could be
 * read, parser_eof is set to 1.  Otherwise it is set to 0 and:
 *   - if a complete command was read, its syntax tree is stored in
 *     current_list;
 *   - if an empty command was read, current_list is set to NULL.
 * Returns -1 on error, with current_list set to NULL. */
int parse()
{
  get_token();

  if (current_token.type == TT_END_OF_INPUT) {
    if (current_token.error)
      goto error;
    parser_eof = 1;
    current_list = NULL;
    return 0;
  }
  parser_eof = 0;

  if (!(current_token_class & LA_COMPLETE_COMMAND)) {
    if (current_token.type == TT_NEWLINE) {
      /* empty command */
      current_list = NULL;
      return 0;
    }
    parser_error();
    goto error;
  }

  current_list = safe_malloc(sizeof (struct list_struct));
  current_list->line_number = line_number;
  if (read_complete_command(current_list) != -1)
    return 0;
  free(current_list);

 error:
  /* release the word of the offending token, then discard the rest of the
     line unless the token already ended it */
  if (current_token.type == TT_WORD)
    free(current_token.word);
  if (current_token.type != TT_END_OF_INPUT && current_token.type != TT_NEWLINE)
    skip_line();
  current_list = NULL;
  return -1;
}