Thoughts from Homework 7

The value of the else

Consider the following two examples:

/* Example 1: an else-if chain.  Only the first branch whose condition
 * holds is executed; the conditions after it are never evaluated. */
if (presState == INSPACE || presState == LEADINGDIGITS) {
    presState = LEADINGDIGITS ;
} else if (presState == AFTERDOT || presState == TRAILINGDIGITS) {
    presState = TRAILINGDIGITS ;
} else if (presState == AFTERE || presState == AFTERSIGN
    || presState == READINGEXPONENT) {
    presState = READINGEXPONENT ;
}
/* Example 2: the same tests written without else.  Every condition is
 * evaluated in turn, each one seeing whatever value an earlier body may
 * have stored in presState.  At most one body runs here only because no
 * value assigned (LEADINGDIGITS, TRAILINGDIGITS, READINGEXPONENT) is
 * tested by a later if. */
if (presState == INSPACE || presState == LEADINGDIGITS) {
    presState = LEADINGDIGITS ;
}
if (presState == AFTERDOT || presState == TRAILINGDIGITS) {
    presState = TRAILINGDIGITS ;
}
if (presState == AFTERE || presState == AFTERSIGN
    || presState == READINGEXPONENT) {
    presState = READINGEXPONENT ;
}

Both examples seem to do the same thing, but the second requires a bit more reasoning. To understand the elseless code you need to determine if it is possible that more than one of the if conditions may be triggered.

Avoiding repeating code

There were (at least) two possible ways to solve the problem. One was to write a switch statement that branched according to the input and examine the state to determine the next transition. The other was to write a switch statement that branched according to the state and then examine the input to determine the next transition.

When the second approach was used in submitted solutions, a switch statement was used to determine the transition. Because many inputs resulted in the same transition regardless of the state, this resulted in the same code being repeated throughout the program.

Here's a little table that compares the two different approaches used in the submitted solutions. The four submitted solutions that passed no test cases are not covered in this table.

Metric Input test first State test first
Number of examples 11 8
Average size in bytes 4219 14140
Average size in lines 417 1172
Percentage passing all tests 36.36% 12.50%
Average number of failed "lines" 5.000 8.125

Other solutions

Using flex

%{
/* C prologue: copied verbatim into the generated scanner. */
#include <stdio.h>
#include <stdlib.h>

/* Counters updated by the rule actions.  lineCount runs over the whole
 * input; wordCount and realCount are per-line totals that the end-of-line
 * action prints and then resets to zero. */
static int lineCount = 0 ;
static int wordCount = 0 ;
static int realCount = 0 ;
%}

/* real: one or more digits followed by either a fraction (".digits") with
 * an optional exponent, or an exponent alone.  A bare run of digits does
 * not match, so plain integers are not counted as reals.
 * word: any maximal run of characters other than blank, tab and newline.
 * eol : a single newline.
 * wsp : a run of blanks and tabs. */
real [0-9]+(([.][0-9]+)([eE][+-]?[0-9]+)?|([eE][+-]?[0-9]+))
word [^ \t\n]+
eol  \n 
wsp  [ \t]+

%%
    /* Rule order matters.  flex takes the longest match and, when two
     * rules match the same length, the one listed first.  A token that
     * is a real from start to finish is therefore counted by {real}; a
     * token that only begins like a real is matched further by {word}
     * and counted as a plain word. */
{real} {++wordCount; ++realCount; }
{word} {++wordCount; }
{eol}  {
	/* End of line: report this line's totals, then reset the per-line
	 * counters.  Input that does not end with a newline leaves its last
	 * partial line unreported. */
	++lineCount ;
	printf("Line %2d:  #Words %2d, #REAL %2d\n",
	       lineCount, wordCount, realCount) ;
	wordCount = 0 ;
	realCount = 0 ;
}
{wsp}  {;}

%%
/*
 * Entry point of the flex-based solution: run the generated scanner until
 * yylex() returns, then report success.
 *
 * The return type and the return value are spelled out: implicit int was
 * removed from the language in C99, and leaving main without a return
 * gives the caller an unspecified exit status under C89.
 */
int main(void)
{
    yylex();
    return 0;
}

A table driven solution

#include <stdio.h>
#include <stdlib.h>

#include "chartypes.h"

/*
 * States of the table-driven scanner.  Each state records how far into the
 * current token the scanner has read; the transitions between them are
 * given by nextStateTable.  The enumerator order is the row order of both
 * tables, so it must not be changed independently of them.
 */
enum myStates {
    INSPC,  /* between tokens; the initial state */
    INWRD,  /* in a token that can no longer be a real */
    LDNMS,  /* in the digits that start a token */
    PSTDT,  /* just past the decimal point */
    TRNMS,  /* in the digits after the decimal point */
    PSTEX,  /* just past the exponent marker */
    PSTSN,  /* just past the sign of the exponent */
    INEXP,  /* in the exponent digits */
    EXTIN  /* Not a real state: ends the main loop and sizes the tables */
};

/*
 * nextStateTable[state][class] is the state entered after reading one
 * character of the given class while in the given state.  Rows follow
 * enum myStates; columns are indexed by the value char2rctype() returns,
 * whose enumeration and count (RCNUMTYPES) come from chartypes.h.
 *
 * NOTE(review): each row holds eight entries but the original header named
 * only seven.  Column 0 always leads to EXTIN, so it is presumably the
 * end-of-input class -- confirm against chartypes.h.
 */
enum myStates nextStateTable[EXTIN][RCNUMTYPES] = {
  /* col 0  EOL    WSP    NUM    DOT    SGN    EXP    OTH               */
  {EXTIN, INSPC, INSPC, LDNMS, INWRD, INWRD, INWRD, INWRD} ,  /* INSPC */
  {EXTIN, INSPC, INSPC, INWRD, INWRD, INWRD, INWRD, INWRD} ,  /* INWRD */
  {EXTIN, INSPC, INSPC, LDNMS, PSTDT, INWRD, PSTEX, INWRD} ,  /* LDNMS */
  {EXTIN, INSPC, INSPC, TRNMS, INWRD, INWRD, INWRD, INWRD} ,  /* PSTDT */
  {EXTIN, INSPC, INSPC, TRNMS, INWRD, INWRD, PSTEX, INWRD} ,  /* TRNMS */
  {EXTIN, INSPC, INSPC, INEXP, INWRD, PSTSN, INWRD, INWRD} ,  /* PSTEX */
  {EXTIN, INSPC, INSPC, INEXP, INWRD, INWRD, INWRD, INWRD} ,  /* PSTSN */
  {EXTIN, INSPC, INSPC, INEXP, INWRD, INWRD, INWRD, INWRD}    /* INEXP */
} ;

/*
 * Actions performed on a transition, looked up in nextActionTable before
 * the state is advanced.  main treats RETRN exactly like NOACT; the loop
 * actually stops because the state table moves to EXTIN on the same input.
 */
enum myAction {
  NOACT, /* No action */
  INCWD, /* Increment word count */
  INCRL, /* Increment real and word count */
  PRINT, /* Print */
  PRTWD, /* INCWD + PRINT */
  PRTRL, /* INCRL + PRINT */
  RETRN  /* Exit */
};

/*
 * nextActionTable[state][class] is the action to perform when a character
 * of the given class is read in the given state.  It is indexed like
 * nextStateTable.  Counting happens only when a token ends (WSP or EOL):
 * tokens ending in TRNMS or INEXP count as reals, every other non-space
 * state counts as a plain word.
 *
 * NOTE(review): each row holds eight entries but the original header named
 * only seven.  Column 0 is RETRN in every row, so it is presumably the
 * end-of-input class -- confirm against chartypes.h.
 */
enum myAction nextActionTable[EXTIN][RCNUMTYPES] = {
  /* col 0  EOL    WSP    NUM    DOT    SGN    EXP    OTH               */
  {RETRN, PRINT, NOACT, NOACT, NOACT, NOACT, NOACT, NOACT} ,  /* INSPC */
  {RETRN, PRTWD, INCWD, NOACT, NOACT, NOACT, NOACT, NOACT} ,  /* INWRD */
  {RETRN, PRTWD, INCWD, NOACT, NOACT, NOACT, NOACT, NOACT} ,  /* LDNMS */
  {RETRN, PRTWD, INCWD, NOACT, NOACT, NOACT, NOACT, NOACT} ,  /* PSTDT */
  {RETRN, PRTRL, INCRL, NOACT, NOACT, NOACT, NOACT, NOACT} ,  /* TRNMS */
  {RETRN, PRTWD, INCWD, NOACT, NOACT, NOACT, NOACT, NOACT} ,  /* PSTEX */
  {RETRN, PRTWD, INCWD, NOACT, NOACT, NOACT, NOACT, NOACT} ,  /* PSTSN */
  {RETRN, PRTRL, INCRL, NOACT, NOACT, NOACT, NOACT, NOACT}    /* INEXP */
} ;


int main(int argc, char** argv) {
    int lineCount = 0 ;
    int wordCount = 0 ;
    int realCount = 0 ;
    enum myStates presState = INSPC ;
    while (presState != EXTIN) {
      enum getRealCharTypeEnum nextChar = char2rctype(getchar()) ;
      switch (nextActionTable[(int)presState][(int)nextChar]) {
      case NOACT:
      case RETRN:
	break ;
      case INCRL:
	++realCount ;
      case INCWD:
	++wordCount ;
	break ;
      case PRTRL:
	++realCount ;
      case PRTWD:
	++wordCount ;
      case PRINT:
	++lineCount ;
	printf("Line %2d:  #Words %2d, #REAL %2d\n",
	       lineCount, wordCount, realCount) ;
	wordCount = 0 ;
	realCount = 0 ;
      }
      presState = nextStateTable[(int)presState][(int)nextChar] ;
    }
    return (EXIT_SUCCESS) ;
}