From Tim's website
Jump to: navigation, search

C 2 HTML - c2html.cpp

<source lang="c"> // To prevent browse information warnings with Microsoft compiler

#pragma warning( disable : 4786 )

#include <iostream>
#include <fstream>
#include <string>

#include "keywords.h"
#include "symbols.h"

// Writes a single character of the source file to the HTML output stream
// (the definition follows main)
void write_html_char( ofstream& stream, const unsigned char character );

// C to HTML converter.
//
// Reads a C/C++ source file and writes an HTML page next to it (same name,
// extension replaced by ".html") in which strings, preprocessor directives,
// comments and keywords are wrapped in markup.
//
// argc/argv: when exactly one argument is given it is the source file name,
//            otherwise the name is read from standard input.
// Returns 0 on success and 1 when a file cannot be opened or the output
// would overwrite the input.
//
// NOTE(review): the empty string literals written around strings, directives,
// comments and keywords look like markup whose text was stripped when this
// listing was extracted (presumably opening/closing highlight tags styled by
// code.css) - restore them from the original source.
// Known limitation: there is no handling of character literals, so a quote
// inside one (e.g. '"') is treated as the start of a string.
int main( int argc, char *argv[] ) {

  using namespace std;
  // Get the name of the input source file
  string source_filename;
  if( argc != 2 )
  {
     cout << "C to HTML converter.\nEnter source file name: ";
     cin >> source_filename;
  }
  else
  {
     source_filename = argv[1];
  }
  // Create a name for the output html file by replacing the extension.
  // Without a '.', find_last_of returns npos and substr keeps the whole name.
  const string::size_type last_dot = source_filename.find_last_of( '.' );
  const string html_filename = source_filename.substr( 0, last_dot ) + ".html";
  cout << "Converting '" << source_filename << "' to '" << html_filename
       << "'" << endl;
  // Opening the output truncates it, so never let it be the input file
  if( html_filename == source_filename )
  {
     cout << "Source file '" << source_filename
          << "' would be overwritten by the html output" << endl;
     return 1;
  }
  // Open the source first so that no html file is created when it is missing
  ifstream source_ifs( source_filename.c_str() );
  if( !source_ifs.is_open() )
  {
     cout << "Failed to open source file '" << source_filename << "'" << endl;
     return 1;
  }
  ofstream html_ofs( html_filename.c_str() );
  if( !html_ofs.is_open() )
  {
     cout << "Failed to open html file '" << html_filename << "'" << endl;
     return 1;
  }
  KEYWORDS keywords;
  SYMBOLS symbols;
  html_ofs << "<HTML>\n";
  html_ofs << "<HEAD><LINK href=\"code.css\" rel=stylesheet type=\"text/css\"></HEAD>\n";
  html_ofs << "<BODY>\n";
  // Copy the input file to the output file one token at a time. The value
  // returned by get() is kept as an int so that end of file can be told apart
  // from a real character.
  const int end_of_file = ifstream::traits_type::eof();
  int next_value;
  while( ( next_value = source_ifs.get() ) != end_of_file )
  {
     unsigned char character = static_cast<unsigned char>( next_value );
     switch( character )
     {
     case '\n':
        html_ofs << "<BR>\n";
        break;
     case '"':
        // Start string - read until next quote
        html_ofs << "";
        do
        {
           write_html_char( html_ofs, character );
           character = source_ifs.get();
           // Escape sequence - copy the backslash and the character after it
           // without testing that character. A loop is needed because one
           // escape may be followed directly by another (e.g. "\\\"").
           while( character == '\\' && !source_ifs.eof() )
           {
              write_html_char( html_ofs, character );
              character = source_ifs.get();
              write_html_char( html_ofs, character );
              character = source_ifs.get();
           }
        }
        while( character != '"' && !source_ifs.eof() );
        // Closing quote (at end of file this is 255, which is not written)
        write_html_char( html_ofs, character );
        html_ofs << "";
        break;
     case '#':
        {
           // Start directive - read until white space
           string directive;
           html_ofs << "";
           write_html_char( html_ofs, character );
           source_ifs >> directive;
           // The directive may carry '<', '>' or '"' (e.g. #include<x.h>)
           // so it is written character by character
           for( string::size_type i = 0; i < directive.size(); ++i )
           {
              write_html_char( html_ofs, directive[i] );
           }
           html_ofs << "";
        }
        break;
     case '/':
        // May start comment - look at the next character without taking it
        switch( source_ifs.peek() )
        {
        case '/':
           // Line comment - read to end of line (starting with this slash)
           html_ofs << "";
           while( character != '\n' && !source_ifs.eof() )
           {
              write_html_char( html_ofs, character );
              character = source_ifs.get();
           }
           html_ofs << "";
           html_ofs << "<BR>\n";
           break;
        case '*':
           {
              // Limited comment - read until '*/'
              html_ofs << "";
              write_html_char( html_ofs, '/' );
              source_ifs.get();
              write_html_char( html_ofs, '*' );
              // The comment only ends when '/' directly follows '*'
              unsigned char previous = 0;
              character = source_ifs.get();
              while( !( previous == '*' && character == '/' )
                     && !source_ifs.eof() )
              {
                 write_html_char( html_ofs, character );
                 previous = character;
                 character = source_ifs.get();
              }
              if( !source_ifs.eof() )
              {
                 write_html_char( html_ofs, '/' );
              }
              html_ofs << "";
           }
           break;
        default:
           // A lone slash; what follows is handled on the next pass
           write_html_char( html_ofs, '/' );
           break;
        }
        break;
     default:
        if( symbols.is( character ) )
        {
           write_html_char( html_ofs, character );
        }
        else
        {
           // Read in characters up to the next symbol to make a word. The
           // symbol is left in the stream so that a quote or slash ending
           // the word still starts a string or comment.
           string word( 1, static_cast<char>( character ) );
           for( ;; )
           {
              const int following = source_ifs.peek();
              if( following == end_of_file
                  || symbols.is( static_cast<unsigned char>( following ) ) )
              {
                 break;
              }
              word += static_cast<char>( source_ifs.get() );
           }
           // Write out the word
           if( keywords.is( word ) )
           {
              html_ofs << "";
              html_ofs << word;
              html_ofs << "";
           }
           else
           {
              html_ofs << word;
           }
        }
        break;
     }
  }
  html_ofs << "</BODY>\n";
  html_ofs << "</HTML>\n";
  cout << "Done!" << endl;
  return 0;

}

void write_html_char( ofstream& stream, const unsigned char character ) {

  switch( character )
  {
  case '\n':
     stream << "
\n"; break; case '\t': stream << "   "; break; case '"': stream << """; break; case ' ': stream << " "; break; case '&': stream << "&"; break; case '<': stream << "<"; break; case '>': stream << ">"; break; case 255: break; default: stream.put( character ); break; }

} </source>

C 2 HTML - keywords.h

<source lang="c">

#include <string>
#include <vector>

using namespace std;

// Table of language keywords with a lookup function.
class KEYWORDS
{
public:

  // Fills the word list with the keywords
  KEYWORDS();

  // Returns true when the given word is in the word list. The search covers
  // the entries from index 'first' to index 'last'; by default that is the
  // whole list.
  bool is( const std::string& word,
           int first = 0,
           int last = wordlist.size()-1 ) const;

private:

  // The word list; being static it is shared by all KEYWORDS objects
  static std::vector<std::string> wordlist;

};

// Storage for the static word list declared in KEYWORDS; a single list is
// shared by every KEYWORDS object.
// NOTE(review): this definition sits in keywords.h, so including the header
// from more than one source file would presumably define it twice - confirm.
vector<string> KEYWORDS::wordlist;

// Default constructor: populates the shared keyword list.
//
// The list is a static member, so it belongs to the class rather than to one
// object. Only the first KEYWORDS constructed fills it; appending the words
// again for every later object would duplicate them and leave the list
// unsorted, which the binary search in KEYWORDS::is cannot cope with.
inline KEYWORDS::KEYWORDS() {

  if( !wordlist.empty() )
  {
     return;
  }
  // Sorted list of keywords - the order matters to the binary search
  static const char* const sorted_keywords[] =
  {
     "and",              // c++
     "and_eq",           // c++
     "asm",
     "auto",
     "bitand",           // c++
     "bitor",            // c++
     "bool",             // c++
     "break",
     "case",
     "catch",            // c++
     "char",
     "class",
     "compl",            // c++
     "const",
     "const_cast",       // c++
     "continue",
     "default",
     "delete",
     "do",
     "double",
     "dynamic_cast",     // c++
     "else",
     "enum",
     "explicit",         // c++
     "extern",
     "false",            // c++
     "float",
     "for",
     "friend",
     "goto",
     "if",
     "inline",
     "int",
     "long",
     "mutable",          // c++
     "namespace",        // c++
     "new",
     "not",              // c++
     "not_eq",           // c++
     "operator",
     "or",               // c++
     "or_eq",            // c++
     "private",
     "protected",
     "public",
     "register",
     "reinterpret_cast", // c++
     "return",
     "short",
     "signed",
     "sizeof",
     "static",
     "static_cast",      // c++
     "struct",
     "switch",
     "template",
     "this",
     "throw",            // c++
     "true",             // c++
     "try",              // c++
     "typedef",
     "typeid",           // c++
     "typename",         // c++
     "union",
     "unsigned",
     "using",            // c++
     "virtual",
     "void",
     "volatile",
     "wchar_t",          // c++
     "while",
     "xor",              // c++
     "xor_eq"            // c++
  };
  const int keyword_count = sizeof( sorted_keywords ) / sizeof( sorted_keywords[0] );
  wordlist.assign( sorted_keywords, sorted_keywords + keyword_count );

}

inline bool KEYWORDS::is( const string& word, int first, int last ) const {

  if( first >= last )
     return word == wordlist[ first ];
  // Binary search - split the list in two and search the appropriate half
  int centre = first + (last - first) / 2;
  if( word < wordlist[ centre ] )
  {
     return is( word, first, centre - 1 );
  }
  if( word > wordlist[ centre ] )
  {
     return is( word, centre + 1, last );
  }
  return true;

} </source>

C 2 HTML - symbols.h

<source lang="c"> const int NUMBER_OF_ASCII_CHARACTERS = 255; // Number of entries in the SYMBOLS lookup table (indices 0 to 254)

// Lookup table recording which characters count as symbols.
class SYMBOLS
{
public:

  // Builds the table, marking the characters that are symbols
  SYMBOLS();

  // Reports whether the given character has been marked as a symbol
  bool is( const unsigned char character ) const;

private:

  // One true/false flag per character, indexed by the character's value
  bool symbols[NUMBER_OF_ASCII_CHARACTERS];

};

// Default constructor: clears the whole table and then flags the characters
// that are treated as symbols (white space, the double quote, punctuation
// and operators).
inline SYMBOLS::SYMBOLS() {

  // Every character starts out as "not a symbol"
  int index = NUMBER_OF_ASCII_CHARACTERS;
  while( index-- > 0 )
  {
     symbols[index] = false;
  }
  // The characters that are symbols, walked up to the terminating '\0'
  static const char symbol_characters[] = "\n\t\" ;:()[].!~-+&*/%<>^|?=,";
  for( const char* symbol = symbol_characters; *symbol != '\0'; ++symbol )
  {
     symbols[ static_cast<unsigned char>( *symbol ) ] = true;
  }

}

inline bool SYMBOLS::is( const unsigned char character ) const {

  return symbols[ character ];

} </source>