Created
May 23, 2014 09:01
-
-
Save rgladwell/593b2a8e6bca6cb10107 to your computer and use it in GitHub Desktop.
html2perl
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
///////////////////////////////////////////////////////////////////// | |
// | |
// $Id: main.cpp,v 1.9 2000/11/10 21:01:09 ricardo Exp $ | |
// | |
// Filename: : main.cpp | |
// Date: : Sat Oct 21 16:52:01 BST 2000 | |
// Copyright: : (C) 2000 by Ricardo Gladwell | |
// Email : [email protected] | |
// | |
///////////////////////////////////////////////////////////////////// | |
// // | |
// This program is free software; you can redistribute it and/or // | |
// modify it under the terms of the GNU General Public License // | |
// as published by the Free Software Foundation; either version // | |
// 2 of the License, or (at your option) any later version. // | |
// // | |
///////////////////////////////////////////////////////////////////// | |
#ifdef HAVE_CONFIG_H | |
#include <config.h> | |
#endif | |
///////////////////////////////////////////////////////////////////// | |
// D E F I N E S T A T E M E N T S ////////////////////////////// | |
///////////////////////////////////////////////////////////////////// | |
// Default set of characters that main() prefixes with a backslash when it
// embeds a line inside a double-quoted Perl string (a tab is written as the
// two characters "\t" instead). The -i and -e command line options add to
// or remove from this set at run time.
#define ESCAPE_CHARACTERS "\t\"\'\\@%$#.:&!-"
///////////////////////////////////////////////////////////////////// | |
// I N C L U D E S T A T E M E N T S //////////////////////////// | |
///////////////////////////////////////////////////////////////////// | |
#include <unistd.h>   // getopt(), optarg, optind

#include <cctype>     // std::isspace
#include <fstream>    // std::ifstream (standard name of the old <fstream.h>)
#include <iostream>   // std::cin, std::cout, std::cerr (standard name of the old <iostream.h>)
#include <string>     // std::string
///////////////////////////////////////////////////////////////////// | |
// M A I N F U N C T I O N ////////////////////////////////////// | |
///////////////////////////////////////////////////////////////////// | |
int main( int argc, char *argv[] ) | |
{ | |
///////////////////////////////////////////////////////////////////// | |
// I N I T I A L I S A T I O N //////////////////////////////////// | |
///////////////////////////////////////////////////////////////////// | |
///////////////////////////////////////////////////////////////////// | |
// Variable Declarations | |
string *file = NULL; // input file | |
string buffer; // buffer | |
string escapees( ESCAPE_CHARACTERS ); // non-printable chars | |
unsigned int index = 0; // generic index | |
int lines_read = 0; // lines read | |
int lines_written = 0; // lines written | |
bool cgi = false; // cgi mode | |
istream *in = &cin; // input stream | |
char c; // command line argument | |
///////////////////////////////////////////////////////////////////// | |
// Get and parse command line options. | |
while( ( c = getopt( argc, argv, "i:e:c" ) ) != -1 ) | |
{ | |
switch( c ) | |
{ | |
case 'i': | |
// include additional escape characters | |
{ | |
string argument( optarg ); | |
for( index = 0; index != argument.length(); index++ ) | |
{ | |
char c = argument.at( index ); | |
if( escapees.find( c ) == string::npos ) escapees += c; | |
} | |
break; | |
} | |
case 'e': | |
// exclude specified escaped characters | |
{ | |
string argument( optarg ); | |
string temp = ""; | |
for( index = 0; index != escapees.length(); index++ ) | |
{ | |
char c = escapees.at( index ); | |
if( argument.find( c ) == string::npos ) temp += c; | |
} | |
escapees = temp; | |
break; | |
} | |
case 'c': | |
// switch cgi mode | |
cgi = true; | |
break; | |
default: | |
cerr << "usage: " << argv[0] << " -[iec] [file]" << endl; | |
exit( 1 ); | |
break; | |
} | |
} | |
if( optind < argc ) { | |
file = new string( argv[optind] ); | |
} | |
///////////////////////////////////////////////////////////////////// | |
// If a file name has been specified set-up input file stream. | |
ifstream *fin = NULL; | |
if ( file != NULL ) | |
{ | |
fin = new ifstream( file->c_str() ); | |
in = fin; | |
} | |
if ( !(*in) ) | |
{ | |
cerr << argv[0] << ": " | |
<< (*file) | |
<< ": No such file or directory" | |
<< endl; | |
return false; | |
} | |
///////////////////////////////////////////////////////////////////// | |
// B E G I N M A I N C O N T R O L L O O P ////////////////// | |
///////////////////////////////////////////////////////////////////// | |
if( cgi ) cout << "print \"Content-type: text/html\\n\\n"; | |
while ( !in->eof() ) | |
{ | |
///////////////////////////////////////////////////////////////////// | |
// Read in a line into the string buffer character by characters | |
// until you hit a newline ('\n') or carriage return ('\r') | |
// character. | |
// empty buffer | |
buffer = ""; | |
// read line or until EOF | |
for ( c = in->get(); c != '\n' && c != '\r'; c = in->get() ) | |
{ | |
if ( c == -1 ) break; | |
buffer += c; | |
} | |
// increment line counter | |
// initialise loop variables | |
lines_read++; | |
bool empty = true; | |
int first_tab = -1; | |
///////////////////////////////////////////////////////////////////// | |
// Eat leading whitespace. | |
for( index = 0; index != buffer.length(); index++ ) | |
{ | |
if ( buffer[index] == '\t' && first_tab == -1 ) | |
{ | |
first_tab = index; | |
} | |
if ( !isspace( buffer[index] ) ) | |
{ | |
if ( first_tab != -1 ) index = first_tab; | |
buffer.erase( 0, index ); | |
empty = false; | |
if( lines_written != 0 || cgi ) cout << "\";" << endl; | |
break; | |
} | |
} | |
///////////////////////////////////////////////////////////////////// | |
// If line is empty simply insert an extra newline in the | |
// terminating newline string. | |
if ( empty ) | |
{ | |
cout << "\\n"; | |
continue; | |
} | |
///////////////////////////////////////////////////////////////////// | |
// Find all bad characters in the escape character list and add the | |
// '\' escape character to the front. When you encounter a tab | |
// character replace with the string '\\t'. | |
for ( index = buffer.find_first_of( escapees, 0 ); | |
index != string::npos; | |
index = buffer.find_first_of( escapees, index + 2 ) ) | |
{ | |
if ( buffer[index] == '\t' ) buffer.replace( index, 1, "\\t" ); | |
else buffer.insert( index, "\\" ); | |
} | |
cout << "print \"" << buffer << "\", \"\\n"; | |
lines_written++; | |
} | |
cout << "\";" << endl; | |
///////////////////////////////////////////////////////////////////// | |
// T E R M I N A T I O N ////////////////////////////////////////// | |
///////////////////////////////////////////////////////////////////// | |
if( fin != NULL ) fin->close(); | |
return 0; | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment