Sample Source Code
Proxima: cgi.pl

About the Program

"Proxima" is a combination HTTP daemon and HTTP proxy server, designed for QuickATM's internet kiosk project. It will run on the servers in each pod of kiosks, providing a clean interface to the web for web surfers -- complete with 'smut' URL blocking, nice error messages, different levels of users (with different URL restrictions), and transaction logging. It was written to be fully compliant with the HTTP/1.0 and CGI/1.1 specifications.

This module handles local HTTP requests for CGI scripts. Care is taken to make the CGI script secure by escaping or removing potentially dangerous metacharacters. Script output is filtered, and an appropriate HTTP header is returned -- both for normal CGI scripts and for the non-pared-header 'nph-' scripts. The single entry point into this module is &HandleCGI.

The Code

# ----------------------------------------------------
# 'cgi.pl'
# Part of the Proxima httpd/proxy server.
# Copyright (c) 1996 by QuickATM, all rights reserved.
# ----------------------------------------------------

# ---------------------------------------------------------------------------

sub CGIPrepareEnvironment {
    local( $key );

    # purge the environment.
    foreach ( keys %ENV ) {
        $ENV{ $_ } = "" if ( ! /^path$/i );
    }
    undef %ENV;
    reset %ENV;

    # add in elements from the header.
    foreach $key ( @headers ) {
        local( $envkey, $envvalue );
        $envkey = "HTTP_$key";
        $envkey =~ y/-a-z/_A-Z/;
        $envvalue = $headers{ $key };
        $ENV{ $envkey } = $envvalue;
    }

    # protocol identifiers.
    $ENV{ "SERVER_SOFTWARE" }   = "$program/$version";
    $ENV{ "SERVER_PROTOCOL" }   = "$our_http_version";
    $ENV{ "GATEWAY_INTERFACE" } = "$our_cgi_version";

    # the server.
    $ENV{ "SERVER_NAME" }       = "$local_host";
    $ENV{ "SERVER_PORT" }       = "$local_port";

    # the client.
    $ENV{ "REMOTE_HOST" }       = "$client_name";
    $ENV{ "REMOTE_ADDR" }       = "$client_addr";

    # the request.
    $ENV{ "REQUEST_METHOD" }    = "$http_method";
    $ENV{ "SCRIPT_NAME" }       = "$cgi_dir/$cgi_file";
    $ENV{ "QUERY_STRING" }      = "$local_args";

    # the form data (if any).
    if ( $http_method eq "POST" ) {
        $ENV{ "CONTENT_TYPE" }   = "$http_content_data";
        $ENV{ "CONTENT_LENGTH" } = "$http_content_length";
    }

    # path information (if an imagemap).
    if ( $cgi_path_info ne "" ) {
       $ENV{ "PATH_INFO" }       = "$cgi_path_info";
       $ENV{ "PATH_TRANSLATED" } = &NormalizeFilename( $cgi_path_info );
    }
}

# ---------------------------------------------------------------------------

sub CGIExtractPathInfo {
    local( $next, $unparsed );

    # some safe defaults.
    $cgi_dir = "";
    $cgi_file = "";
    $cgi_path_info = "";

    # parse everything but the initial slash.
    $unparsed = $local_file;
    $unparsed =~ s#^/##g;

    while ( $unparsed =~ /\// ) {
        # extract the next path component.
        ( $next, $unparsed ) = split( /\//, $unparsed, 2 );

        # if this file/directory doesn't exist then the URL was bogus!
        if ( ! -e "$cgi_dir/$next" ) {
            &ErrorNoFile;
            return;
        }

        # if this is a file, the rest must be a PATH_INFO (like for imagemaps).
        if ( ! -d "$cgi_dir/$next" ) {
            $cgi_file = "$next";
            $cgi_path_info = "/$unparsed";
            last;
        }

        # otherwise, this is a valid directory -- so add it and keep recursing.
        $cgi_dir .= "/$next";
        $cgi_file = $unparsed;
    }

    # if we're in the root directory (unlikely) add a slash.
    $cgi_dir = "/" if ( $cgi_dir eq "" );
}

# ---------------------------------------------------------------------------

sub CGISecureFilename {
    $local_file =~ s/([;<>\*\|`&\$!#\(\)\[\]\{\}:'"])/_/g;
}

sub CGISecureArgs {
    local( @arr );

    return if ( $cgi_args eq "" );

    # first, remove all quotes and dollar signs.
    $cgi_args =~ s/([`\$'"])/_/g;

    # now, enclose all components in double quotes.
    # to do this we quote each component and then add spaces.
    @arr = split( /\s+/, $cgi_args );
    $cgi_args = '"' . join( '""', @arr ) . '"';
    $cgi_args =~ s/""/" "/g;
}

# ---------------------------------------------------------------------------

sub HandleCGI {
    local( $cgi_dir, $cgi_file );
    local( $cgi_stdin, $cgi_stdout, $result );
    local( $ret_status, $ret_location, $ret_content_type );
    local( $size );

    # for security, remove some nasty shell meta-characters.
    # recall that all URL-Decoding has already been done.
    &CGISecureFilename;
    $cgi_args = $local_args;
    &CGISecureArgs;

    &CGIExtractPathInfo;

    &CGIPrepareEnvironment;

    $cgi_stdin  = "$tmp_dir/.$program_$$_cgi_stdin";
    $cgi_stdout = "$tmp_dir/.$program_$$_cgi_stdout";

    `rm -f $cgi_stdin $cgi_stdout`;

    # create STDIN for this form (if any).
    open( CGI, ">$cgi_stdin" );
    print CGI "$http_content_data";
    print CGI "\r\n";
    close( CGI );

    # build the CGI command.
    $cgi_command = "cd $cgi_dir; ./$cgi_file $cgi_args";

    # DEBUG STATEMENTS.
    # print "\n$cgi_command < $cgi_stdin >& $cgi_stdout\n";
    # foreach $K ( sort keys %ENV ) { print "$K -> $ENV{$K}\n"; }

    # execute the CGI script.
    # BOOFA make sure it doesn't hang???
    $result = `$cgi_command < $cgi_stdin >& $cgi_stdout`;
    ( $size ) = ( stat( $cgi_stdout ) )[ 7 ];

    # now parse STDOUT.
    open( CGI, "<$cgi_stdout" );

    # we have to parse the initial lines and send an HTTP header.
    if ( $cgi_file !~ /^nph-/ ) {
        # parse the return header and rewrite it!
        local( $line, $ret_status, $ret_location, $ret_content_type );

        # default values.
        $ret_status = "200 Ok";
        $ret_location = "";
        $ret_content_type = "text/html";

        # read in the CGI header.
        while( $line = <CGI> ) {
            local( $field, $value );

            # adjust the length of the remaining data.
            $size -= length( $line );

            # strip off the CR/LF.
            $line =~ s/\r?\n$//;
            last if ( $line eq "" );

            ( $field, $value ) = split( /\s*:\s*/, $line, 2 );
            $field = &NormalizeHeaderField( $field );
            if ( $field eq "Status" ) {
                $ret_status = $value;
            } elsif ( $field eq "Location" ) {
                $ret_location = $value;
            } elsif ( $field eq "Content-Type" ) {
                $ret_content_type = $value;
            } else {
                &PushHeader( $field, $value );
            }
        }

        # first the redirection, or the result code.
        if ( $ret_location ne "" ) {
            ( $result_code, $result_reason ) = ( 302, "Document Redirection" );
            &PushStatus( $result_code, $result_reason );
            if ( $ret_location =~ /(http:\/\/$local_host):$local_port(.*)/ ) {
               # get rid of the extraneous port information
               # that NCSA's imagemap adds in.
               $ret_location = "$1$2";
            }
            &PushHeader( "Location", $ret_location );
        } else {
            ( $result_code, $result_reason ) = split( /\s+/, $ret_status, 2 );
            &PushStatus( $result_code, $result_reason );
        }

        # push the content type.
        &PushHeader( "Content-Type", $ret_content_type );

        # this is the length of the remaining data.
        if ( ( $size > 0 ) && ( $result_code == 200 ) ) {
            &PushHeader( "Content-Length", $size );
        }

    } else {
        # files starting with 'nph-' provide full HTTP headers!
        # but of course we rewrite them anyway...

        # first the result codes..
        $line = <CGI>;
        ( $result_code, $result_reason ) = ( split( /\s+/, $line, 3 ) )[ 1..2 ];
        &PushStatus( $result_code, $result_reason );

        # and then the rest of the header.
        while( $line = <CGI> ) {
            local( $field, $value );

            $line =~ s/\r?\n$//;
            last if ( $line eq "" );

            ( $field, $value ) = split( /\s*:\s*/, $line, 2 );
            $field = &NormalizeHeaderField( $field );
            &PushHeader( $field, $value );
        }
    }

    # certain mime types are blocked still, eh?
    # even for CGI scripts.  even for 'nph-' CGI scripts.
    $mime = &LookAtHeader( "Content-Type" );
    if ( &IsBlockedMimeType( $mime ) ) {
        &ErrorBlockedMimeType( $mime );
        return;
    }

    &SendHeaderToClient;

    # now dump out all the CGI script output.
    &ResetBody;
    while( <CGI> ) {
        &AddToBody( $_ );
    }
    &FlushBody;
    close( CGI );

    # clean up the old files.
    `rm -f $cgi_stdin $cgi_stdout`;
}

# ---------------------------------------------------------------------------

1;