//
// $Id: REGEX.cpp,v 1.3 2004/10/15 19:39:55 bakerj Exp $
//
//************************** Property of the MITRE Corporation ***************************//
//
// Copyright (c) 2003 - The MITRE Corporation
//
// This file is part of the OVAL Query Interpreter project.
//
// The OVAL Query Interpreter is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License as published by the Free
// Software Foundation; either version 2 of the License.
//
// The OVAL Query Interpreter is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License along with the
// OVAL Query Interpreter; if not, write to the Free Software Foundation, Inc.,
// 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
//****************************************************************************************//

#include "REGEX.h"

REGEX::REGEX()
{
	// -----------------------------------------------------------------------
	//  ABSTRACT
	//
	//  Default constructor. Starts the running match counter (matchCount)
	//  at zero so a freshly built matcher has recorded no matches.
	//
	// -----------------------------------------------------------------------

	matchCount = 0;
}

REGEX::~REGEX()
{
	// -----------------------------------------------------------------------
	//  ABSTRACT
	//
	//  Destructor. The body is intentionally empty: no cleanup is performed
	//  here.
	//
	// -----------------------------------------------------------------------
}


string REGEX::EscapeRegexChars(string stringIn)
{
	// -----------------------------------------------------------------------
	//  ABSTRACT
	//
	//  Return a copy of stringIn in which every regular expression character
	//	that is not immediately preceded by a '\' has a '\' inserted in front
	//	of it. The following chars are treated as regular expression chars:
	//	^ $ \ . [ ] ( ) { } * + ?
	//
	//	A character at the very start of the string has no predecessor and is
	//	therefore always escaped.
	//
	//	NOTE: only the single preceding character is inspected; runs of
	//	backslashes are not counted.
	//
	// -----------------------------------------------------------------------

	const string regexChars = "^$\\.[](){}*+?";
	string fixedString = stringIn;

	//	Use string::size_type so the result of find_first_of can be compared
	//	against string::npos without truncation on platforms where size_t is
	//	wider than unsigned int.
	string::size_type pos = fixedString.find_first_of(regexChars, 0);
	while (pos != string::npos)
	{
		//	A char at index 0 has no previous char, so it can not be escaped
		const bool alreadyEscaped = (pos > 0) && (fixedString[pos - 1] == '\\');

		if (alreadyEscaped)
		{
			//	Leave it alone and continue after it
			pos = fixedString.find_first_of(regexChars, pos + 1);
		}
		else
		{
			//	Insert the escape and skip both the new '\' and the char itself
			fixedString.insert(pos, "\\");
			pos = fixedString.find_first_of(regexChars, pos + 2);
		}
	}

	return fixedString;
}

int REGEX::FindFirstRegexChar(const string stringIn)
{
	// -----------------------------------------------------------------------
	//  ABSTRACT
	//
	//  This function takes a string and searches for the first regular 
	//	expression character. If one is found its location is returned. If none
	//	are found -1 is returned. Only regular expression chars that are not
	//	escaped are considered. The following are considered regular expression
	//	chars if they are not escaped:
	//	^ $ \ . [ ] ( ) * + ? 
	//
	// -----------------------------------------------------------------------
	
	string regexChars	= "^$\\.[](){}*+?";
	string prevChar		= "";
	string curChar		= "";
	string nextChar		= "";
	unsigned int pos	= string::npos;
	int slashCount		= 0;
	int prevIndex		= 0;

	pos = stringIn.find_first_of(regexChars, 0);

	//	Check that one is found
	if(pos == string::npos)
		return -1;

	while (pos != string::npos)
	{
		//	ensure that the char is not escaped
		prevIndex = pos-1;
		if(prevIndex == -1)
			prevChar = "";
		else
			prevChar = stringIn.at(prevIndex);

		curChar = stringIn.at(pos);
		nextChar = stringIn.at(pos+1);
		
		//	If a '\' check that the next next char is a '\'
		if (strncmp(curChar.c_str(), "\\", 1) == 0)
		{
			if(strncmp(nextChar.c_str(), "\\", 1) == 0)
			{
				pos = stringIn.find_first_of(regexChars, pos+2);

			}else
			{
				break;
			}
			
		}else
		{
			//	Get count of consecutive previous '\'s
			slashCount = 0;
			while(prevChar.compare("\\") == 0 && prevIndex > 0)
			{
				slashCount++;
				prevChar = stringIn.at(--prevIndex);
			}

			if(slashCount % 2 == 0)
				break;
			
			pos = stringIn.find_first_of(regexChars, pos+1);
		}
	}

	return pos;	
}

int REGEX::FindLastRegexChar(const string stringIn)
{
	//------------------------------------------------------------------------------------//
	//
	//  ABSTRACT
	//
	//  This function takes a string and searches for the last regular 
	//	expression character. If one is found its location is returned. If none
	//	are found -1 is returned. Only regular expression chars that are not
	//	escaped are considered. The following are considered regular expression
	//	chars if they are not escaped:
	//
	//                 ^ $ \ . [ ] ( ) { } * + ?
	//
	//------------------------------------------------------------------------------------//

	string regexChars	= "^$\\.[](){}*+?";
	string prevChar		= "";
	unsigned int pos	= string::npos;
	int slashCount		= 0;
	int prevIndex		= 0;

	pos = stringIn.find_last_of(regexChars, stringIn.length());

	// Check that at least one regex character is found.

	if (pos == string::npos) return -1;

	while (pos != string::npos)
	{
		// Ensure that the char in question is not escaped.

		prevIndex = pos-1;

		if ((prevIndex) == -1) prevChar = "";
		else prevChar = stringIn.at(prevIndex);

		if (strncmp(prevChar.c_str(), "\\", 1) == 0)
		{
			// We have to make sure the preceeding slash is not part of a double slash as
			// that would negate the escape.  Get count of consecutive previous '\'s.  If
			// it is an even number, then the regex character in question is not escaped.

			slashCount = 1;
			prevChar = stringIn.at(--prevIndex);

			while(prevChar.compare("\\") == 0 && prevIndex > 0)
			{
				slashCount++;
				prevChar = stringIn.at(--prevIndex);
			}

			if(slashCount % 2 == 0) break;
			
			pos = stringIn.find_last_of(regexChars, (pos - slashCount - 1));
		}
		else
		{
			break;
		}
	}

	return pos;	
}

void REGEX::GetConstantPortion(string patternIn, string delimIn, string *patternOut, string *constOut)
{
	// -----------------------------------------------------------------------
	//	Abstract
	//
	//	Split patternIn into a leading constant portion (*constOut) and the
	//	remaining pattern (*patternOut). If no constant portion is found
	//	*constOut is set to "". If the entire string is constant *patternOut
	//	is set to "". The input delimiter is used to ensure that constant
	//	strings are treated as a unit.
	//
	//	A leading '^' and a trailing '$' are removed before the pattern is
	//	examined. The pattern is then consumed piece by piece from the front:
	//	each piece runs from the start of the remaining pattern through the
	//	next occurrence of the delimiter plus one further character. While
	//	the piece is constant (see IsConstant) it is appended to *constOut
	//	and removed from *patternOut.
	//	NOTE(review): the one extra character per piece presumably accounts
	//	for the delimiter being escaped inside the pattern - confirm against
	//	the callers.
	//
	//	If a pattern remains afterwards, the '$' is put back when it was
	//	removed, and a '^' is prepended when it was removed or when a constant
	//	portion was split off.
	//
	//	patternOut and constOut must point to valid strings.
	//
	// -----------------------------------------------------------------------

	const string::size_type delimLen = delimIn.length();
	bool rmCarrot = false;
	bool rmDollar = false;

	(*patternOut) = patternIn;
	(*constOut) = "";

	//	Check if the pattern starts with a carrot (^). The empty check keeps
	//	an empty pattern from raising std::out_of_range.
	if(!patternOut->empty() && (*patternOut)[0] == '^')
	{
		patternOut->erase(0, 1);
		rmCarrot = true;
	}

	//	Check if the pattern ends with a dollar ($). The pattern may have
	//	become empty above (input "^"), so test again.
	if(!patternOut->empty() && (*patternOut)[patternOut->length() - 1] == '$')
	{
		patternOut->erase(patternOut->length() - 1, 1);
		rmDollar = true;
	}

	//	string::size_type keeps the comparison with string::npos valid on
	//	platforms where size_t is wider than unsigned int. The empty test
	//	guarantees termination even when delimIn is the empty string.
	string::size_type nextDelim = string::npos;
	while(!patternOut->empty() && (nextDelim = patternOut->find(delimIn, 0)) != string::npos)
	{
		//	Get the next piece
		const string::size_type pieceLen = nextDelim + 1 + delimLen;
		const string piece = patternOut->substr(0, pieceLen);

		//	If the piece is not constant, processing is finished
		if(!IsConstant(piece))
			break;

		//	Add the piece to the constant string
		constOut->append(piece);

		//	Remove the piece from the pattern. erase() clamps the count, so a
		//	piece that would run past the end simply empties the pattern.
		patternOut->erase(0, pieceLen);
	}

	//	Finally check the last piece of the pattern
	if(IsConstant((*patternOut)))
	{
		constOut->append(*patternOut);
		patternOut->clear();
	}

	//	Add the $ back to the end of the pattern if it was removed
	//	and there is a pattern remaining
	if(rmDollar && !patternOut->empty())
	{
		patternOut->append("$");
	}

	//	Add the carrot to the beginning of the pattern (if removed
	//	or a constant portion was found) and there is still a pattern left
	if((rmCarrot || !constOut->empty()) && !patternOut->empty())
	{
		patternOut->insert(0, "^");
	}
}

bool REGEX::IsConstant(string pattern)
{
	// -----------------------------------------------------------------------
	//	Abstract
	//
	//	Return true if the specified pattern is constant, i.e.
	//	FindFirstRegexChar reports no regular expression char in it. Leading
	//	'^' chars are ignored. If the string is of length = 0 return true.
	//
	//	The empty test is made before the pattern is scanned, so an empty
	//	pattern is never handed to FindFirstRegexChar.
	//
	// -----------------------------------------------------------------------

	while(!pattern.empty())
	{
		const int regexChar = FindFirstRegexChar(pattern);

		//	No regex char at all: the pattern is constant
		if(regexChar == -1)
			return true;

		//	Anything other than a leading ^ makes the pattern non-constant
		if(regexChar != 0 || pattern[0] != '^')
			return false;

		//	Remove the leading ^ and retest what is left
		pattern.erase(0, 1);
	}

	return true;
}

bool REGEX::IsMatch(const char *patternIn, const char *searchStringIn)
{
	// -----------------------------------------------------------------------
	//	Abstract
	//
	//	Return true if the searchString matches the specifed pattern
	//	
	//	Regular expression support is provided by the REGEX library package,
	//	which is open source software, written by Philip Hazel, and copyright
	//	by the University of Cambridge, England. 
	//
	//	Download site:	ftp://ftp.csx.cam.ac.uk/pub/software/programming/REGEX/
	//
	// -----------------------------------------------------------------------

	bool		result				= false;
	pcre		*compiledPattern;
	const char	*error;
	int			erroffset;

	//	Test the match count
	if(this->matchCount >= MAXMATCHES)
	{
		string errMsg = "Warning: The specified pattern has matched more than the supported number of items.";
		errMsg.append("\nPattern: ");
		errMsg.append(patternIn);
		throw REGEXException(errMsg, ERROR_WARN);	
	}
		
	//	Compile the pattern
	compiledPattern = pcre_compile(	patternIn,			// the pattern					
									PCRE_CASELESS,		// default options				
									&error,				// for error message			
									&erroffset,			// for error offset				
									NULL);				// use default character tables	

	//	Check for compile errors
	if(compiledPattern == NULL)
	{
		string errMsg = "Error: Failed to compile the specifed regular expression pattern.\n\tPattern: ";
		errMsg.append(patternIn);
		errMsg.append("\n\tOffset: ");
		
		ostringstream erroffsetStr;
		erroffsetStr << erroffset;
		
		errMsg.append(erroffsetStr.str());
		errMsg.append("\n\tMessage: ");
		errMsg.append(error);
		throw REGEXException(errMsg);
	}
 

	//	Match a pattern
	int rc;
	int ovector[30];
	rc = pcre_exec(	compiledPattern,		// result of REGEX_compile()			
					NULL,					// we didn't study the pattern		
					searchStringIn,			// the subject string				
					strlen(searchStringIn),	// the length of the subject string	
					0,						// start at offset 0 in the subject	
					0,						// default options					
					ovector,				// vector for substring information	
					30);					// number of elements in the vector	

	//	Test the return value of the pattern match 
	//	and increment the match count if a match was found
	if(rc>=1)
	{
		result = true;
		this->matchCount++;
	}else
	{
		result = false;
	}

	free(compiledPattern);

	return(result);
}

string REGEX::RemoveExtraSlashes(string strIn)
{
	// -----------------------------------------------------------------------
	//  ABSTRACT
	//
	//  This function takes a string and searches for all the double '\'s.
	//	Each double '\' is converted to a single '\'. The converted string is
	//	returned.
	//
	// -----------------------------------------------------------------------

	const string doubleSlash = "\\\\";

	//	string::size_type keeps the comparison with string::npos valid on
	//	platforms where size_t is wider than unsigned int.
	string::size_type pos = strIn.find(doubleSlash, 0);
	while (pos != string::npos)
	{
		//	Drop the first '\' of the pair, then resume after the '\' that
		//	was kept so it is not paired up again.
		strIn.erase(pos, 1);
		pos = strIn.find(doubleSlash, pos + 1);
	}

	return strIn;
}

void REGEX::Reset()
{	// -----------------------------------------------------------------------
	//  ABSTRACT
	//
	//  Set the match count back to zero
	//
	// -----------------------------------------------------------------------

	this->matchCount = 0;

}


//****************************************************************************************//
//								REGEXException Class										  //	
//****************************************************************************************//
REGEXException::REGEXException() : Exception()
{
	// -----------------------------------------------------------------------
	//	Abstract
	//
	//	Default constructor. All work is delegated to the default constructor
	//	of the Exception base class; nothing else is done here.
	//	NOTE(review): the base default constructor presumably sets the
	//	severity to ERROR_FATAL - confirm against the Exception class.
	//
	// -----------------------------------------------------------------------
}

REGEXException::REGEXException(string errMsgIn) : Exception(errMsgIn)
{
	// -----------------------------------------------------------------------
	//	Abstract
	//
	//	Construct an exception carrying the given error message. The message
	//	is forwarded to the Exception base class constructor that takes a
	//	single string param; nothing else is done here.
	//	NOTE(review): that base constructor presumably sets the severity to
	//	ERROR_FATAL - confirm against the Exception class.
	//
	// -----------------------------------------------------------------------
}

REGEXException::REGEXException(string errMsgIn, int severity) : Exception(errMsgIn, severity)
{
	// -----------------------------------------------------------------------
	//	Abstract
	//
	//	Construct an exception carrying the given error message and severity.
	//	Both values are forwarded unchanged to the Exception base class
	//	constructor that takes a string msg and an int severity param;
	//	nothing else is done here.
	//
	// -----------------------------------------------------------------------
}

REGEXException::~REGEXException()
{
	// -----------------------------------------------------------------------
	//	Abstract
	//
	//	Destructor. The body is intentionally empty: no cleanup is performed
	//	here.
	//
	// -----------------------------------------------------------------------
}

