#include <fstream> #include <sstream> #include "model.h" using namespace std; const wchar_t COMMENT_CHAR = L'#'; const wchar_t PARAMNAME_SEP = L':'; const wchar_t PARAM_ORDER = L'@'; const wchar_t PARAM_REF_BEGIN = L'<'; const wchar_t PARAM_REF_END = L'>'; const wchar_t WEIGHT_BEGIN = L'('; const wchar_t WEIGHT_END = L')'; const wchar_t SET_BEGIN = L'{'; const wchar_t SET_END = L'}'; const wchar_t SET_ORDER = L'@'; const wchar_t SET_SEP = L','; // default separator of param names in submodel/cluster definition const wchar_t RESULT_PARAM_PREFIX = L'$'; // note: keep it consistent with cpsyntax // TODO: change the way we detect the constraints, this is error-prone wstring CONSTRAINT_PATTERN1 = L"IF"; wstring CONSTRAINT_PATTERN2 = L"IF*[*]*"; wstring CONSTRAINT_PATTERN3 = L"[*]*"; wstring CONSTRAINT_PATTERN4 = L"(*[*]*"; wstring CONSTRAINT_PATTERN5 = L"IF*ISNEGATIVE"; wstring CONSTRAINT_PATTERN6 = L"IF*ISNEGATIVE*(*"; wstring CONSTRAINT_PATTERN7 = L"IF*ISPOSITIVE"; wstring CONSTRAINT_PATTERN8 = L"IF*ISPOSITIVE*(*"; // // // bool lineIsComment( wstring& line ) { wstring trimmedLine = trim( line ); if ( trimmedLine.empty() ) return( false ); return( trimmedLine.at( 0 ) == COMMENT_CHAR ); } // // detects whether a line is a constraint // TODO: have better detection here // bool lineIsConstraint( wstring& line ) { wstring trimmed = line; toUpper( trimmed ); trimmed = trim( trimmed ); // if the line contains just "IF", it is a constraint if( 0 == stringCompare( trimmed, CONSTRAINT_PATTERN1, false ) ) { return( true ); } // if the line matches any of the other patterns, it's a constraint return ( patternMatch( CONSTRAINT_PATTERN2, trimmed ) || patternMatch( CONSTRAINT_PATTERN3, trimmed ) || patternMatch( CONSTRAINT_PATTERN4, trimmed ) || patternMatch( CONSTRAINT_PATTERN5, trimmed ) || patternMatch( CONSTRAINT_PATTERN6, trimmed ) || patternMatch( CONSTRAINT_PATTERN7, trimmed ) || patternMatch( CONSTRAINT_PATTERN8, trimmed ) ); } // // detects whether a line is a submodel or a cluster definition // must begin with { and must have } somewhere // bool lineIsParamSet( wstring& line ) { wstring trimmed = trim( line ); if( trimmed.empty() ) { return( false ); } if( trimmed[ 0 ] != SET_BEGIN ) { return( false ); } size_t setend = trimmed.find( SET_END ); if( wstring::npos == setend ) { return( false ); } return( true ); } // // reads one line from a file // bool readLineFromFile( wifstream& file, wstring& line ) { line = L""; if( file.eof() ) return( false ); wchar_t c; while( true ) { file.get( c ); if( file.eof() || c == L'\n' || c == 0 ) return( true ); line += c; } return( true ); } // // read one parameter, these are in the following format: // param [@ N] : val1, ~val2, val3a | val3b, val4 // bool CModelData::readParameter( wstring& line ) { CModelParameter parameter; // param name can be separated by : or , wstring::size_type paramSep = line.find( PARAMNAME_SEP ); if( paramSep == wstring::npos ) { paramSep = line.find( ValuesDelim ); if( paramSep == wstring::npos ) { PrintMessage( InputDataError, L"Parameter", (wchar_t*) line.c_str(), L"should have at least one value defined" ); return( false ); } } wstring name = trim( line.substr( 0, paramSep )); unsigned int order = UNDEFINED_ORDER; //check if this param has custom-order defined wstrings nameAndOrder; split( name, PARAM_ORDER, nameAndOrder ); double d; if( nameAndOrder.size() == 2 && stringToNumber( nameAndOrder[ 1 ], d )) { name = trim( nameAndOrder[ 0 ]); if( d > 0 ) { order = static_cast< unsigned int >( d ); } } parameter.Name = name; parameter.Order = order; if ( ! parameter.Name.empty() && parameter.Name[ 0 ] == RESULT_PARAM_PREFIX ) { parameter.IsResultParameter = true; } // now get the values wstring rawValues = line.substr( paramSep + 1, line.length() - paramSep - 1 ); wstrings values; split( rawValues, ValuesDelim, values ); for( wstrings::iterator i_val = values.begin(); i_val != values.end(); i_val++ ) { *i_val = trim( *i_val ); // // if it is in a form <text> it is a reference to another parameter // find an existing parameter and add all its values here instead // vector< CModelParameter >::iterator refParam; if ( ! i_val->empty() && *(i_val->begin()) == PARAM_REF_BEGIN && *(i_val->rbegin()) == PARAM_REF_END &&( refParam = FindParameterByName( i_val->substr( 1, i_val->length() - 2 ))) != Parameters.end() ) { __push_back( parameter.Values, refParam->Values.begin(), refParam->Values.end() ); } else { // // value weight // Param: Val1 (3), Val21|Val22 (2), Val3 // int weight = 1; size_t weightBegin = i_val->find_last_of( WEIGHT_BEGIN ); size_t weightEnd = i_val->find_last_of( WEIGHT_END ); // '(' must exist, ')' must be the last character if ( weightBegin != -1 && weightEnd == i_val->length() - 1 ) { wstring weightStr = trim( i_val->substr( weightBegin + 1, weightEnd - weightBegin - 1 )); double weightDbl = 0; // anything after @ must be a positive integer if ( stringToNumber( weightStr, weightDbl ) && ( static_cast< unsigned int > (weightDbl) ) > 0 ) { weight = static_cast< unsigned int > (weightDbl); // trim the weight off the value i_val->erase( weightBegin, wstring::npos ); *i_val = trim( *i_val ); } } // // names // wstrings names; split( *i_val, NamesDelim, names ); bool positive = true; for ( wstrings::iterator i_name = names.begin(); i_name != names.end(); i_name++ ) { *i_name = trim( *i_name ); // only the first name determines the negativity of a value if ( i_name->length() > 0 && i_name == names.begin() &&(*i_name)[ 0 ] == InvalidPrefix ) { positive = false; *i_name = trim( i_name->substr( 1, i_name->length() - 1 )); } } if ( ! positive ) { m_hasNegativeValues = true; } CModelValue value( names, weight, positive ); parameter.Values.push_back( value ); } } Parameters.push_back( parameter ); return( true ); } // // // void CModelData::getUnmatchedParameterNames( wstrings& paramsOfSubmodel, wstrings& unmatchedParams ) { for( auto & cparam : paramsOfSubmodel ) { bool found = false; for( auto & param : Parameters ) { if ( 0 == stringCompare( cparam, param.Name, CaseSensitive )) { found = true; break; } } if ( ! found ) { unmatchedParams.push_back( cparam ); } } } // // // bool CModelData::readParamSet( wstring& line ) { const wstring STD_MSG = L"Submodel definition is incorrect: " + line; wstringstream ist( line ); // it's always in a form of { paramName1 @ N, paramName2 @ N, ... } @ N but "@ N" is optional wstring s; ist >> s; wstring::iterator next = line.begin(); // { wstring::iterator begin = findFirstNonWhitespace( next, line.end() ); if( begin == line.end() || *begin != SET_BEGIN ) { PrintMessage( InputDataError, (wchar_t*) STD_MSG.data() ); return( false ); } ++begin; // find } wstring::iterator end; end = find( begin, line.end(), SET_END ); if ( end == line.end() ) { PrintMessage( InputDataError, (wchar_t*) STD_MSG.data() ); return( false ); } // params in the middle wstring setp; setp.assign( begin, end ); setp = trim( setp ); if ( setp.empty() ) { PrintMessage( InputDataError, (wchar_t*) STD_MSG.data() ); return( false ); } // // Two attempts to resolve submodel names: // 1. Use a comma as a separator // 2. If 1 fails to produce matching names, use ModelData.ValuesDelim as a separator // // first figure out whether "," or a delimiter specified by /d option applies wstrings setParams; split( setp, SET_SEP, setParams ); transform( setParams.begin(), setParams.end(), setParams.begin(), trim ); wstrings unmatched; getUnmatchedParameterNames( setParams, unmatched ); if( !unmatched.empty() ) { setParams.clear(); unmatched.clear(); split( setp, ValuesDelim, setParams ); transform( setParams.begin(), setParams.end(), setParams.begin(), trim ); getUnmatchedParameterNames( setParams, unmatched ); if( !unmatched.empty() ) { PrintMessage( InputDataWarning, L"Submodel defintion", (wchar_t*) trim( line ).data(), L"contains unknown parameter. Skipping..." ); return( true ); // just a warning so don't exit } } // remove duplicates sort( setParams.begin(), setParams.end(), stringCaseInsensitiveLess ); wstrings::iterator newEnd = unique( setParams.begin(), setParams.end(), stringCaseInsensitiveEquals ); if( setParams.end() != newEnd ) { PrintMessage( InputDataWarning, L"Submodel defintion", (wchar_t*) trim( line ).data(), L"contains duplicate parameters. Removing duplicates..." ); setParams.erase( newEnd, setParams.end() ); } CModelSubmodel submodel; // match to names, set up the structure for( auto & cparam : setParams ) { bool found = false; unsigned int index = 0; for( auto & param : Parameters ) { if ( 0 == stringCompare( cparam, param.Name, CaseSensitive )) { found = true; break; } ++index; } // at this point we should always match the name assert( found ); submodel.Parameters.push_back( index ); } // @ ++end; wstring::iterator at = findFirstNonWhitespace( end, line.end() ); // anything other than @, quit if ( at != line.end() && *at != SET_ORDER ) { PrintMessage( InputDataError, (wchar_t*) STD_MSG.data() ); return( false ); } if ( at == line.end() ) { // if this is the end then order will be assigned later NOOP } else { ++at; // number wstring numberText; numberText.assign( at, line.end() ); double number; bool ret = stringToNumber( numberText, number ); int order = 0; if( ret ) { order = static_cast<int> (number); if( order <= 0 ) { order = 0; ret = false; } } if ( !ret ) { PrintMessage( InputDataError, (wchar_t*) STD_MSG.data() ); return( false ); } submodel.Order = order; } Submodels.push_back( submodel ); return ( true ); } // // // bool CModelData::readModel( const wstring& filePath ) { // Some implementations of wifstream only allow ANSI strings as file names so converting before using string ansiFilePath = wideCharToAnsi( filePath ); wifstream file( ansiFilePath.c_str() ); if ( !file ) { PrintMessage( InputDataError, L"Couldn't open file:", (wchar_t*)filePath.data() ); return( false ); } wstring line; // read definition of parameters bool firstLine = true; while( true ) { // skip not important stuff if ( lineIsEmpty( line ) || lineIsComment( line )) { if ( ! readLineFromFile( file, line )) return( true ); continue; } if ( firstLine ) { m_encoding = getEncodingType( line ); if ( m_encoding != ANSI && m_encoding != UTF8 ) { PrintMessage( InputDataError, L"Only ANSI and UTF-8 are supported" ); return( false ); } firstLine = false; } // continue reading until a submodel/cluster or a constraint if ( lineIsParamSet( line ) || lineIsConstraint( line )) break; if ( ! readParameter( line )) return( false ); if ( ! readLineFromFile( file, line )) return( true ); } // read submodels if ( lineIsParamSet( line )) { while( true ) { // skip not important stuff if ( lineIsEmpty( line ) || lineIsComment( line )) { if ( ! readLineFromFile( file, line )) return( true ); continue; } // continue reading until a constraint if ( lineIsConstraint( line )) break; if ( ! readParamSet( line )) return( false ); if ( ! readLineFromFile( file, line )) return( true ); } } // anything that's left is constraints while( true ) { // if only a line is not empty or not a comment, // it's got to be a part of constraints definition if ( ! ( lineIsEmpty( line ) || lineIsComment( line ))) { ConstraintPredicates += line; } if ( ! readLineFromFile( file, line )) return( true ); } return( true ); } // // reads model file // bool CModelData::ReadModel( const wstring& filePath ) { if( !readModel( filePath )) { return( false ); } if( !ValidateParams() ) { return( false ); } return( true ); } // // // bool CModelData::ReadRowSeedFile( const wstring& filePath ) { if( trim( filePath ).empty() ) return( true ); // Some implementations of wifstream only allow ANSI strings as file names so converting before using string ansiFilePath = wideCharToAnsi( filePath ); wifstream file( ansiFilePath.c_str() ); if ( !file ) { PrintMessage( InputDataError, L"Couldn't open file:", (wchar_t*)filePath.data() ); return( false ); } wstring line; // parameter names bool fileEmpty = false; if ( readLineFromFile( file, line )) { if ( trim( line ).empty() ) fileEmpty = true; } else { fileEmpty = true; } if ( fileEmpty ) { PrintMessage( RowSeedsWarning, L"Seeding file is empty" ); return( true ); } EncodingType encoding = getEncodingType( line ); if ( encoding != ANSI && encoding != UTF8 ) { PrintMessage( RowSeedsError, L"Only ANSI and UTF-8 are supported" ); return( false ); } vector< vector<CModelParameter>::iterator > parameters; wstrings params; split( line, RESULT_DELIMITER, params ); for( auto & param : params ) { vector<CModelParameter>::iterator found = FindParameterByName( param ); if ( found == Parameters.end()) { PrintMessage( RowSeedsWarning, L"Parameter", (wchar_t*) param.data(), L"not found in the model. Skipping..." ); } parameters.push_back( found ); } // if any parameter equals to ModelData.Parameters.end() // this parameter could not be found in the model while( readLineFromFile( file, line )) { if ( trim(line).empty() ) break; wstrings values; split( line, RESULT_DELIMITER, values ); unsigned int n_param = 0; CModelRowSeed rowSeed; for ( wstrings::iterator i_value = values.begin(); i_value != values.end(); ++i_value, ++n_param ) { // There could be fewer parameter names (in the first line) // than there is values in the following lines. This has // to be detected and a warning issued if ( n_param < (unsigned int) parameters.size() && parameters[ n_param ] != Parameters.end() ) { CModelParameter ¶m = *(parameters[ n_param ]); // remove the negative marker and match up the raw name if ( i_value->length() > 0 && (*i_value)[ 0 ] == InvalidPrefix ) { *i_value = trim( i_value->substr( 1, i_value->length() - 1 )); } // if any value could not be found, the whole seed row is not invalid // we just remove that one offending value and the rest of the row can // stay intact; we cannot really warn about this as in a model with // submodels this is very normal int found = param.GetValueOrdinal( *i_value, CaseSensitive ); if ( found == -1 ) { if ( ! i_value->empty() ) { PrintMessage( RowSeedsWarning, L"Value", (wchar_t*) i_value->data(), L"not found in the model. Skipping this value..." ); } } else { // we don't care about result parameters as we should not seed we expected results if ( ! param.IsResultParameter ) { rowSeed.push_back( make_pair( param.Name, *i_value )); } } } } if ( ! rowSeed.empty() ) { RowSeeds.push_back( rowSeed ); } } if( ! ValidateRowSeeds()) { return( false ); } return( true ); }