RapidMiner

0 Likes

Simulator Error - Cannot map index of nominal attribute to nominal value: index -1 is out of bounds

Status: Resolved

I get the attached error when executing the simulator operator, and was curious what I could do to troubleshoot. I've traced the data and all dependent (prior) processes execute ok. The process consumes 10,000 examples and about 47 attributes. Any guidance is appreciated.

 

 

<?xml version="1.0" encoding="UTF-8"?>
<process version="8.1.001">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="8.1.001" expanded="true" name="Process">
    <process expanded="true">
      <!-- Repository inputs: stored feature weights and a stored model. -->
      <operator activated="true" class="retrieve" compatibility="8.1.001" expanded="true" height="68" name="Feature Weights" width="90" x="179" y="391">
        <parameter key="repository_entry" value="../03 - Select Features/Weights/TPD3 Weights"/>
      </operator>
      <operator activated="true" class="retrieve" compatibility="8.1.001" expanded="true" height="68" name="Model" width="90" x="179" y="34">
        <parameter key="repository_entry" value="../06 - Learn Models/TPD3 - Bayes"/>
      </operator>
      <!-- Deactivated operator; no connection below references it. -->
      <operator activated="false" class="operator_toolbox:smote" compatibility="0.9.000" expanded="true" height="82" name="Smote Upsampling" width="90" x="447" y="136">
        <parameter key="Equalize Classes" value="false"/>
      </operator>
      <!-- Data path: Data Set, then Target (marks "Third Party Damage" as label),
           then Select by Weights (2), then Multiply (3). -->
      <operator activated="true" class="retrieve" compatibility="8.1.001" expanded="true" height="68" name="Data Set" width="90" x="45" y="238">
        <parameter key="repository_entry" value="../02 - Data/Prediction - MIPC1 - TPD3"/>
      </operator>
      <operator activated="true" class="set_role" compatibility="8.1.001" expanded="true" height="82" name="Target" width="90" x="179" y="238">
        <parameter key="attribute_name" value="Third Party Damage"/>
        <parameter key="target_role" value="label"/>
        <list key="set_additional_roles"/>
      </operator>
      <operator activated="true" class="select_by_weights" compatibility="8.1.001" expanded="true" height="103" name="Select by Weights (2)" width="90" x="313" y="238"/>
      <operator activated="true" automodel="EXPORTED" class="multiply" compatibility="8.1.001" expanded="true" height="103" name="Multiply (3)" width="90" x="581" y="238"/>
      <!-- Branch 1 (Multiply output 1): 0.8 / 0.2 stratified split feeding TPD Simulator. -->
      <operator activated="true" class="split_data" compatibility="8.1.001" expanded="true" height="103" name="Split Data (3)" width="90" x="715" y="187">
        <enumeration key="partitions">
          <parameter key="ratio" value="0.8"/>
          <parameter key="ratio" value="0.2"/>
        </enumeration>
        <parameter key="sampling_type" value="stratified sampling"/>
        <parameter key="use_local_random_seed" value="true"/>
      </operator>
      <operator activated="true" automodel="EXPORTED" class="model_simulator:model_simulator" compatibility="8.1.001" expanded="true" height="103" name="TPD Simulator" width="90" x="849" y="136"/>
      <!-- Branch 2 (Multiply output 2): 0.8 / 0.2 stratified split feeding TPD Predictions. -->
      <operator activated="true" class="split_data" compatibility="8.1.001" expanded="true" height="103" name="Split Data (4)" width="90" x="715" y="391">
        <enumeration key="partitions">
          <parameter key="ratio" value="0.8"/>
          <parameter key="ratio" value="0.2"/>
        </enumeration>
        <parameter key="sampling_type" value="stratified sampling"/>
        <parameter key="use_local_random_seed" value="true"/>
      </operator>
      <operator activated="true" automodel="EXPORTED" class="model_simulator:explain_predictions" compatibility="8.1.001" expanded="true" height="103" name="TPD Predictions" width="90" x="983" y="391">
        <parameter key="maximal explaining attributes" value="10"/>
        <parameter key="local sample size" value="5000"/>
      </operator>
      <!-- Results of both branches are written back to the repository. -->
      <operator activated="true" class="store" compatibility="8.1.001" expanded="true" height="68" name="Store" width="90" x="983" y="136">
        <parameter key="repository_entry" value="Third Party Simulator"/>
      </operator>
      <operator activated="true" class="store" compatibility="8.1.001" expanded="true" height="68" name="Store (2)" width="90" x="1117" y="442">
        <parameter key="repository_entry" value="Third Party Predictions"/>
      </operator>
      <!-- Wiring: inputs and feature selection. -->
      <connect from_op="Feature Weights" from_port="output" to_op="Select by Weights (2)" to_port="weights"/>
      <connect from_op="Model" from_port="output" to_op="TPD Simulator" to_port="model"/>
      <connect from_op="Data Set" from_port="output" to_op="Target" to_port="example set input"/>
      <connect from_op="Target" from_port="example set output" to_op="Select by Weights (2)" to_port="example set input"/>
      <connect from_op="Select by Weights (2)" from_port="example set output" to_op="Multiply (3)" to_port="input"/>
      <connect from_op="Select by Weights (2)" from_port="weights" to_port="result 1"/>
      <!-- Wiring: the two copies of the selected data go to the two splits. -->
      <connect from_op="Multiply (3)" from_port="output 1" to_op="Split Data (3)" to_port="example set"/>
      <connect from_op="Multiply (3)" from_port="output 2" to_op="Split Data (4)" to_port="example set"/>
      <!-- Wiring: simulator branch; partition 1 is training data, partition 2 is test data. -->
      <connect from_op="Split Data (3)" from_port="partition 1" to_op="TPD Simulator" to_port="training data"/>
      <connect from_op="Split Data (3)" from_port="partition 2" to_op="TPD Simulator" to_port="test data"/>
      <connect from_op="TPD Simulator" from_port="simulator output" to_op="Store" to_port="input"/>
      <!-- Wiring: prediction branch; the model is passed through from TPD Simulator. -->
      <connect from_op="TPD Simulator" from_port="model output" to_op="TPD Predictions" to_port="model"/>
      <connect from_op="Split Data (4)" from_port="partition 1" to_op="TPD Predictions" to_port="training data"/>
      <connect from_op="Split Data (4)" from_port="partition 2" to_op="TPD Predictions" to_port="test data"/>
      <connect from_op="TPD Predictions" from_port="visualization output" to_op="Store (2)" to_port="input"/>
      <!-- Wiring: process result ports. -->
      <connect from_op="TPD Predictions" from_port="example set output" to_port="result 2"/>
      <connect from_op="TPD Predictions" from_port="importances output" to_port="result 3"/>
      <connect from_op="Store" from_port="through" to_port="result 4"/>
      <connect from_op="Store (2)" from_port="through" to_port="result 5"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
      <portSpacing port="sink_result 4" spacing="0"/>
      <portSpacing port="sink_result 5" spacing="0"/>
      <portSpacing port="sink_result 6" spacing="0"/>
      <!-- Canvas annotations; their text is kept on one line so it is not altered by indentation. -->
      <description align="center" color="green" colored="true" height="54" resized="false" width="112" x="170" y="178">Target Label Role</description>
      <description align="center" color="green" colored="true" height="54" resized="false" width="112" x="170" y="480">Target Feature Weights</description>
      <description align="center" color="green" colored="true" height="54" resized="false" width="112" x="168" y="107">Target Model</description>
    </process>
  </operator>
</process>

5 Comments (5 New)
Comments
Community Manager
Status: Needs Info

Hi @michaelgloven thank you for this report. Can you also please attach (1) your inputs and (2) your rapidminer-studio.log file so we can reproduce the error?

 

Scott

 

RM Staff

Hi @michaelgloven,

 

I have a theory about what could have happened. Could it be that there are some nominal values in the test data set which were not part of the training data set?

 

Thanks for the report and your support!

 

Best,

Ingo

RM Certified Analyst

Well, I retraced all my data and found an attribute with missing values which was used both to create the model and as input into the simulator. I removed this attribute and all is good now, no error. In the meantime, I'm using feature selection to prevent this issue (assuming this was the root cause). Maybe this is just a suggestion to provide additional error checking, as the logs did not point me in this direction; it's just something I needed to spend some time figuring out. Thanks for your help, Mike

RM Staff

Fantastic Mike, this is very helpful.  We will have a look into this.  In fact, this might actually be fixed already for the next version.  The current Beta release for this is available here in case you want to give it a try on your data:

 

http://static.rapidminer.com/rnd/html/rapidminer-8.2-preview.html

 

Thanks again,

Ingo

Community Manager
Status: Resolved