Due to recent updates, all users are required to create an Altair One account to log in to the RapidMiner community. Click the Register button to create your account using the same email that you have previously used to log in to the RapidMiner community. This will ensure that any previously created content will be synced to your Altair One account. Once you log in, you will be asked to provide a username that identifies you to other Community users. Email us at Community with questions.
No "prediction" attribute?
I'm probably missing something obvious, but I hope you'll help out anyway!
I have a RM process that reads a model (neural net), applies it to some test data, and then writes the results to an Excel file. When I examine the Excel file all has worked fine, and the last column is labeled "prediction(mov)" and contains the predicted result.
However, the prediction isn't quite in the form I need, so I'd like to add some operators in the process chain after the "Apply Model" to modify the prediction. Unfortunately, the prediction attribute doesn't appear in the "labeled data" output by Apply Model. Or rather, it does show up in the summary as a special attribute, but it isn't in the attribute list, so I can't use it in, say, "Generate Attributes".
I'll append the XML below if that's of any value.
Any suggestions on how to fix this? Do I need to create the prediction attribute myself (e.g., with "Generate Attributes") before applying the model? I appreciate any help, thanks!
-- Scott
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- RapidMiner process (version 5.0) posted with the question: reads a stored model and an Excel sheet, applies the model, joins the result with the original data, derives two attributes and writes everything to an Excel file. -->
<process version="5.0">
<context>
<input/>
<output/>
<macros/>
</context>
<operator activated="true" class="process" expanded="true" name="Process">
<parameter key="logverbosity" value="3"/>
<parameter key="random_seed" value="2001"/>
<parameter key="send_mail" value="1"/>
<parameter key="process_duration_for_mail" value="30"/>
<parameter key="encoding" value="SYSTEM"/>
<parameter key="parallelize_main_process" value="false"/>
<process expanded="true" height="701" width="974">
<!-- Loads the previously stored model from a local file. -->
<operator activated="true" class="read_model" expanded="true" height="60" name="Read Model" width="90" x="112" y="75">
<parameter key="model_file" value="C:\Lisp\NCAA2\Models\nn-full-margin.mod"/>
</operator>
<!-- Loads the test data from sheet 1 of the Excel file; the first row supplies the attribute names. -->
<operator activated="true" class="read_excel" expanded="true" height="60" name="Read Excel" width="90" x="45" y="570">
<parameter key="excel_file" value="C:\Lisp\NCAA2\Test Data\season-2010-2-25.xls"/>
<parameter key="sheet_number" value="1"/>
<parameter key="row_offset" value="0"/>
<parameter key="column_offset" value="0"/>
<parameter key="first_row_as_names" value="true"/>
<list key="annotations"/>
</operator>
<!-- Gives the attribute named "ID" the role "id". -->
<operator activated="true" class="set_role" expanded="true" height="76" name="Set Role" width="90" x="179" y="570">
<parameter key="name" value="ID"/>
<parameter key="target_role" value="id"/>
</operator>
<!-- numerical_to_real conversion step. The filter parameters below are stored as numeric codes whose meaning is not visible in this file. -->
<operator activated="true" class="numerical_to_real" expanded="true" height="76" name="Numerical to Real" width="90" x="313" y="570">
<parameter key="attribute_filter_type" value="0"/>
<parameter key="attribute" value=""/>
<parameter key="use_except_expression" value="false"/>
<parameter key="value_type" value="0"/>
<parameter key="use_value_type_exception" value="false"/>
<parameter key="except_value_type" value="2"/>
<parameter key="block_type" value="0"/>
<parameter key="use_block_type_exception" value="false"/>
<parameter key="except_block_type" value="2"/>
<parameter key="invert_selection" value="false"/>
<parameter key="include_special_attributes" value="false"/>
</operator>
<!-- Subset filter with invert_selection=true: the attributes Ascore, Away, Date, Home and Hscore are the ones named in the subset, so with the inversion they are presumably excluded from the data sent to the model (confirm in the GUI). -->
<operator activated="true" class="select_attributes" expanded="true" height="76" name="Select Attributes" width="90" x="447" y="570">
<parameter key="attribute_filter_type" value="subset"/>
<parameter key="attribute" value=""/>
<parameter key="attributes" value="Ascore|Away|Date|Home|Hscore"/>
<parameter key="use_except_expression" value="false"/>
<parameter key="value_type" value="0"/>
<parameter key="use_value_type_exception" value="false"/>
<parameter key="except_value_type" value="11"/>
<parameter key="block_type" value="0"/>
<parameter key="use_block_type_exception" value="false"/>
<parameter key="except_block_type" value="8"/>
<parameter key="invert_selection" value="true"/>
<parameter key="include_special_attributes" value="false"/>
</operator>
<!-- Applies the loaded model to the filtered data. -->
<operator activated="true" class="apply_model" expanded="true" height="76" name="Apply Model" width="90" x="246" y="75">
<list key="application_parameters"/>
<parameter key="create_view" value="false"/>
</operator>
<!-- Joins the model output (left port) with the "original" output of Select Attributes (right port); see the connect elements below. -->
<operator activated="true" class="join" expanded="true" height="76" name="Join" width="90" x="380" y="75">
<parameter key="remove_double_attributes" value="true"/>
<parameter key="join_type" value="0"/>
</operator>
<!-- Creates attribute "actual": 1 when Hscore is greater than Ascore, otherwise -1. -->
<operator activated="true" class="generate_attributes" expanded="true" height="76" name="Generate Attributes" width="90" x="514" y="75">
<list key="function_descriptions">
<parameter key="actual" value="if(Hscore>Ascore,1,-1)"/>
</list>
<parameter key="use_standard_constants" value="true"/>
<parameter key="keep_all" value="true"/>
</operator>
<!-- Creates attribute "correct" from sgn(prediction). NOTE(review): no operator in this process creates an attribute named plain "prediction"; the accompanying post says the model output column is labeled "prediction(mov)" and carries a special role. Confirm the intended attribute name and role. -->
<operator activated="true" class="generate_attributes" expanded="true" height="76" name="Generate Attributes (2)" width="90" x="648" y="75">
<list key="function_descriptions">
<parameter key="correct" value="sgn(prediction)"/>
</list>
<parameter key="use_standard_constants" value="true"/>
<parameter key="keep_all" value="true"/>
</operator>
<!-- Writes the final example set to an Excel file. -->
<operator activated="true" class="write_excel" expanded="true" height="60" name="Write Excel" width="90" x="849" y="75">
<parameter key="excel_file" value="C:\Lisp\NCAA2\Test Data\2010-out-02-25-margin.xls"/>
</operator>
<!-- Wiring. Data path: Read Excel, Set Role, Numerical to Real, Select Attributes, Apply Model, Join, Generate Attributes, Generate Attributes (2), Write Excel, result 1. Read Model feeds the model port of Apply Model. -->
<connect from_op="Read Model" from_port="output" to_op="Apply Model" to_port="model"/>
<connect from_op="Read Excel" from_port="output" to_op="Set Role" to_port="example set input"/>
<connect from_op="Set Role" from_port="example set output" to_op="Numerical to Real" to_port="example set input"/>
<connect from_op="Numerical to Real" from_port="example set output" to_op="Select Attributes" to_port="example set input"/>
<connect from_op="Select Attributes" from_port="example set output" to_op="Apply Model" to_port="unlabelled data"/>
<connect from_op="Select Attributes" from_port="original" to_op="Join" to_port="right"/>
<connect from_op="Apply Model" from_port="labelled data" to_op="Join" to_port="left"/>
<connect from_op="Join" from_port="join" to_op="Generate Attributes" to_port="example set input"/>
<connect from_op="Generate Attributes" from_port="example set output" to_op="Generate Attributes (2)" to_port="example set input"/>
<connect from_op="Generate Attributes (2)" from_port="example set output" to_op="Write Excel" to_port="input"/>
<connect from_op="Write Excel" from_port="through" to_port="result 1"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_result 1" spacing="0"/>
<portSpacing port="sink_result 2" spacing="0"/>
</process>
</operator>
</process>
I have a RM process that reads a model (neural net), applies it to some test data, and then writes the results to an Excel file. When I examine the Excel file all has worked fine, and the last column is labeled "prediction(mov)" and contains the predicted result.
However, the prediction isn't quite in the form I need, so I'd like to add some operators in the process chain after the "Apply Model" to modify the prediction. Unfortunately, the prediction attribute doesn't appear in the "labeled data" output by Apply Model. Or rather, it does show up in the summary as a special attribute, but it isn't in the attribute list, so I can't use it in, say, "Generate Attributes".
I'll append the XML below if that's of any value.
Any suggestions on how to fix this? Do I need to create the prediction attribute myself (e.g., with "Generate Attributes") before applying the model? I appreciate any help, thanks!
-- Scott
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- RapidMiner process (version 5.0) posted with the question: reads a stored model and an Excel sheet, applies the model, joins the result with the original data, derives two attributes and writes everything to an Excel file. -->
<process version="5.0">
<context>
<input/>
<output/>
<macros/>
</context>
<operator activated="true" class="process" expanded="true" name="Process">
<parameter key="logverbosity" value="3"/>
<parameter key="random_seed" value="2001"/>
<parameter key="send_mail" value="1"/>
<parameter key="process_duration_for_mail" value="30"/>
<parameter key="encoding" value="SYSTEM"/>
<parameter key="parallelize_main_process" value="false"/>
<process expanded="true" height="701" width="974">
<!-- Loads the previously stored model from a local file. -->
<operator activated="true" class="read_model" expanded="true" height="60" name="Read Model" width="90" x="112" y="75">
<parameter key="model_file" value="C:\Lisp\NCAA2\Models\nn-full-margin.mod"/>
</operator>
<!-- Loads the test data from sheet 1 of the Excel file; the first row supplies the attribute names. -->
<operator activated="true" class="read_excel" expanded="true" height="60" name="Read Excel" width="90" x="45" y="570">
<parameter key="excel_file" value="C:\Lisp\NCAA2\Test Data\season-2010-2-25.xls"/>
<parameter key="sheet_number" value="1"/>
<parameter key="row_offset" value="0"/>
<parameter key="column_offset" value="0"/>
<parameter key="first_row_as_names" value="true"/>
<list key="annotations"/>
</operator>
<!-- Gives the attribute named "ID" the role "id". -->
<operator activated="true" class="set_role" expanded="true" height="76" name="Set Role" width="90" x="179" y="570">
<parameter key="name" value="ID"/>
<parameter key="target_role" value="id"/>
</operator>
<!-- numerical_to_real conversion step. The filter parameters below are stored as numeric codes whose meaning is not visible in this file. -->
<operator activated="true" class="numerical_to_real" expanded="true" height="76" name="Numerical to Real" width="90" x="313" y="570">
<parameter key="attribute_filter_type" value="0"/>
<parameter key="attribute" value=""/>
<parameter key="use_except_expression" value="false"/>
<parameter key="value_type" value="0"/>
<parameter key="use_value_type_exception" value="false"/>
<parameter key="except_value_type" value="2"/>
<parameter key="block_type" value="0"/>
<parameter key="use_block_type_exception" value="false"/>
<parameter key="except_block_type" value="2"/>
<parameter key="invert_selection" value="false"/>
<parameter key="include_special_attributes" value="false"/>
</operator>
<!-- Subset filter with invert_selection=true: the attributes Ascore, Away, Date, Home and Hscore are the ones named in the subset, so with the inversion they are presumably excluded from the data sent to the model (confirm in the GUI). -->
<operator activated="true" class="select_attributes" expanded="true" height="76" name="Select Attributes" width="90" x="447" y="570">
<parameter key="attribute_filter_type" value="subset"/>
<parameter key="attribute" value=""/>
<parameter key="attributes" value="Ascore|Away|Date|Home|Hscore"/>
<parameter key="use_except_expression" value="false"/>
<parameter key="value_type" value="0"/>
<parameter key="use_value_type_exception" value="false"/>
<parameter key="except_value_type" value="11"/>
<parameter key="block_type" value="0"/>
<parameter key="use_block_type_exception" value="false"/>
<parameter key="except_block_type" value="8"/>
<parameter key="invert_selection" value="true"/>
<parameter key="include_special_attributes" value="false"/>
</operator>
<!-- Applies the loaded model to the filtered data. -->
<operator activated="true" class="apply_model" expanded="true" height="76" name="Apply Model" width="90" x="246" y="75">
<list key="application_parameters"/>
<parameter key="create_view" value="false"/>
</operator>
<!-- Joins the model output (left port) with the "original" output of Select Attributes (right port); see the connect elements below. -->
<operator activated="true" class="join" expanded="true" height="76" name="Join" width="90" x="380" y="75">
<parameter key="remove_double_attributes" value="true"/>
<parameter key="join_type" value="0"/>
</operator>
<!-- Creates attribute "actual": 1 when Hscore is greater than Ascore, otherwise -1. -->
<operator activated="true" class="generate_attributes" expanded="true" height="76" name="Generate Attributes" width="90" x="514" y="75">
<list key="function_descriptions">
<parameter key="actual" value="if(Hscore>Ascore,1,-1)"/>
</list>
<parameter key="use_standard_constants" value="true"/>
<parameter key="keep_all" value="true"/>
</operator>
<!-- Creates attribute "correct" from sgn(prediction). NOTE(review): no operator in this process creates an attribute named plain "prediction"; the accompanying post says the model output column is labeled "prediction(mov)" and carries a special role. Confirm the intended attribute name and role. -->
<operator activated="true" class="generate_attributes" expanded="true" height="76" name="Generate Attributes (2)" width="90" x="648" y="75">
<list key="function_descriptions">
<parameter key="correct" value="sgn(prediction)"/>
</list>
<parameter key="use_standard_constants" value="true"/>
<parameter key="keep_all" value="true"/>
</operator>
<!-- Writes the final example set to an Excel file. -->
<operator activated="true" class="write_excel" expanded="true" height="60" name="Write Excel" width="90" x="849" y="75">
<parameter key="excel_file" value="C:\Lisp\NCAA2\Test Data\2010-out-02-25-margin.xls"/>
</operator>
<!-- Wiring. Data path: Read Excel, Set Role, Numerical to Real, Select Attributes, Apply Model, Join, Generate Attributes, Generate Attributes (2), Write Excel, result 1. Read Model feeds the model port of Apply Model. -->
<connect from_op="Read Model" from_port="output" to_op="Apply Model" to_port="model"/>
<connect from_op="Read Excel" from_port="output" to_op="Set Role" to_port="example set input"/>
<connect from_op="Set Role" from_port="example set output" to_op="Numerical to Real" to_port="example set input"/>
<connect from_op="Numerical to Real" from_port="example set output" to_op="Select Attributes" to_port="example set input"/>
<connect from_op="Select Attributes" from_port="example set output" to_op="Apply Model" to_port="unlabelled data"/>
<connect from_op="Select Attributes" from_port="original" to_op="Join" to_port="right"/>
<connect from_op="Apply Model" from_port="labelled data" to_op="Join" to_port="left"/>
<connect from_op="Join" from_port="join" to_op="Generate Attributes" to_port="example set input"/>
<connect from_op="Generate Attributes" from_port="example set output" to_op="Generate Attributes (2)" to_port="example set input"/>
<connect from_op="Generate Attributes (2)" from_port="example set output" to_op="Write Excel" to_port="input"/>
<connect from_op="Write Excel" from_port="through" to_port="result 1"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_result 1" spacing="0"/>
<portSpacing port="sink_result 2" spacing="0"/>
</process>
</operator>
</process>
0
Answers
You can rename and change the role of the attribute from prediction to regular, then it becomes usable, like this...
-- Scott
Intriguing. What model appliers do for a living is stick in prediction columns, so if yours aren't showing I'd check that the join is working (mouse over the input ports), or take it out altogether to check the rest. Without the data it is hard to say more.
So apparently the problem has to do with reading the model from a file?
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- Example RapidMiner process (version 5.0) from the answer: trains an SVM on generated data, applies it, renames the prediction attribute, passes it through Set Role and then uses it in Generate Attributes. -->
<process version="5.0">
<context>
<input>
<location/>
</input>
<output>
<location/>
<location/>
</output>
<macros/>
</context>
<operator activated="true" class="process" expanded="true" name="Process">
<process expanded="true" height="566" width="969">
<!-- Data source and learner, both with default parameters (none are listed). -->
<operator activated="true" class="generate_data" expanded="true" height="60" name="Generate Data" width="90" x="112" y="75"/>
<operator activated="true" class="support_vector_machine" expanded="true" height="112" name="SVM" width="90" x="112" y="210"/>
<!-- Applies the trained model to the example set passed through by the SVM operator (see the connect elements below). -->
<operator activated="true" class="apply_model" expanded="true" height="76" name="Apply Model" width="90" x="313" y="210">
<list key="application_parameters"/>
</operator>
<!-- Renames the attribute "prediction(label)" to "nu". -->
<operator activated="true" class="rename" expanded="true" height="76" name="Rename" width="90" x="447" y="210">
<parameter key="old_name" value="prediction(label)"/>
<parameter key="new_name" value="nu"/>
</operator>
<!-- Set Role on "nu" with no target_role listed. The accompanying answer describes this step as changing the role to regular, so that is presumably the operator default; confirm. -->
<operator activated="true" class="set_role" expanded="true" height="76" name="Set Role" width="90" x="581" y="210">
<parameter key="name" value="nu"/>
</operator>
<!-- Creates attribute "test" as the product of nu and att1, showing that the renamed prediction can be used in an expression. -->
<operator activated="true" class="generate_attributes" expanded="true" height="76" name="Generate Attributes" width="90" x="715" y="210">
<list key="function_descriptions">
<parameter key="test" value="nu*att1"/>
</list>
</operator>
<!-- Wiring: Generate Data, SVM (model and exampleSet both go to Apply Model), Rename, Set Role, Generate Attributes, result 1. -->
<connect from_op="Generate Data" from_port="output" to_op="SVM" to_port="training set"/>
<connect from_op="SVM" from_port="model" to_op="Apply Model" to_port="model"/>
<connect from_op="SVM" from_port="exampleSet" to_op="Apply Model" to_port="unlabelled data"/>
<connect from_op="Apply Model" from_port="labelled data" to_op="Rename" to_port="example set input"/>
<connect from_op="Rename" from_port="example set output" to_op="Set Role" to_port="example set input"/>
<connect from_op="Set Role" from_port="example set output" to_op="Generate Attributes" to_port="example set input"/>
<connect from_op="Generate Attributes" from_port="example set output" to_port="result 1"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_result 1" spacing="0"/>
<portSpacing port="sink_result 2" spacing="0"/>
</process>
</operator>
</process>
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- Variant of the rename and set-role example in which the model is read from a file instead of being trained inside the process. -->
<process version="5.0">
<context>
<input/>
<output/>
<macros/>
</context>
<operator activated="true" class="process" expanded="true" name="Process">
<parameter key="logverbosity" value="3"/>
<parameter key="random_seed" value="2001"/>
<parameter key="send_mail" value="1"/>
<parameter key="process_duration_for_mail" value="30"/>
<parameter key="encoding" value="SYSTEM"/>
<parameter key="parallelize_main_process" value="false"/>
<process expanded="true" height="566" width="969">
<!-- Generates 100 examples with 5 attributes in the range -10.0 to 10.0, using the "random" target function. -->
<operator activated="true" class="generate_data" expanded="true" height="60" name="Generate Data (2)" width="90" x="179" y="345">
<parameter key="target_function" value="random"/>
<parameter key="number_examples" value="100"/>
<parameter key="number_of_attributes" value="5"/>
<parameter key="attributes_lower_bound" value="-10.0"/>
<parameter key="attributes_upper_bound" value="10.0"/>
<parameter key="use_local_random_seed" value="false"/>
<parameter key="local_random_seed" value="1992"/>
<parameter key="datamanagement" value="0"/>
</operator>
<!-- Loads a stored model from a local file. -->
<operator activated="true" class="read_model" expanded="true" height="60" name="Read Model" width="90" x="112" y="210">
<parameter key="model_file" value="C:\Lisp\NCAA2\Models\nn-full.mod"/>
</operator>
<!-- Applies the loaded model to the generated data. -->
<operator activated="true" class="apply_model" expanded="true" height="76" name="Apply Model" width="90" x="313" y="210">
<list key="application_parameters"/>
<parameter key="create_view" value="false"/>
</operator>
<!-- Renames "prediction(label)" to "nu". NOTE(review): this assumes the stored model's label attribute is called "label"; the file does not show the model's label name, so confirm. -->
<operator activated="true" class="rename" expanded="true" height="76" name="Rename" width="90" x="447" y="210">
<parameter key="old_name" value="prediction(label)"/>
<parameter key="new_name" value="nu"/>
</operator>
<!-- Explicitly sets the role of "nu" to regular so it can be used like an ordinary attribute. -->
<operator activated="true" class="set_role" expanded="true" height="76" name="Set Role" width="90" x="581" y="210">
<parameter key="name" value="nu"/>
<parameter key="target_role" value="regular"/>
</operator>
<!-- Creates attribute "test" as the product of nu and att1. -->
<operator activated="true" class="generate_attributes" expanded="true" height="76" name="Generate Attributes" width="90" x="715" y="210">
<list key="function_descriptions">
<parameter key="test" value="nu*att1"/>
</list>
<parameter key="use_standard_constants" value="true"/>
<parameter key="keep_all" value="true"/>
</operator>
<!-- Wiring: generated data and the loaded model go into Apply Model; its output flows through Rename, Set Role and Generate Attributes to result 1. -->
<connect from_op="Generate Data (2)" from_port="output" to_op="Apply Model" to_port="unlabelled data"/>
<connect from_op="Read Model" from_port="output" to_op="Apply Model" to_port="model"/>
<connect from_op="Apply Model" from_port="labelled data" to_op="Rename" to_port="example set input"/>
<connect from_op="Rename" from_port="example set output" to_op="Set Role" to_port="example set input"/>
<connect from_op="Set Role" from_port="example set output" to_op="Generate Attributes" to_port="example set input"/>
<connect from_op="Generate Attributes" from_port="example set output" to_port="result 1"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_result 1" spacing="0"/>
<portSpacing port="sink_result 2" spacing="0"/>
</process>
</operator>
</process>
However, and this is the point, if you press the play button the process does work, and the prediction attribute does appear in the labeled data output from "Apply Model", as in the following.
Thanks for your help (and patience!)
-- Scott
I spend most of my time writing Prolog, yes really, Prolog, and know exactly what you mean. The truth about RM is that it is very idiot proof, as I can vouch for, and produces much less silicon smoke than a really serious Prolog stack explosion; on the other hand when you need a built-in theorem prover .... So horses for courses.
Happy datagrinding!