Loading from ARFF file

Hello,
I need some help with a RapidMiner process.
My goal is to test cross-validation and bootstrap sampling performance.
I built a process like the one in the attachment. In the Open File block I selected an ARFF file, but the Naive Bayes block reports "Input example set must have special attribute 'label'".
Does anyone have a clue how to fix this?
Kind regards,
Mariusz.
Best Answer
-
Dzien dobry, @vaflex91,
I see your process and your ARFF file. Everything looks fine. But yes, in order to build a predictive model, you need to set one attribute as the label so that Naive Bayes (or any other algorithm, for that matter) knows what to predict. Assuming you are trying to predict "class", your process simply needs a "Set Role" operator before the Naive Bayes operator:
<?xml version="1.0" encoding="UTF-8"?><process version="7.6.001">
<context>
<input/>
<output/>
<macros/>
</context>
<!-- Top-level operator; its inner process holds the whole workflow. -->
<operator activated="true" class="process" compatibility="7.6.001" expanded="true" name="Process">
  <process expanded="true">

    <!-- Data source: Open File hands its "file" output to Read ARFF (first connect below). -->
    <operator activated="true" class="open_file" compatibility="7.6.001" expanded="true"
              height="68" name="Open File" width="90" x="179" y="493">
      <parameter key="filename" value="/Users/GenzerConsulting/Desktop/foo.arff"/>
    </operator>
    <!-- NOTE(review): data_file names a different path than Open File's filename;
         presumably the wired "file" port is what actually gets read. Confirm. -->
    <operator activated="true" class="advanced_file_connectors:read_arff" compatibility="7.6.001" expanded="true"
              height="68" name="Read ARFF" width="90" x="313" y="493">
      <parameter key="data_file" value="/home/komputer1/Dokumenty/magisterka/data/wybrane/abalone/test.arff"/>
      <list key="data_set_meta_data_information"/>
    </operator>

    <!-- Only "output 1" of this Multiply is wired; it feeds the bootstrap sampler. -->
    <operator activated="true" class="multiply" compatibility="7.6.001" expanded="true"
              height="82" name="Multiply" width="90" x="514" y="493"/>
    <!-- Bootstrap sampling with "absolute" sizing; no explicit size parameter appears in this XML. -->
    <operator activated="true" class="sample_bootstrapping" compatibility="7.6.001" expanded="true"
              height="82" name="Sample (Bootstrapping)" width="90" x="715" y="493">
      <parameter key="sample" value="absolute"/>
      <parameter key="use_weights" value="false"/>
    </operator>
    <!-- Fans the sample out: "output 1" goes to Apply Model, "output 2" to the training branch. -->
    <operator activated="true" class="multiply" compatibility="7.6.001" expanded="true"
              height="103" name="Multiply (2)" width="90" x="916" y="493"/>

    <!-- Training branch: give attribute "class" the role "label", then train Naive Bayes on it. -->
    <operator activated="true" class="set_role" compatibility="7.6.001" expanded="true"
              height="82" name="Set Role" width="90" x="1050" y="544">
      <parameter key="attribute_name" value="class"/>
      <parameter key="target_role" value="label"/>
      <list key="set_additional_roles"/>
    </operator>
    <operator activated="true" class="naive_bayes" compatibility="7.6.001" expanded="true"
              height="82" name="Naive Bayes" width="90" x="1184" y="493"/>

    <!-- Scoring: applies the trained model to the copy taken from Multiply (2) "output 1". -->
    <operator activated="true" class="apply_model" compatibility="7.6.001" expanded="true"
              height="82" name="Apply Model" width="90" x="1184" y="646">
      <list key="application_parameters"/>
    </operator>

    <!-- Wiring. The Apply Model input comes straight from Multiply (2) and does not pass through Set Role. -->
    <connect from_op="Open File" from_port="file"
             to_op="Read ARFF" to_port="file"/>
    <connect from_op="Read ARFF" from_port="output"
             to_op="Multiply" to_port="input"/>
    <connect from_op="Multiply" from_port="output 1"
             to_op="Sample (Bootstrapping)" to_port="example set input"/>
    <connect from_op="Sample (Bootstrapping)" from_port="example set output"
             to_op="Multiply (2)" to_port="input"/>
    <connect from_op="Multiply (2)" from_port="output 1"
             to_op="Apply Model" to_port="unlabelled data"/>
    <connect from_op="Multiply (2)" from_port="output 2"
             to_op="Set Role" to_port="example set input"/>
    <connect from_op="Set Role" from_port="example set output"
             to_op="Naive Bayes" to_port="training set"/>
    <connect from_op="Naive Bayes" from_port="model"
             to_op="Apply Model" to_port="model"/>
    <!-- The scored example set is the only result delivered by the process. -->
    <connect from_op="Apply Model" from_port="labelled data"
             to_port="result 1"/>

    <portSpacing port="source_input 1" spacing="0"/>
    <portSpacing port="sink_result 1" spacing="0"/>
    <portSpacing port="sink_result 2" spacing="0"/>
  </process>
</operator>
</process>
Scott
1
Answers
-
Hey Mariusz,
Do a search for "label" in the forums. This question has been answered many times.
0 -
I could not find a fix for my problem. I found one thread, which is: http://community.rapidminer.com/t5/RapidMiner-Studio-Forum/Label-Attribute-Missing/m-p/5253
That thread doesn't answer my question of how to fix this. This is the XML I have:
<?xml version="1.0" encoding="UTF-8"?><process version="7.6.001">
<context>
<input/>
<output/>
<macros/>
</context>
<!-- Top-level operator; its inner process holds the whole workflow. -->
<operator activated="true" class="process" compatibility="7.6.001" expanded="true" name="Process">
  <process expanded="true">

    <!-- Data source: Open File hands its "file" output to Read ARFF (first connect below). -->
    <operator activated="true" class="open_file" compatibility="7.6.001" expanded="true"
              height="68" name="Open File" width="90" x="179" y="493">
      <parameter key="filename" value="/home/komputer1/Dokumenty/magisterka/data/wybrane/abalone/test.arff"/>
    </operator>
    <!-- data_file repeats the same path that Open File's filename points to. -->
    <operator activated="true" class="advanced_file_connectors:read_arff" compatibility="7.6.001" expanded="true"
              height="68" name="Read ARFF" width="90" x="313" y="493">
      <parameter key="data_file" value="/home/komputer1/Dokumenty/magisterka/data/wybrane/abalone/test.arff"/>
      <list key="data_set_meta_data_information"/>
    </operator>

    <!-- Only "output 1" of this Multiply is wired; it feeds the bootstrap sampler. -->
    <operator activated="true" class="multiply" compatibility="7.6.001" expanded="true"
              height="82" name="Multiply" width="90" x="514" y="493"/>
    <!-- Bootstrap sampling with "absolute" sizing; no explicit size parameter appears in this XML. -->
    <operator activated="true" class="sample_bootstrapping" compatibility="7.6.001" expanded="true"
              height="82" name="Sample (Bootstrapping)" width="90" x="715" y="493">
      <parameter key="sample" value="absolute"/>
      <parameter key="use_weights" value="false"/>
    </operator>
    <!-- Fans the sample out: "output 1" goes to Apply Model, "output 2" to Naive Bayes. -->
    <operator activated="true" class="multiply" compatibility="7.6.001" expanded="true"
              height="103" name="Multiply (2)" width="90" x="916" y="493"/>

    <!-- No operator in this process assigns the "label" role; the learner receives
         the sample directly from Multiply (2). -->
    <operator activated="true" class="naive_bayes" compatibility="7.6.001" expanded="true"
              height="82" name="Naive Bayes" width="90" x="1184" y="493"/>

    <!-- Scoring: applies the trained model to the copy taken from Multiply (2) "output 1". -->
    <operator activated="true" class="apply_model" compatibility="7.6.001" expanded="true"
              height="82" name="Apply Model" width="90" x="1184" y="646">
      <list key="application_parameters"/>
    </operator>

    <!-- Wiring, listed in data-flow order. -->
    <connect from_op="Open File" from_port="file"
             to_op="Read ARFF" to_port="file"/>
    <connect from_op="Read ARFF" from_port="output"
             to_op="Multiply" to_port="input"/>
    <connect from_op="Multiply" from_port="output 1"
             to_op="Sample (Bootstrapping)" to_port="example set input"/>
    <connect from_op="Sample (Bootstrapping)" from_port="example set output"
             to_op="Multiply (2)" to_port="input"/>
    <connect from_op="Multiply (2)" from_port="output 1"
             to_op="Apply Model" to_port="unlabelled data"/>
    <connect from_op="Multiply (2)" from_port="output 2"
             to_op="Naive Bayes" to_port="training set"/>
    <connect from_op="Naive Bayes" from_port="model"
             to_op="Apply Model" to_port="model"/>
    <!-- The scored example set is the only result delivered by the process. -->
    <connect from_op="Apply Model" from_port="labelled data"
             to_port="result 1"/>

    <portSpacing port="source_input 1" spacing="0"/>
    <portSpacing port="sink_result 1" spacing="0"/>
    <portSpacing port="sink_result 2" spacing="0"/>
  </process>
</operator>
</process>
And the beginning of my ARFF file is:
@relation abalone-weka.filters.unsupervised.instance.Resample-S0-Z30.0-no-replacement
@attribute sex {M,F,I}
@attribute length numeric
@attribute diameter numeric
@attribute height numeric
@attribute wholeweight numeric
@attribute shuckedweight numeric
@attribute visceraweight numeric
@attribute shellweight numeric
@attribute class {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,29}
@data
M,0.43,0.35,0.11,0.406,0.1675,0.081,0.135,10
M,0.49,0.38,0.135,0.5415,0.2175,0.095,0.19,11
F,0.535,0.405,0.145,0.6845,0.2725,0.171,0.205,10
F,0.44,0.34,0.1,0.451,0.188,0.087,0.13,10
M,0.45,0.32,0.1,0.381,0.1705,0.075,0.115,9
M,0.355,0.28,0.095,0.2455,0.0955,0.062,0.075,11
I,0.38,0.275,0.1,0.2255,0.08,0.049,0.085,10
F,0.565,0.44,0.155,0.9395,0.4275,0.214,0.27,12
F,0.55,0.415,0.135,0.7635,0.318,0.21,0.2,9
F,0.56,0.44,0.14,0.9285,0.3825,0.188,0.3,11
F,0.58,0.45,0.185,0.9955,0.3945,0.272,0.285,11
Can someone help?
Kind regards,
Mariusz.
0 -
Dzien dobry, @vaflex91,
I see your process and your ARFF file. Everything looks fine. But yes, in order to build a predictive model, you need to set one attribute as the label so that Naive Bayes (or any other algorithm, for that matter) knows what to predict. Assuming you are trying to predict "class", your process simply needs a "Set Role" operator before the Naive Bayes operator:
<?xml version="1.0" encoding="UTF-8"?><process version="7.6.001">
<context>
<input/>
<output/>
<macros/>
</context>
<!-- Top-level operator; its inner process holds the whole workflow. -->
<operator activated="true" class="process" compatibility="7.6.001" expanded="true" name="Process">
  <process expanded="true">

    <!-- Data source: Open File hands its "file" output to Read ARFF (first connect below). -->
    <operator activated="true" class="open_file" compatibility="7.6.001" expanded="true"
              height="68" name="Open File" width="90" x="179" y="493">
      <parameter key="filename" value="/Users/GenzerConsulting/Desktop/foo.arff"/>
    </operator>
    <!-- NOTE(review): data_file names a different path than Open File's filename;
         presumably the wired "file" port is what actually gets read. Confirm. -->
    <operator activated="true" class="advanced_file_connectors:read_arff" compatibility="7.6.001" expanded="true"
              height="68" name="Read ARFF" width="90" x="313" y="493">
      <parameter key="data_file" value="/home/komputer1/Dokumenty/magisterka/data/wybrane/abalone/test.arff"/>
      <list key="data_set_meta_data_information"/>
    </operator>

    <!-- Only "output 1" of this Multiply is wired; it feeds the bootstrap sampler. -->
    <operator activated="true" class="multiply" compatibility="7.6.001" expanded="true"
              height="82" name="Multiply" width="90" x="514" y="493"/>
    <!-- Bootstrap sampling with "absolute" sizing; no explicit size parameter appears in this XML. -->
    <operator activated="true" class="sample_bootstrapping" compatibility="7.6.001" expanded="true"
              height="82" name="Sample (Bootstrapping)" width="90" x="715" y="493">
      <parameter key="sample" value="absolute"/>
      <parameter key="use_weights" value="false"/>
    </operator>
    <!-- Fans the sample out: "output 1" goes to Apply Model, "output 2" to the training branch. -->
    <operator activated="true" class="multiply" compatibility="7.6.001" expanded="true"
              height="103" name="Multiply (2)" width="90" x="916" y="493"/>

    <!-- Training branch: give attribute "class" the role "label", then train Naive Bayes on it. -->
    <operator activated="true" class="set_role" compatibility="7.6.001" expanded="true"
              height="82" name="Set Role" width="90" x="1050" y="544">
      <parameter key="attribute_name" value="class"/>
      <parameter key="target_role" value="label"/>
      <list key="set_additional_roles"/>
    </operator>
    <operator activated="true" class="naive_bayes" compatibility="7.6.001" expanded="true"
              height="82" name="Naive Bayes" width="90" x="1184" y="493"/>

    <!-- Scoring: applies the trained model to the copy taken from Multiply (2) "output 1". -->
    <operator activated="true" class="apply_model" compatibility="7.6.001" expanded="true"
              height="82" name="Apply Model" width="90" x="1184" y="646">
      <list key="application_parameters"/>
    </operator>

    <!-- Wiring. The Apply Model input comes straight from Multiply (2) and does not pass through Set Role. -->
    <connect from_op="Open File" from_port="file"
             to_op="Read ARFF" to_port="file"/>
    <connect from_op="Read ARFF" from_port="output"
             to_op="Multiply" to_port="input"/>
    <connect from_op="Multiply" from_port="output 1"
             to_op="Sample (Bootstrapping)" to_port="example set input"/>
    <connect from_op="Sample (Bootstrapping)" from_port="example set output"
             to_op="Multiply (2)" to_port="input"/>
    <connect from_op="Multiply (2)" from_port="output 1"
             to_op="Apply Model" to_port="unlabelled data"/>
    <connect from_op="Multiply (2)" from_port="output 2"
             to_op="Set Role" to_port="example set input"/>
    <connect from_op="Set Role" from_port="example set output"
             to_op="Naive Bayes" to_port="training set"/>
    <connect from_op="Naive Bayes" from_port="model"
             to_op="Apply Model" to_port="model"/>
    <!-- The scored example set is the only result delivered by the process. -->
    <connect from_op="Apply Model" from_port="labelled data"
             to_port="result 1"/>

    <portSpacing port="source_input 1" spacing="0"/>
    <portSpacing port="sink_result 1" spacing="0"/>
    <portSpacing port="sink_result 2" spacing="0"/>
  </process>
</operator>
</process>
Scott
1