RapidMiner 9.7 is Now Available

Lots of amazing new improvements including true version control! Learn more about what's new here.

CLICK HERE TO DOWNLOAD

Difference in classification accuracy (performance vector) for the same input

DavidRajuDavidRaju Member Posts: 18 Contributor II
edited September 2019 in Help
Could you please clarify why I'm getting different accuracies (performance vector results) for the same input supplied to two similar models?

<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  RapidMiner process (version 5.2.008).
  Data flow: Retrieve (//Samples/data/Sonar), then Multiply, then Select Attributes (2),
  then Multiply (2), which feeds the same example set to Validation (2) and Validation (3).
  Both validations are x_validation operators with no parameters set, each training a
  Decision Tree and measuring it with Apply Model plus Performance.
  NOTE(review): neither validation sets a local random seed. According to the reply in
  this thread, X-Validation uses random splits that differ between executions, which is
  the explanation given there for the two differing performance vectors.
-->
<process version="5.2.008">
 <context>
   <input/>
   <output/>
   <macros/>
 </context>
 <operator activated="true" class="process" compatibility="5.2.008" expanded="true" name="Process">
   <process expanded="true" height="467" width="748">
     <!-- Load the Sonar sample data set from the repository. -->
     <operator activated="true" class="retrieve" compatibility="5.2.008" expanded="true" height="60" name="Retrieve" width="90" x="45" y="30">
       <parameter key="repository_entry" value="//Samples/data/Sonar"/>
     </operator>
     <!-- Only "output 1" of this Multiply is wired (to Select Attributes (2)). -->
     <operator activated="true" class="multiply" compatibility="5.2.008" expanded="true" height="76" name="Multiply" width="90" x="45" y="120"/>
     <!-- Subset filter with invert_selection="true": presumably the listed attributes are
          the ones removed and all others are kept. TODO confirm against the operator docs. -->
     <operator activated="true" class="select_attributes" compatibility="5.2.008" expanded="true" height="76" name="Select Attributes (2)" width="90" x="179" y="75">
       <parameter key="attribute_filter_type" value="subset"/>
       <parameter key="attributes" value="attribute_14|attribute_15|attribute_16|attribute_17|attribute_18|attribute_19|attribute_2|attribute_22|attribute_23|attribute_24|attribute_25|attribute_26|attribute_27|attribute_28|attribute_29|attribute_3|attribute_30|attribute_31|attribute_32|attribute_33|attribute_34|attribute_38|attribute_39|attribute_40|attribute_41|attribute_42|attribute_50|attribute_53|attribute_55|attribute_56|attribute_57|attribute_58|attribute_59|attribute_60|attribute_7|attribute_6|attribute_8|attribute_35|attribute_20|attribute_5|attribute_54"/>
       <parameter key="invert_selection" value="true"/>
     </operator>
     <!-- Fan-out point: "output 1" goes to Validation (2), "output 2" to Validation (3). -->
     <operator activated="true" class="multiply" compatibility="5.2.008" expanded="true" height="94" name="Multiply (2)" width="90" x="246" y="165"/>
     <!-- First validation: no parameters set on the operator (no local random seed). -->
     <operator activated="true" class="x_validation" compatibility="5.2.008" expanded="true" height="112" name="Validation (2)" width="90" x="380" y="30">
       <!-- Training subprocess: learn a Decision Tree (no parameters set) on the training port. -->
       <process expanded="true" height="414" width="346">
         <operator activated="true" class="decision_tree" compatibility="5.2.008" expanded="true" height="76" name="Decision Tree (2)" width="90" x="132" y="30"/>
         <connect from_port="training" to_op="Decision Tree (2)" to_port="training set"/>
         <connect from_op="Decision Tree (2)" from_port="model" to_port="model"/>
         <portSpacing port="source_training" spacing="0"/>
         <portSpacing port="sink_model" spacing="0"/>
         <portSpacing port="sink_through 1" spacing="0"/>
       </process>
       <!-- Testing subprocess: apply the model to the test set and deliver the
            classification performance on "averagable 1". -->
       <process expanded="true" height="414" width="346">
         <operator activated="true" class="apply_model" compatibility="5.2.008" expanded="true" height="76" name="Apply Model (2)" width="90" x="45" y="30">
           <list key="application_parameters"/>
         </operator>
         <operator activated="true" class="performance_classification" compatibility="5.2.008" expanded="true" height="76" name="Performance (2)" width="90" x="200" y="30">
           <list key="class_weights"/>
         </operator>
         <connect from_port="model" to_op="Apply Model (2)" to_port="model"/>
         <connect from_port="test set" to_op="Apply Model (2)" to_port="unlabelled data"/>
         <connect from_op="Apply Model (2)" from_port="labelled data" to_op="Performance (2)" to_port="labelled data"/>
         <connect from_op="Performance (2)" from_port="performance" to_port="averagable 1"/>
         <portSpacing port="source_model" spacing="0"/>
         <portSpacing port="source_test set" spacing="0"/>
         <portSpacing port="source_through 1" spacing="0"/>
         <portSpacing port="sink_averagable 1" spacing="0"/>
         <portSpacing port="sink_averagable 2" spacing="0"/>
       </process>
     </operator>
     <!-- Second validation: same structure and (absent) parameters as Validation (2). -->
     <operator activated="true" class="x_validation" compatibility="5.2.008" expanded="true" height="112" name="Validation (3)" width="90" x="380" y="165">
       <process expanded="true" height="396" width="346">
         <operator activated="true" class="decision_tree" compatibility="5.2.008" expanded="true" height="76" name="Decision Tree (3)" width="90" x="132" y="30"/>
         <connect from_port="training" to_op="Decision Tree (3)" to_port="training set"/>
         <connect from_op="Decision Tree (3)" from_port="model" to_port="model"/>
         <portSpacing port="source_training" spacing="0"/>
         <portSpacing port="sink_model" spacing="0"/>
         <portSpacing port="sink_through 1" spacing="0"/>
       </process>
       <process expanded="true" height="396" width="346">
         <operator activated="true" class="apply_model" compatibility="5.2.008" expanded="true" height="76" name="Apply Model (3)" width="90" x="45" y="30">
           <list key="application_parameters"/>
         </operator>
         <operator activated="true" class="performance_classification" compatibility="5.2.008" expanded="true" height="76" name="Performance (3)" width="90" x="200" y="30">
           <list key="class_weights"/>
         </operator>
         <connect from_port="model" to_op="Apply Model (3)" to_port="model"/>
         <connect from_port="test set" to_op="Apply Model (3)" to_port="unlabelled data"/>
         <connect from_op="Apply Model (3)" from_port="labelled data" to_op="Performance (3)" to_port="labelled data"/>
         <connect from_op="Performance (3)" from_port="performance" to_port="averagable 1"/>
         <portSpacing port="source_model" spacing="0"/>
         <portSpacing port="source_test set" spacing="0"/>
         <portSpacing port="source_through 1" spacing="0"/>
         <portSpacing port="sink_averagable 1" spacing="0"/>
         <portSpacing port="sink_averagable 2" spacing="0"/>
       </process>
     </operator>
     <!-- Top-level wiring. Results: Validation (2) delivers "training" to result 1 and its
          performance to result 2; Validation (3) delivers "training" to result 3 and its
          performance to result 4. -->
     <connect from_op="Retrieve" from_port="output" to_op="Multiply" to_port="input"/>
     <connect from_op="Multiply" from_port="output 1" to_op="Select Attributes (2)" to_port="example set input"/>
     <connect from_op="Select Attributes (2)" from_port="example set output" to_op="Multiply (2)" to_port="input"/>
     <connect from_op="Multiply (2)" from_port="output 1" to_op="Validation (2)" to_port="training"/>
     <connect from_op="Multiply (2)" from_port="output 2" to_op="Validation (3)" to_port="training"/>
     <connect from_op="Validation (2)" from_port="training" to_port="result 1"/>
     <connect from_op="Validation (2)" from_port="averagable 1" to_port="result 2"/>
     <connect from_op="Validation (3)" from_port="training" to_port="result 3"/>
     <connect from_op="Validation (3)" from_port="averagable 1" to_port="result 4"/>
     <portSpacing port="source_input 1" spacing="0"/>
     <portSpacing port="sink_result 1" spacing="0"/>
     <portSpacing port="sink_result 2" spacing="0"/>
     <portSpacing port="sink_result 3" spacing="0"/>
     <portSpacing port="sink_result 4" spacing="0"/>
     <portSpacing port="sink_result 5" spacing="0"/>
   </process>
 </operator>
</process>

See my next post for a similar kind of problem.

Answers

  • DavidRajuDavidRaju Member Posts: 18 Contributor II
    contd...
    Further, why is the accuracy (performance vector) of validation operator 1 (with Decision Tree), when it runs alongside validation operator 2 (with k-NN), not the same as the accuracy of validation operator 1 (with Decision Tree) when it runs on its own?
    Process 1 -

    i/p A  - validation operator (DT) - o/p (performance vector) = X,
    i/p A  - validation operator (kNN) - o/p (performance vector) = Y

    Process 2-
    i/p A  - validation operator (DT) - o/p (performance vector) = Z  (This should be X)

    where X, Y, and  Z are some numeric values.

    The code for both processes is given below.
    Process 1:

    <?xml version="1.0" encoding="UTF-8" standalone="no"?>
    <!--
      RapidMiner process (version 5.2.008), "Process 1" of the post.
      Data flow: Retrieve (//Samples/data/Sonar), then Select Attributes (2), then
      Multiply (2), which feeds Validation (3) (Decision Tree) on "output 1" and
      Validation (4) (k-NN) on "output 2".
      NOTE(review): no parameters are set on either x_validation operator, so no local
      random seed is configured. According to the reply in this thread, X-Validation uses
      random splits that differ between executions.
    -->
    <process version="5.2.008">
     <context>
       <input/>
       <output/>
       <macros/>
     </context>
     <operator activated="true" class="process" compatibility="5.2.008" expanded="true" name="Process">
       <process expanded="true" height="467" width="681">
         <!-- Load the Sonar sample data set from the repository. -->
         <operator activated="true" class="retrieve" compatibility="5.2.008" expanded="true" height="60" name="Retrieve" width="90" x="45" y="30">
           <parameter key="repository_entry" value="//Samples/data/Sonar"/>
         </operator>
         <!-- Subset filter with invert_selection="true": presumably the listed attributes are
              the ones removed and all others are kept. TODO confirm against the operator docs. -->
         <operator activated="true" class="select_attributes" compatibility="5.2.008" expanded="true" height="76" name="Select Attributes (2)" width="90" x="179" y="75">
           <parameter key="attribute_filter_type" value="subset"/>
           <parameter key="attributes" value="attribute_14|attribute_15|attribute_16|attribute_17|attribute_18|attribute_19|attribute_2|attribute_22|attribute_23|attribute_24|attribute_25|attribute_26|attribute_27|attribute_28|attribute_29|attribute_3|attribute_30|attribute_31|attribute_32|attribute_33|attribute_34|attribute_38|attribute_39|attribute_40|attribute_41|attribute_42|attribute_50|attribute_53|attribute_55|attribute_56|attribute_57|attribute_58|attribute_59|attribute_60|attribute_7|attribute_6|attribute_8|attribute_35|attribute_20|attribute_5|attribute_54"/>
           <parameter key="invert_selection" value="true"/>
         </operator>
         <!-- Fan-out point: "output 1" goes to Validation (3), "output 2" to Validation (4). -->
         <operator activated="true" class="multiply" compatibility="5.2.008" expanded="true" height="94" name="Multiply (2)" width="90" x="246" y="210"/>
         <!-- k-NN validation: no parameters set on the operator (no local random seed). -->
         <operator activated="true" class="x_validation" compatibility="5.2.008" expanded="true" height="112" name="Validation (4)" width="90" x="380" y="210">
           <!-- Training subprocess: Decision Tree (4) is deactivated and has no connections;
                the k-NN learner (no parameters set) is the one wired to the model output. -->
           <process expanded="true" height="414" width="346">
             <operator activated="false" class="decision_tree" compatibility="5.2.008" expanded="true" height="76" name="Decision Tree (4)" width="90" x="179" y="300"/>
             <operator activated="true" class="k_nn" compatibility="5.2.008" expanded="true" height="76" name="k-NN" width="90" x="179" y="30"/>
             <connect from_port="training" to_op="k-NN" to_port="training set"/>
             <connect from_op="k-NN" from_port="model" to_port="model"/>
             <portSpacing port="source_training" spacing="0"/>
             <portSpacing port="sink_model" spacing="0"/>
             <portSpacing port="sink_through 1" spacing="0"/>
           </process>
           <!-- Testing subprocess: apply the model to the test set and deliver the
                classification performance on "averagable 1". -->
           <process expanded="true" height="414" width="346">
             <operator activated="true" class="apply_model" compatibility="5.2.008" expanded="true" height="76" name="Apply Model (4)" width="90" x="45" y="30">
               <list key="application_parameters"/>
             </operator>
             <operator activated="true" class="performance_classification" compatibility="5.2.008" expanded="true" height="76" name="Performance (4)" width="90" x="200" y="30">
               <list key="class_weights"/>
             </operator>
             <connect from_port="model" to_op="Apply Model (4)" to_port="model"/>
             <connect from_port="test set" to_op="Apply Model (4)" to_port="unlabelled data"/>
             <connect from_op="Apply Model (4)" from_port="labelled data" to_op="Performance (4)" to_port="labelled data"/>
             <connect from_op="Performance (4)" from_port="performance" to_port="averagable 1"/>
             <portSpacing port="source_model" spacing="0"/>
             <portSpacing port="source_test set" spacing="0"/>
             <portSpacing port="source_through 1" spacing="0"/>
             <portSpacing port="sink_averagable 1" spacing="0"/>
             <portSpacing port="sink_averagable 2" spacing="0"/>
           </process>
         </operator>
         <!-- Decision Tree validation: no parameters set on the operator (no local random seed). -->
         <operator activated="true" class="x_validation" compatibility="5.2.008" expanded="true" height="112" name="Validation (3)" width="90" x="380" y="30">
           <process expanded="true" height="414" width="346">
             <operator activated="true" class="decision_tree" compatibility="5.2.008" expanded="true" height="76" name="Decision Tree (3)" width="90" x="132" y="30"/>
             <connect from_port="training" to_op="Decision Tree (3)" to_port="training set"/>
             <connect from_op="Decision Tree (3)" from_port="model" to_port="model"/>
             <portSpacing port="source_training" spacing="0"/>
             <portSpacing port="sink_model" spacing="0"/>
             <portSpacing port="sink_through 1" spacing="0"/>
           </process>
           <process expanded="true" height="414" width="346">
             <operator activated="true" class="apply_model" compatibility="5.2.008" expanded="true" height="76" name="Apply Model (3)" width="90" x="45" y="30">
               <list key="application_parameters"/>
             </operator>
             <operator activated="true" class="performance_classification" compatibility="5.2.008" expanded="true" height="76" name="Performance (3)" width="90" x="200" y="30">
               <list key="class_weights"/>
             </operator>
             <connect from_port="model" to_op="Apply Model (3)" to_port="model"/>
             <connect from_port="test set" to_op="Apply Model (3)" to_port="unlabelled data"/>
             <connect from_op="Apply Model (3)" from_port="labelled data" to_op="Performance (3)" to_port="labelled data"/>
             <connect from_op="Performance (3)" from_port="performance" to_port="averagable 1"/>
             <portSpacing port="source_model" spacing="0"/>
             <portSpacing port="source_test set" spacing="0"/>
             <portSpacing port="source_through 1" spacing="0"/>
             <portSpacing port="sink_averagable 1" spacing="0"/>
             <portSpacing port="sink_averagable 2" spacing="0"/>
           </process>
         </operator>
         <!-- Top-level wiring. Results: Validation (3) delivers "training" to result 1 and its
              performance to result 2; Validation (4) delivers its performance to result 3. -->
         <connect from_op="Retrieve" from_port="output" to_op="Select Attributes (2)" to_port="example set input"/>
         <connect from_op="Select Attributes (2)" from_port="example set output" to_op="Multiply (2)" to_port="input"/>
         <connect from_op="Multiply (2)" from_port="output 1" to_op="Validation (3)" to_port="training"/>
         <connect from_op="Multiply (2)" from_port="output 2" to_op="Validation (4)" to_port="training"/>
         <connect from_op="Validation (4)" from_port="averagable 1" to_port="result 3"/>
         <connect from_op="Validation (3)" from_port="training" to_port="result 1"/>
         <connect from_op="Validation (3)" from_port="averagable 1" to_port="result 2"/>
         <portSpacing port="source_input 1" spacing="0"/>
         <portSpacing port="sink_result 1" spacing="0"/>
         <portSpacing port="sink_result 2" spacing="0"/>
         <portSpacing port="sink_result 3" spacing="0"/>
         <portSpacing port="sink_result 4" spacing="0"/>
       </process>
     </operator>
    </process>
    Process 2:

    <?xml version="1.0" encoding="UTF-8" standalone="no"?>
    <!--
      RapidMiner process (version 5.2.008), "Process 2" of the post.
      Data flow: Retrieve (//Samples/data/Sonar), then Multiply, then Select Attributes (2),
      then a single Validation (3) that trains and evaluates a Decision Tree.
      NOTE(review): no parameters are set on the x_validation operator, so no local random
      seed is configured. According to the reply in this thread, X-Validation uses random
      splits that differ between executions.
    -->
    <process version="5.2.008">
     <context>
       <input/>
       <output/>
       <macros/>
     </context>
     <operator activated="true" class="process" compatibility="5.2.008" expanded="true" name="Process">
       <process expanded="true" height="467" width="681">
         <!-- Load the Sonar sample data set from the repository. -->
         <operator activated="true" class="retrieve" compatibility="5.2.008" expanded="true" height="60" name="Retrieve" width="90" x="45" y="30">
           <parameter key="repository_entry" value="//Samples/data/Sonar"/>
         </operator>
         <!-- Only "output 1" of this Multiply is wired (to Select Attributes (2)). -->
         <operator activated="true" class="multiply" compatibility="5.2.008" expanded="true" height="76" name="Multiply" width="90" x="45" y="165"/>
         <!-- Subset filter with invert_selection="true": presumably the listed attributes are
              the ones removed and all others are kept. TODO confirm against the operator docs. -->
         <operator activated="true" class="select_attributes" compatibility="5.2.008" expanded="true" height="76" name="Select Attributes (2)" width="90" x="179" y="165">
           <parameter key="attribute_filter_type" value="subset"/>
           <parameter key="attributes" value="attribute_14|attribute_15|attribute_16|attribute_17|attribute_18|attribute_19|attribute_2|attribute_22|attribute_23|attribute_24|attribute_25|attribute_26|attribute_27|attribute_28|attribute_29|attribute_3|attribute_30|attribute_31|attribute_32|attribute_33|attribute_34|attribute_38|attribute_39|attribute_40|attribute_41|attribute_42|attribute_50|attribute_53|attribute_55|attribute_56|attribute_57|attribute_58|attribute_59|attribute_60|attribute_7|attribute_6|attribute_8|attribute_35|attribute_20|attribute_5|attribute_54"/>
           <parameter key="invert_selection" value="true"/>
         </operator>
         <!-- Decision Tree validation: no parameters set on the operator (no local random seed). -->
         <operator activated="true" class="x_validation" compatibility="5.2.008" expanded="true" height="112" name="Validation (3)" width="90" x="380" y="120">
           <!-- Training subprocess: learn a Decision Tree (no parameters set) on the training port. -->
           <process expanded="true" height="414" width="346">
             <operator activated="true" class="decision_tree" compatibility="5.2.008" expanded="true" height="76" name="Decision Tree (3)" width="90" x="132" y="30"/>
             <connect from_port="training" to_op="Decision Tree (3)" to_port="training set"/>
             <connect from_op="Decision Tree (3)" from_port="model" to_port="model"/>
             <portSpacing port="source_training" spacing="0"/>
             <portSpacing port="sink_model" spacing="0"/>
             <portSpacing port="sink_through 1" spacing="0"/>
           </process>
           <!-- Testing subprocess: apply the model to the test set and deliver the
                classification performance on "averagable 1". -->
           <process expanded="true" height="414" width="346">
             <operator activated="true" class="apply_model" compatibility="5.2.008" expanded="true" height="76" name="Apply Model (3)" width="90" x="45" y="30">
               <list key="application_parameters"/>
             </operator>
             <operator activated="true" class="performance_classification" compatibility="5.2.008" expanded="true" height="76" name="Performance (3)" width="90" x="200" y="30">
               <list key="class_weights"/>
             </operator>
             <connect from_port="model" to_op="Apply Model (3)" to_port="model"/>
             <connect from_port="test set" to_op="Apply Model (3)" to_port="unlabelled data"/>
             <connect from_op="Apply Model (3)" from_port="labelled data" to_op="Performance (3)" to_port="labelled data"/>
             <connect from_op="Performance (3)" from_port="performance" to_port="averagable 1"/>
             <portSpacing port="source_model" spacing="0"/>
             <portSpacing port="source_test set" spacing="0"/>
             <portSpacing port="source_through 1" spacing="0"/>
             <portSpacing port="sink_averagable 1" spacing="0"/>
             <portSpacing port="sink_averagable 2" spacing="0"/>
           </process>
         </operator>
         <!-- Top-level wiring. Results: Validation (3) delivers "training" to result 1 and its
              performance to result 2. -->
         <connect from_op="Retrieve" from_port="output" to_op="Multiply" to_port="input"/>
         <connect from_op="Multiply" from_port="output 1" to_op="Select Attributes (2)" to_port="example set input"/>
         <connect from_op="Select Attributes (2)" from_port="example set output" to_op="Validation (3)" to_port="training"/>
         <connect from_op="Validation (3)" from_port="training" to_port="result 1"/>
         <connect from_op="Validation (3)" from_port="averagable 1" to_port="result 2"/>
         <portSpacing port="source_input 1" spacing="0"/>
         <portSpacing port="sink_result 1" spacing="0"/>
         <portSpacing port="sink_result 2" spacing="0"/>
         <portSpacing port="sink_result 3" spacing="0"/>
       </process>
     </operator>
    </process>

    Why does the result change from process to process with the same input and model?

    My work has stalled because of this ambiguity in the results.

    Please clarify.
    Thanking you in anticipation
  • wesselwessel Member Posts: 537  Guru
    Random seed?
  • MariusHelfMariusHelf RapidMiner Certified Expert, Member Posts: 1,869   Unicorn
    Wessel is right, the X-Validation uses random splits, which are different if executed twice in a row. Set a local random seed for the X-Validation to force the same splits for both operators.

    Furthermore, I strongly suggest updating from RapidMiner 5.2.8, which is years old, to the current version 5.3.13.

    If you have any further questions, please come back!

    Best regards,
    Marius
  • DavidRajuDavidRaju Member Posts: 18 Contributor II
    Are there any free online books where I can learn more about these parameters, so that I can get efficient results?
  • wesselwessel Member Posts: 537  Guru
    There is a Weka book, and I think a RapidMiner book will be published soon.

    weka book:
    http://www.cs.waikato.ac.nz/ml/weka/book.html

    rapid miner book:
    http://rapidminerbook.com/

  • DavidRajuDavidRaju Member Posts: 18 Contributor II
    thank you, I will try
Sign In or Register to comment.