Bug in Loop and Deliver Best

marcin_blachnikmarcin_blachnik Member Posts: 55  Guru
edited November 2018 in Help

I have noticed that, if one of the performances delivered to the "Loop and Deliver Best" operator is missing (NAN is delivered) then that operator treats it as the best performance and returns it at the output.  Below is a sample process:

 <!-- NOTE(review): this paste is not well-formed as posted. The final </process> tag
      has no matching opening tag; the corrected repost later in this thread starts with
      an XML declaration and <process version="7.4.000">. -->
 <context>
<input/>
<output/>
<macros/>
</context>
<operator activated="true" class="process" compatibility="7.4.000" expanded="true" name="Process">
<process expanded="true">
<!-- Loads the Iris sample data set from the repository. -->
<operator activated="true" class="retrieve" compatibility="7.4.000" expanded="true" height="68" name="Retrieve Iris" width="90" x="93" y="74">
<parameter key="repository_entry" value="//Samples/data/Iris"/>
</operator>
<!-- Sets attribute a1 of the example at index 3 to NaN; this is the missing value
     the bug report is about. -->
<operator activated="true" class="set_data" compatibility="7.4.000" expanded="true" height="82" name="Set Data" width="90" x="241" y="75">
<parameter key="example_index" value="3"/>
<parameter key="attribute_name" value="a1"/>
<parameter key="value" value="NaN"/>
<list key="additional_values"/>
</operator>
<operator activated="true" class="loop_and_deliver_best" compatibility="7.4.000" expanded="true" height="103" name="Loop and Deliver Best" width="90" x="514" y="69">
<process expanded="true">
<!-- Uses the data value of attribute a1 at example index %{a} as the performance.
     NOTE(review): %{a} is a macro reference that is not defined in this paste;
     presumably it resolves to a per-iteration counter, confirm against the
     RapidMiner macro documentation. -->
<operator activated="true" class="extract_performance" compatibility="7.4.000" expanded="true" height="82" name="Performance" width="90" x="314" y="34">
<parameter key="performance_type" value="data_value"/>
<parameter key="attribute_name" value="a1"/>
<parameter key="example_index" value="%{a}"/>
</operator>
<!-- Duplicates the performance so it can feed both the "performance vector" port
     and the "out 1" port of the loop. -->
<operator activated="true" class="multiply" compatibility="7.4.000" expanded="true" height="103" name="Multiply" width="90" x="556" y="34"/>
<connect from_port="in 1" to_op="Performance" to_port="example set"/>
<connect from_op="Performance" from_port="performance" to_op="Multiply" to_port="input"/>
<connect from_op="Multiply" from_port="output 1" to_port="performance vector"/>
<connect from_op="Multiply" from_port="output 2" to_port="out 1"/>
<portSpacing port="source_in 1" spacing="0"/>
<portSpacing port="source_in 2" spacing="0"/>
<portSpacing port="sink_performance vector" spacing="0"/>
<portSpacing port="sink_out 1" spacing="0"/>
<portSpacing port="sink_out 2" spacing="0"/>
</process>
</operator>
<connect from_op="Retrieve Iris" from_port="output" to_op="Set Data" to_port="example set input"/>
<connect from_op="Set Data" from_port="example set output" to_op="Loop and Deliver Best" to_port="in 1"/>
<connect from_op="Loop and Deliver Best" from_port="performance" to_port="result 1"/>
<connect from_op="Loop and Deliver Best" from_port="out 1" to_port="result 2"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_result 1" spacing="0"/>
<portSpacing port="sink_result 2" spacing="0"/>
<portSpacing port="sink_result 3" spacing="0"/>
</process>
</operator>
</process>

Best

 

Marcin

 

Thomas_Ott

Answers

  • Thomas_OttThomas_Ott RapidMiner Certified Analyst, RapidMiner Certified Expert, Member Posts: 1,761   Unicorn

    Can you repaste the XML or export it as an RMP file? I can't get it to populate. 

  • marcin_blachnikmarcin_blachnik Member Posts: 55  Guru

    Sorry

     

    There was one tag missing:

    <?xml version="1.0" encoding="UTF-8"?>
    <!-- Reproduction process: Iris data with one NaN value in attribute a1, fed through
         "Loop and Deliver Best" with the a1 data value used as the performance. -->
    <process version="7.4.000">
    <context>
    <input/>
    <output/>
    <macros/>
    </context>
    <operator activated="true" class="process" compatibility="7.4.000" expanded="true" name="Process">
    <parameter key="logverbosity" value="init"/>
    <parameter key="random_seed" value="2001"/>
    <parameter key="send_mail" value="never"/>
    <parameter key="notification_email" value=""/>
    <parameter key="process_duration_for_mail" value="30"/>
    <parameter key="encoding" value="SYSTEM"/>
    <process expanded="true">
    <!-- Loads the Iris sample data set from the repository. -->
    <operator activated="true" class="retrieve" compatibility="7.4.000" expanded="true" height="68" name="Retrieve Iris" width="90" x="93" y="74">
    <parameter key="repository_entry" value="//Samples/data/Iris"/>
    </operator>
    <!-- Sets attribute a1 of the example at index 3 to NaN; this is the missing value
         the bug report is about. -->
    <operator activated="true" class="set_data" compatibility="7.4.000" expanded="true" height="82" name="Set Data" width="90" x="241" y="75">
    <parameter key="example_index" value="3"/>
    <parameter key="count_backwards" value="false"/>
    <parameter key="attribute_name" value="a1"/>
    <parameter key="value" value="NaN"/>
    <list key="additional_values"/>
    </operator>
    <!-- Runs the inner process for 10 iterations (timeout disabled) and delivers the
         results belonging to the best performance. -->
    <operator activated="true" class="loop_and_deliver_best" compatibility="7.4.000" expanded="true" height="103" name="Loop and Deliver Best" width="90" x="514" y="69">
    <parameter key="iterations" value="10"/>
    <parameter key="enable_timeout" value="false"/>
    <parameter key="timeout" value="1"/>
    <process expanded="true">
    <!-- Uses the data value of attribute a1 at example index %{a} as the performance,
         with optimization direction "maximize".
         NOTE(review): %{a} is a macro reference that is not defined in this process;
         presumably it resolves to a per-iteration counter, confirm against the
         RapidMiner macro documentation. -->
    <operator activated="true" class="extract_performance" compatibility="7.4.000" expanded="true" height="82" name="Performance" width="90" x="314" y="34">
    <parameter key="performance_type" value="data_value"/>
    <parameter key="statistics" value="average"/>
    <parameter key="attribute_name" value="a1"/>
    <parameter key="example_index" value="%{a}"/>
    <parameter key="optimization_direction" value="maximize"/>
    </operator>
    <!-- Duplicates the performance so it can feed both the "performance vector" port
         and the "out 1" port of the loop. -->
    <operator activated="true" class="multiply" compatibility="7.4.000" expanded="true" height="103" name="Multiply" width="90" x="556" y="34"/>
    <connect from_port="in 1" to_op="Performance" to_port="example set"/>
    <connect from_op="Performance" from_port="performance" to_op="Multiply" to_port="input"/>
    <connect from_op="Multiply" from_port="output 1" to_port="performance vector"/>
    <connect from_op="Multiply" from_port="output 2" to_port="out 1"/>
    <portSpacing port="source_in 1" spacing="0"/>
    <portSpacing port="source_in 2" spacing="0"/>
    <portSpacing port="sink_performance vector" spacing="0"/>
    <portSpacing port="sink_out 1" spacing="0"/>
    <portSpacing port="sink_out 2" spacing="0"/>
    </process>
    </operator>
    <connect from_op="Retrieve Iris" from_port="output" to_op="Set Data" to_port="example set input"/>
    <connect from_op="Set Data" from_port="example set output" to_op="Loop and Deliver Best" to_port="in 1"/>
    <connect from_op="Loop and Deliver Best" from_port="performance" to_port="result 1"/>
    <connect from_op="Loop and Deliver Best" from_port="out 1" to_port="result 2"/>
    <portSpacing port="source_input 1" spacing="0"/>
    <portSpacing port="sink_result 1" spacing="0"/>
    <portSpacing port="sink_result 2" spacing="0"/>
    <portSpacing port="sink_result 3" spacing="0"/>
    </process>
    </operator>
    </process>
  • Thomas_OttThomas_Ott RapidMiner Certified Analyst, RapidMiner Certified Expert, Member Posts: 1,761   Unicorn

    Hmm. I'm not sure if that's a "bug bug" since you are introducing a NaN or missing input. Maybe it should give you a warning that, because there is a missing value, it might not work right. That happens on the Forecasting Perf operator too if I don't keep good data quality. 

  • marcin_blachnikmarcin_blachnik Member Posts: 55  Guru

    Hmm,

    I just wanted to draw your attention to such a strange interpretation of "Best Value".

     

    By the way is there already any working bug tracker available?

     

    Thomas_Ott
  • mschmitzmschmitz Administrator, Moderator, Employee, RapidMiner Certified Analyst, RapidMiner Certified Expert, University Professor Posts: 2,132  RM Data Scientist

    Hi Marcin,

     

    I've checked our code. It's com/rapidminer/operator/performance/PerformanceCriterion.java line 102:

     

    	@Override
    public int compareTo(PerformanceCriterion o) {
    // Criteria are only comparable when both their class and their name match;
    // any mismatch is reported as a RuntimeException.
    if (!this.getClass().equals(o.getClass())) {
    throw new RuntimeException("Mismatched criterion class:" + this.getClass() + ", " + o.getClass());
    }
    if (!o.getName().equals(this.getName())) {
    throw new RuntimeException("Mismatched criterion type:" + this.getName() + ", " + o.getName());
    }
    // The ordering itself is delegated to Double.compare on the two fitness values.
    // NOTE(review): Double.compare ranks NaN above every non-NaN value (see its
    // source quoted below), so a NaN fitness compares as greater than any real
    // fitness here. There is no NaN-specific handling in this method.
    return Double.compare(this.getFitness(), o.getFitness());
    }

    which uses the Java method compare. Having a look at this it is:

        /**
    * Compares the two specified {@code double} values. The sign
    * of the integer value returned is the same as that of the
    * integer that would be returned by the call:
    * <pre>
    * new Double(d1).compareTo(new Double(d2))
    * </pre>
    *
    * @param d1 the first {@code double} to compare
    * @param d2 the second {@code double} to compare
    * @return the value {@code 0} if {@code d1} is
    * numerically equal to {@code d2}; a value less than
    * {@code 0} if {@code d1} is numerically less than
    * {@code d2}; and a value greater than {@code 0}
    * if {@code d1} is numerically greater than
    * {@code d2}.
    * @since 1.4
    */
    public static int compare(double d1, double d2) {
    // Fast path: an ordinary numeric ordering exists between the two values.
    if (d1 < d2)
    return -1; // Neither val is NaN, thisVal is smaller
    if (d1 > d2)
    return 1; // Neither val is NaN, thisVal is larger

    // Reached only when neither "<" nor ">" held: the values are equal,
    // are a +0.0/-0.0 pair, or at least one of them is NaN.
    // Cannot use doubleToRawLongBits because of possibility of NaNs.
    long thisBits = Double.doubleToLongBits(d1);
    long anotherBits = Double.doubleToLongBits(d2);

    // Order the remaining cases by their bit patterns. As the trailing comments
    // list, this puts -0.0 below 0.0 and any non-NaN value below NaN, so NaN
    // ends up as the greatest value.
    return (thisBits == anotherBits ? 0 : // Values are equal
    (thisBits < anotherBits ? -1 : // (-0.0, 0.0) or (!NaN, NaN)
    1)); // (0.0, -0.0) or (NaN, !NaN)
    }

    Which explains the behaviour. It might make some sense to handle NaNs in our own methods. What would you expect? NaN always "worse" than any other performance?

     

    ~Martin

    - Head of Data Science Services at RapidMiner -
    Dortmund, Germany
    Thomas_Ott
Sign In or Register to comment.