🦉 🎤   RapidMiner Wisdom 2020 - CALL FOR SPEAKERS   🦉 🎤

We are inviting all community members to submit proposals to speak at Wisdom 2020 in Boston.


Whether it's a cool RapidMiner trick or a use case implementation, we want to see what you have.
Form link is below and deadline for submissions is November 15. See you in Boston!

CLICK HERE TO GO TO ENTRY FORM

Bug in Cross Validation Operator

Fred12 Member Posts: 344   Unicorn
edited November 2018 in Help

hi,

previously, I used Performance(Classification) in the old Validation (X-Validation) Operator.

 

cv.PNG

When I activated the checkboxes for performance measures like weighted mean precision, margin, squared error etc., they were shown in the Log operator outside the X-Validation.

 

However, with the new Cross Validation operator, the values of the Performance operator don't show anymore... the log shows missing values ("?") instead...

I think it's a bug...

Here is an XML example process with Iris data to show the problem.

I could not find the old X-Validation anymore, so the problem should please be fixed soon, as I am constructing my wm_FScore out of the wm_Recall and wm_Precision.

 

<?xml version="1.0" encoding="UTF-8"?>
<!--
  Side-by-side process on the Iris sample data: the same SVM / Apply Model /
  Performance chain is run once inside the legacy x_validation operator and
  once inside concurrency:cross_validation. Each validation operator feeds
  its own Log operator, and each Log operator feeds one process result port.
-->
<process version="7.4.000">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="7.4.000" expanded="true" name="Process">
    <process expanded="true">

      <!-- Branch 1: legacy x_validation ("Validation") followed by "Log". -->
      <operator activated="true" class="retrieve" compatibility="7.4.000" expanded="true" height="68" name="Retrieve Iris" width="90" x="45" y="85">
        <parameter key="repository_entry" value="//Samples/data/Iris"/>
      </operator>
      <operator activated="true" class="x_validation" compatibility="7.4.000" expanded="true" height="124" name="Validation" width="90" x="447" y="85">
        <parameter key="number_of_validations" value="4"/>
        <parameter key="sampling_type" value="stratified sampling"/>
        <!-- First inner process: "training" port into the SVM, SVM model out. -->
        <process expanded="true">
          <operator activated="true" class="support_vector_machine_libsvm" compatibility="7.4.000" expanded="true" height="82" name="SVM" width="90" x="112" y="34">
            <parameter key="gamma" value="0.007170375144641631"/>
            <parameter key="C" value="227500.0"/>
            <list key="class_weights"/>
          </operator>
          <connect from_port="training" to_op="SVM" to_port="training set"/>
          <connect from_op="SVM" from_port="model" to_port="model"/>
          <portSpacing port="source_training" spacing="0"/>
          <portSpacing port="sink_model" spacing="0"/>
          <portSpacing port="sink_through 1" spacing="0"/>
        </process>
        <!-- Second inner process: apply the model to "test set", then measure performance. -->
        <process expanded="true">
          <operator activated="true" class="apply_model" compatibility="7.4.000" expanded="true" height="82" name="Apply Model" width="90" x="112" y="34">
            <list key="application_parameters"/>
          </operator>
          <operator activated="true" class="performance_classification" compatibility="7.4.000" expanded="true" height="82" name="Performance" width="90" x="313" y="34">
            <parameter key="kappa" value="true"/>
            <parameter key="weighted_mean_recall" value="true"/>
            <parameter key="weighted_mean_precision" value="true"/>
            <parameter key="correlation" value="true"/>
            <parameter key="squared_correlation" value="true"/>
            <parameter key="margin" value="true"/>
            <list key="class_weights"/>
          </operator>
          <connect from_port="model" to_op="Apply Model" to_port="model"/>
          <connect from_port="test set" to_op="Apply Model" to_port="unlabelled data"/>
          <connect from_op="Apply Model" from_port="labelled data" to_op="Performance" to_port="labelled data"/>
          <connect from_op="Performance" from_port="performance" to_port="averagable 1"/>
          <portSpacing port="source_model" spacing="0"/>
          <portSpacing port="source_test set" spacing="0"/>
          <portSpacing port="source_through 1" spacing="0"/>
          <portSpacing port="sink_averagable 1" spacing="0"/>
          <portSpacing port="sink_averagable 2" spacing="0"/>
        </process>
      </operator>
      <!-- Logs one value of "Validation" and five values of the inner "Performance" operator. -->
      <operator activated="true" class="log" compatibility="7.4.000" expanded="true" height="82" name="Log" width="90" x="581" y="136">
        <list key="log">
          <parameter key="xvalperf" value="operator.Validation.value.performance"/>
          <parameter key="perfperf" value="operator.Performance.value.accuracy"/>
          <parameter key="perfkappa" value="operator.Performance.value.kappa"/>
          <parameter key="perfcorr" value="operator.Performance.value.correlation"/>
          <parameter key="perfwmprec" value="operator.Performance.value.weighted_mean_precision"/>
          <parameter key="perfwmrec" value="operator.Performance.value.weighted_mean_recall"/>
        </list>
      </operator>

      <!-- Branch 2: concurrency:cross_validation ("Cross Validation") followed by "Log (2)". -->
      <operator activated="true" class="retrieve" compatibility="7.4.000" expanded="true" height="68" name="Retrieve Iris (2)" width="90" x="112" y="340">
        <parameter key="repository_entry" value="//Samples/data/Iris"/>
      </operator>
      <!-- No parameters are set explicitly on this operator, unlike "Validation" above. -->
      <operator activated="true" class="concurrency:cross_validation" compatibility="7.4.000" expanded="true" height="145" name="Cross Validation" width="90" x="447" y="289">
        <!-- First inner process: "training set" port into the SVM, SVM model out. -->
        <process expanded="true">
          <operator activated="true" class="support_vector_machine_libsvm" compatibility="7.4.000" expanded="true" height="82" name="SVM (2)" width="90" x="45" y="34">
            <parameter key="gamma" value="0.007170375144641631"/>
            <parameter key="C" value="227500.0"/>
            <list key="class_weights"/>
          </operator>
          <connect from_port="training set" to_op="SVM (2)" to_port="training set"/>
          <connect from_op="SVM (2)" from_port="model" to_port="model"/>
          <portSpacing port="source_training set" spacing="0"/>
          <portSpacing port="sink_model" spacing="0"/>
          <portSpacing port="sink_through 1" spacing="0"/>
        </process>
        <!-- Second inner process: same chain as branch 1, plus the example set is passed on to "test set results". -->
        <process expanded="true">
          <operator activated="true" class="apply_model" compatibility="7.4.000" expanded="true" height="82" name="Apply Model (2)" width="90" x="112" y="34">
            <list key="application_parameters"/>
          </operator>
          <operator activated="true" class="performance_classification" compatibility="7.4.000" expanded="true" height="82" name="Performance (2)" width="90" x="313" y="34">
            <parameter key="kappa" value="true"/>
            <parameter key="weighted_mean_recall" value="true"/>
            <parameter key="weighted_mean_precision" value="true"/>
            <parameter key="correlation" value="true"/>
            <parameter key="squared_correlation" value="true"/>
            <parameter key="margin" value="true"/>
            <list key="class_weights"/>
          </operator>
          <connect from_port="model" to_op="Apply Model (2)" to_port="model"/>
          <connect from_port="test set" to_op="Apply Model (2)" to_port="unlabelled data"/>
          <connect from_op="Apply Model (2)" from_port="labelled data" to_op="Performance (2)" to_port="labelled data"/>
          <connect from_op="Performance (2)" from_port="performance" to_port="performance 1"/>
          <connect from_op="Performance (2)" from_port="example set" to_port="test set results"/>
          <portSpacing port="source_model" spacing="0"/>
          <portSpacing port="source_test set" spacing="0"/>
          <portSpacing port="source_through 1" spacing="0"/>
          <portSpacing port="sink_test set results" spacing="0"/>
          <portSpacing port="sink_performance 1" spacing="0"/>
          <portSpacing port="sink_performance 2" spacing="0"/>
        </process>
      </operator>
      <!--
        NOTE(review): the first two entries below name "Validation" and
        "Performance", which are the operators of branch 1, while the other
        four name "Performance (2)". Confirm whether this mix is intended.
      -->
      <operator activated="true" class="log" compatibility="7.4.000" expanded="true" height="82" name="Log (2)" width="90" x="581" y="340">
        <list key="log">
          <parameter key="xvalperf" value="operator.Validation.value.performance"/>
          <parameter key="perfperf" value="operator.Performance.value.accuracy"/>
          <parameter key="perfkappa" value="operator.Performance (2).value.kappa"/>
          <parameter key="perfcorr" value="operator.Performance (2).value.correlation"/>
          <parameter key="perfwmprec" value="operator.Performance (2).value.weighted_mean_precision"/>
          <parameter key="perfwmrec" value="operator.Performance (2).value.weighted_mean_recall"/>
        </list>
      </operator>

      <!-- Top-level wiring: branch 1 ends at "result 1", branch 2 ends at "result 2". -->
      <connect from_op="Retrieve Iris" from_port="output" to_op="Validation" to_port="training"/>
      <connect from_op="Validation" from_port="averagable 1" to_op="Log" to_port="through 1"/>
      <connect from_op="Log" from_port="through 1" to_port="result 1"/>
      <connect from_op="Retrieve Iris (2)" from_port="output" to_op="Cross Validation" to_port="example set"/>
      <connect from_op="Cross Validation" from_port="performance 1" to_op="Log (2)" to_port="through 1"/>
      <connect from_op="Log (2)" from_port="through 1" to_port="result 2"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
    </process>
  </operator>
</process>

 

Thomas_Ott

Best Answers

  • Thomas_Ott Posts: 1,761   Unicorn
    Solution Accepted

    Thanks @Fred12, something doesn't look quite right as you pointed out. I have passed this on internally. Thanks!

  • Telcontar120 Posts: 1,256   Unicorn
    Solution Accepted

    @Fred12 @Thomas_Ott Not a permanent solution, but a temporary workaround is to instead log the performance metric values  coming out of the Cross Validation operator directly.  You have the option of getting the first 3 in the log parameters.  

    See this xml version.

     

     

    Brian T.
    Lindon Ventures 
    Data Science Consulting from Certified RapidMiner Experts
  • Fred12 Posts: 344   Unicorn
    Solution Accepted

    thanks, that worked!

Sign In or Register to comment.