Does the store operator automatically store the best attribute set discovered?

brett_800 · July 2014

Does the store operator automatically store the best attribute weights set discovered in this loop process?

I have a Wrapper X-Validation inside a loop process manipulating a parameter of the MRMR pre-processor and my chosen model operator (W-RandomForest). The process is based on the process from Hofmann and Klinkenberg's book.

<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  RapidMiner process: retrieves CFB_TOTALS_LEARNING_SET and feeds it to a
  Loop Parameters operator whose subprocess runs a Wrapper X-Validation
  (MRMR feature weighting, W-RandomForest learner, Apply Model, Performance, Log)
  and stores the validation's attribute weights in the repository.
-->
<process version="5.3.015">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="5.3.015" expanded="true" name="Process">
    <process expanded="true">
      <operator activated="true" class="retrieve" compatibility="5.3.015" expanded="true" height="60" name="Retrieve CFB_TOTALS_LEARNING_SET" width="90" x="45" y="210">
        <parameter key="repository_entry" value="CFB_TOTALS_LEARNING_SET"/>
      </operator>
      <operator activated="true" class="loop_parameters" compatibility="5.3.015" expanded="true" height="76" name="Loop Parameters" width="90" x="313" y="210">
        <!-- Parameters varied by the loop: k of MRMR-FS and I of W-RandomForest. -->
        <list key="parameters">
          <parameter key="MRMR-FS.k" value="[20;220;5;linear]"/>
          <parameter key="W-RandomForest.I" value="[30;300;5;linear]"/>
        </list>
        <!-- NOTE(review): with synchronize=true the two ranges are presumably stepped together
             rather than combined as a grid; confirm against the Loop Parameters documentation. -->
        <parameter key="synchronize" value="true"/>
        <process expanded="true">
          <operator activated="true" class="wrapper_x_validation" compatibility="5.3.015" expanded="true" height="76" name="Validation" width="90" x="112" y="75">
            <parameter key="number_of_validations" value="5"/>
            <!-- Subprocess 1: attribute weighting with MRMR-FS. -->
            <process expanded="true">
              <operator activated="true" class="featselext:mrmr_feature_selection" compatibility="1.1.004" expanded="true" height="76" name="MRMR-FS" width="90" x="45" y="30">
                <!-- k is also listed in the Loop Parameters list above. -->
                <parameter key="k" value="220"/>
                <parameter key="relevance_redundancy_relation" value="difference"/>
              </operator>
              <connect from_port="weighting set source" to_op="MRMR-FS" to_port="example set"/>
              <connect from_op="MRMR-FS" from_port="weights" to_port="attribute weights sink"/>
              <portSpacing port="source_weighting set source" spacing="0"/>
              <portSpacing port="sink_attribute weights sink" spacing="0"/>
            </process>
            <!-- Subprocess 2: model training with W-RandomForest. -->
            <process expanded="true">
              <operator activated="true" class="weka:W-RandomForest" compatibility="5.3.001" expanded="true" height="76" name="W-RandomForest" width="90" x="45" y="30">
                <!-- I is also listed in the Loop Parameters list above. -->
                <parameter key="I" value="300.0"/>
                <parameter key="depth" value="15"/>
              </operator>
              <connect from_port="train set source" to_op="W-RandomForest" to_port="training set"/>
              <connect from_op="W-RandomForest" from_port="model" to_port="model sink"/>
              <portSpacing port="source_train set source" spacing="0"/>
              <portSpacing port="sink_model sink" spacing="0"/>
            </process>
            <!-- Subprocess 3: apply the model to the test set, measure performance, log it. -->
            <process expanded="true">
              <operator activated="true" class="apply_model" compatibility="5.3.015" expanded="true" height="76" name="Apply Model" width="90" x="45" y="30">
                <list key="application_parameters"/>
              </operator>
              <!-- NOTE(review): no criteria parameters are set on this operator, and the posted log
                   shows null for every logged value except accuracy. Presumably the other criteria
                   must be enabled here before Log can read them; confirm. -->
              <operator activated="true" class="performance_binominal_classification" compatibility="5.3.015" expanded="true" height="76" name="Performance" width="90" x="45" y="255"/>
              <operator activated="true" class="log" compatibility="5.3.015" expanded="true" height="76" name="Log" width="90" x="179" y="255">
                <parameter key="filename" value="C:\Users\Brett\Desktop\CFB_TOTALS_PREPROCESS_LOG.log"/>
                <!-- Log columns: "i" is the iteration value of the Validation operator and
                     "j" is the iteration value of the Loop Parameters operator. -->
                <list key="log">
                  <parameter key="accuracy" value="operator.Performance.value.accuracy"/>
                  <parameter key="i" value="operator.Validation.value.iteration"/>
                  <parameter key="j" value="operator.Loop Parameters.value.iteration"/>
                  <parameter key="precision" value="operator.Performance.value.precision"/>
                  <parameter key="recall" value="operator.Performance.value.recall"/>
                  <parameter key="false positives" value="operator.Performance.value.false_positive"/>
                  <parameter key="false negatives" value="operator.Performance.value.false_negative"/>
                  <parameter key="true positives" value="operator.Performance.value.true_positive"/>
                  <parameter key="true negatives" value="operator.Performance.value.true_negative"/>
                </list>
                <parameter key="persistent" value="true"/>
              </operator>
              <operator activated="true" class="free_memory" compatibility="5.3.015" expanded="true" height="76" name="Free Memory" width="90" x="246" y="390"/>
              <connect from_port="test set source" to_op="Apply Model" to_port="unlabelled data"/>
              <connect from_port="model source" to_op="Apply Model" to_port="model"/>
              <connect from_op="Apply Model" from_port="labelled data" to_op="Performance" to_port="labelled data"/>
              <connect from_op="Performance" from_port="performance" to_op="Log" to_port="through 1"/>
              <connect from_op="Log" from_port="through 1" to_op="Free Memory" to_port="through 1"/>
              <connect from_op="Free Memory" from_port="through 1" to_port="performance vector sink"/>
              <portSpacing port="source_test set source" spacing="0"/>
              <portSpacing port="source_model source" spacing="0"/>
              <portSpacing port="sink_performance vector sink" spacing="0"/>
            </process>
          </operator>
          <!-- Store sits inside the Loop Parameters subprocess and receives the Validation
               operator's "attribute weights out", always targeting the same repository entry.
               NOTE(review): presumably each loop iteration overwrites the previous entry, so the
               stored weights come from the last iteration rather than the best one; confirm. -->
          <operator activated="true" class="store" compatibility="5.3.015" expanded="true" height="60" name="Store" width="90" x="246" y="120">
            <parameter key="repository_entry" value="//CFB_TOTALS/PREPROCESSING/A. CFB_MODEL_V2_TOTALS_STORE_SELECTED_FEATURES"/>
          </operator>
          <connect from_port="input 1" to_op="Validation" to_port="example set in"/>
          <connect from_op="Validation" from_port="performance vector out" to_port="performance"/>
          <connect from_op="Validation" from_port="attribute weights out" to_op="Store" to_port="input"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="source_input 2" spacing="0"/>
          <portSpacing port="sink_performance" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
        </process>
      </operator>
      <connect from_op="Retrieve CFB_TOTALS_LEARNING_SET" from_port="output" to_op="Loop Parameters" to_port="input 1"/>
      <connect from_op="Loop Parameters" from_port="result 1" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>

The following log (shown below) tells me that the iteration with i=3 and j=3 is the best choice, with an accuracy of 0.573. However, I'm not even sure what 'i' and 'j' actually are. So if RM doesn't automatically choose the best result when recording the attribute weights in the store operator, I don't know how to manually set i and j without knowing what they represent.

# Generated by Log[com.rapidminer.operator.visualization.ProcessLogOperator]
# accuracy i j precision recall false positives false negatives true positives true negatives
0.5179775280898876 0.0 0.0 null null null null null null
0.5235955056179775 1.0 0.0 null null null null null null
0.501123595505618 2.0 0.0 null null null null null null
0.5337078651685393 3.0 0.0 null null null null null null
0.5056179775280899 4.0 0.0 null null null null null null
0.4887640449438202 0.0 1.0 null null null null null null
0.5179775280898876 1.0 1.0 null null null null null null
0.5404494382022472 2.0 1.0 null null null null null null
0.5325842696629214 3.0 1.0 null null null null null null
0.5191011235955056 4.0 1.0 null null null null null null
0.5235955056179775 0.0 2.0 null null null null null null
0.5213483146067416 1.0 2.0 null null null null null null
0.5359550561797752 2.0 2.0 null null null null null null
0.5280898876404494 3.0 2.0 null null null null null null
0.5370786516853933 4.0 2.0 null null null null null null
0.5089887640449439 0.0 3.0 null null null null null null
0.5101123595505618 1.0 3.0 null null null null null null
0.5078651685393258 2.0 3.0 null null null null null null
0.5730337078651685 3.0 3.0 null null null null null null
0.5168539325842697 4.0 3.0 null null null null null null
0.5078651685393258 0.0 4.0 null null null null null null
0.5146067415730337 1.0 4.0 null null null null null null
0.503370786516854 2.0 4.0 null null null null null null
0.5224719101123596 3.0 4.0 null null null null null null
0.5292134831460674 4.0 4.0 null null null null null null
0.5213483146067416 0.0 5.0 null null null null null null
0.5348314606741573 1.0 5.0 null null null null null null
0.5292134831460674 2.0 5.0 null null null null null null
0.5370786516853933 3.0 5.0 null null null null null null
0.5112359550561798 4.0 5.0 null null null null null null

Howdy, Stranger!

Quick Links

Categories

Altair RapidMiner Community

GET HELP. LEARN BEST PRACTICES. NETWORK WITH YOUR PEERS.

Does the store operator automatically store the best attribute set discovered?