How to log and plot the output from the Optimize Parameters (Grid) operator

corkiecorkie Member Posts: 10 Contributor II
edited November 2018 in Help
In the sample code below, I'd like to log the optimisation of a model, e.g. finding the best value for K of a KNN model.
I'd like to be able to plot the performance of the model during the optimisation phase, for a number of parameters.
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  RapidMiner process (version 6.3.000).
  Flow: retrieve Iris, split it 70/30, grid-optimise k of a k-NN learner on the
  first partition, then apply the remembered model to the second partition and
  score each prediction against the true label.
-->
<process version="6.3.000">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="6.3.000" expanded="true" name="Process">
    <process expanded="true">
      <!-- Load the Iris sample data set from the repository. -->
      <operator activated="true" class="retrieve" compatibility="6.3.000" expanded="true" height="60" name="Iris" width="90" x="45" y="75">
        <parameter key="repository_entry" value="//Samples/data/Iris"/>
      </operator>
      <!-- Stratified split: partition 1 (ratio 0.7) feeds the optimiser, partition 2 (ratio 0.3) is held out. -->
      <operator activated="true" class="split_data" compatibility="6.3.000" expanded="true" height="94" name="70/30" width="90" x="45" y="300">
        <enumeration key="partitions">
          <parameter key="ratio" value="0.7"/>
          <parameter key="ratio" value="0.3"/>
        </enumeration>
        <parameter key="sampling_type" value="stratified sampling"/>
      </operator>
      <!-- Grid search over the k parameter of the nested operator named k-NN. -->
      <operator activated="true" class="optimize_parameters_grid" compatibility="6.3.000" expanded="true" height="130" name="Optimize Parameters (Grid)" width="90" x="246" y="120">
        <list key="parameters">
          <!-- Value presumably reads as min;max;steps;scale. TODO confirm against the operator documentation. -->
          <parameter key="k-NN.k" value="[1.0;20;20;linear]"/>
        </list>
        <parameter key="parallelize_optimization_process" value="true"/>
        <process expanded="true">
          <!-- Cross-validation with number_of_validations=5; first subprocess trains, second subprocess tests. -->
          <operator activated="true" class="x_validation" compatibility="6.3.000" expanded="true" height="112" name="Validation" width="90" x="112" y="30">
            <parameter key="number_of_validations" value="5"/>
            <parameter key="parallelize_training" value="true"/>
            <parameter key="parallelize_testing" value="true"/>
            <!-- Training subprocess: fit k-NN on the training data and hand the model out. -->
            <process expanded="true">
              <operator activated="true" class="k_nn" compatibility="6.3.000" expanded="true" height="76" name="k-NN" width="90" x="45" y="30">
                <!-- k is the parameter targeted by the k-NN.k entry of the enclosing grid optimiser. -->
                <parameter key="k" value="20"/>
              </operator>
              <connect from_port="training" to_op="k-NN" to_port="training set"/>
              <connect from_op="k-NN" from_port="model" to_port="model"/>
              <portSpacing port="source_training" spacing="0"/>
              <portSpacing port="sink_model" spacing="0"/>
              <portSpacing port="sink_through 1" spacing="0"/>
            </process>
            <!-- Testing subprocess: apply the model to the test set and compute classification performance. -->
            <process expanded="true">
              <operator activated="true" class="apply_model" compatibility="6.3.000" expanded="true" height="76" name="Apply Model" width="90" x="45" y="30">
                <list key="application_parameters"/>
              </operator>
              <!-- Enables classification_error, weighted_mean_recall and weighted_mean_precision criteria. -->
              <operator activated="true" class="performance_classification" compatibility="6.3.000" expanded="true" height="76" name="Performance (2)" width="90" x="179" y="30">
                <parameter key="classification_error" value="true"/>
                <parameter key="weighted_mean_recall" value="true"/>
                <parameter key="weighted_mean_precision" value="true"/>
                <list key="class_weights"/>
              </operator>
              <connect from_port="model" to_op="Apply Model" to_port="model"/>
              <connect from_port="test set" to_op="Apply Model" to_port="unlabelled data"/>
              <connect from_op="Apply Model" from_port="labelled data" to_op="Performance (2)" to_port="labelled data"/>
              <connect from_op="Performance (2)" from_port="performance" to_port="averagable 1"/>
              <portSpacing port="source_model" spacing="0"/>
              <portSpacing port="source_test set" spacing="0"/>
              <portSpacing port="source_through 1" spacing="0"/>
              <portSpacing port="sink_averagable 1" spacing="0"/>
              <portSpacing port="sink_averagable 2" spacing="0"/>
            </process>
          </operator>
          <!--
            Stores the model coming out of Validation under the name "Mymodel".
            NOTE(review): this runs inside the optimiser, so the stored object is
            the model of the last iteration, not necessarily the best one.
          -->
          <operator activated="true" class="remember" compatibility="6.3.000" expanded="true" height="60" name="Remember" width="90" x="246" y="30">
            <parameter key="name" value="Mymodel"/>
            <parameter key="io_object" value="Model"/>
          </operator>
          <!-- Wiring: Validation performance goes to the optimiser's performance sink; model (via Remember) to result 1; training data to result 2. -->
          <connect from_port="input 1" to_op="Validation" to_port="training"/>
          <connect from_op="Validation" from_port="model" to_op="Remember" to_port="store"/>
          <connect from_op="Validation" from_port="training" to_port="result 2"/>
          <connect from_op="Validation" from_port="averagable 1" to_port="performance"/>
          <connect from_op="Remember" from_port="stored" to_port="result 1"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="source_input 2" spacing="0"/>
          <portSpacing port="sink_performance" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
          <portSpacing port="sink_result 3" spacing="0"/>
        </process>
      </operator>
      <!--
        Holds back the 30% partition for a random delay between min_delay_amount (10)
        and max_delay_amount (1010) before it reaches ApplyModel; units are not stated here.
        NOTE(review): presumably intended to let the optimisation finish first. TODO confirm it is needed.
      -->
      <operator activated="true" class="delay" compatibility="6.3.000" expanded="true" height="76" name="Delay" width="90" x="179" y="345">
        <parameter key="delay" value="random"/>
        <parameter key="min_delay_amount" value="10"/>
        <parameter key="max_delay_amount" value="1010"/>
      </operator>
      <!-- Applies the remembered model to the held-out partition. -->
      <operator activated="true" class="subprocess" compatibility="6.3.000" expanded="true" height="112" name="ApplyModel" width="90" x="313" y="345">
        <process expanded="true">
          <!-- Inverted single-attribute selection: keeps everything except the attribute named label. -->
          <operator activated="true" class="select_attributes" compatibility="6.3.000" expanded="true" height="76" name="Select Attributes" width="90" x="112" y="120">
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="label"/>
            <parameter key="invert_selection" value="true"/>
          </operator>
          <!-- Fetches the object stored as "Mymodel" by the Remember operator inside the optimiser. -->
          <operator activated="true" class="recall" compatibility="6.3.000" expanded="true" height="60" name="Recall" width="90" x="112" y="30">
            <parameter key="name" value="Mymodel"/>
            <parameter key="io_object" value="Model"/>
          </operator>
          <operator activated="true" class="apply_model" compatibility="6.3.000" expanded="true" height="76" name="Apply Model (2)" width="90" x="246" y="75">
            <list key="application_parameters"/>
          </operator>
          <!-- Outputs: out 1 = scored data, out 2 = model, out 3 = original example set (still containing label). -->
          <connect from_port="in 1" to_op="Select Attributes" to_port="example set input"/>
          <connect from_op="Select Attributes" from_port="example set output" to_op="Apply Model (2)" to_port="unlabelled data"/>
          <connect from_op="Select Attributes" from_port="original" to_port="out 3"/>
          <connect from_op="Recall" from_port="result" to_op="Apply Model (2)" to_port="model"/>
          <connect from_op="Apply Model (2)" from_port="labelled data" to_port="out 1"/>
          <connect from_op="Apply Model (2)" from_port="model" to_port="out 2"/>
          <portSpacing port="source_in 1" spacing="0"/>
          <portSpacing port="source_in 2" spacing="0"/>
          <portSpacing port="sink_out 1" spacing="0"/>
          <portSpacing port="sink_out 2" spacing="0"/>
          <portSpacing port="sink_out 3" spacing="0"/>
          <portSpacing port="sink_out 4" spacing="0"/>
        </process>
      </operator>
      <!-- Joins the scored data with the original data and flags each row as correctly predicted or not. -->
      <operator activated="true" class="subprocess" compatibility="6.3.000" expanded="true" height="94" name="Eval Model" width="90" x="447" y="345">
        <process expanded="true">
          <!-- Outer join of in 1 (left) and in 2 (right); no explicit key attributes are listed. -->
          <operator activated="true" class="join" compatibility="6.3.000" expanded="true" height="76" name="Join" width="90" x="45" y="30">
            <parameter key="join_type" value="outer"/>
            <list key="key_attributes"/>
          </operator>
          <!-- Renames attributes (special ones included) using the operator's default pattern; presumably this yields the name predictionlabel used below. TODO confirm. -->
          <operator activated="true" class="rename_by_replacing" compatibility="6.3.000" expanded="true" height="76" name="Rename by Replacing" width="90" x="179" y="30">
            <parameter key="include_special_attributes" value="true"/>
          </operator>
          <!-- Adds scoreEval: "true" when predictionlabel equals label, otherwise "false". -->
          <operator activated="true" class="generate_attributes" compatibility="6.3.000" expanded="true" height="76" name="Generate Attributes" width="90" x="313" y="30">
            <list key="function_descriptions">
              <parameter key="scoreEval" value="if(predictionlabel==label,&quot;true&quot;,&quot;false&quot;)"/>
            </list>
            <parameter key="use_standard_constants" value="false"/>
          </operator>
          <connect from_port="in 1" to_op="Join" to_port="left"/>
          <connect from_port="in 2" to_op="Join" to_port="right"/>
          <connect from_op="Join" from_port="join" to_op="Rename by Replacing" to_port="example set input"/>
          <connect from_op="Rename by Replacing" from_port="example set output" to_op="Generate Attributes" to_port="example set input"/>
          <connect from_op="Generate Attributes" from_port="example set output" to_port="out 1"/>
          <portSpacing port="source_in 1" spacing="0"/>
          <portSpacing port="source_in 2" spacing="0"/>
          <portSpacing port="source_in 3" spacing="0"/>
          <portSpacing port="sink_out 1" spacing="0"/>
          <portSpacing port="sink_out 2" spacing="0"/>
        </process>
      </operator>
      <!-- Top-level wiring. Note that no output port of the optimiser is connected here; the model reaches ApplyModel only through Remember/Recall. -->
      <connect from_op="Iris" from_port="output" to_op="70/30" to_port="example set"/>
      <connect from_op="70/30" from_port="partition 1" to_op="Optimize Parameters (Grid)" to_port="input 1"/>
      <connect from_op="70/30" from_port="partition 2" to_op="Delay" to_port="through 1"/>
      <connect from_op="Delay" from_port="through 1" to_op="ApplyModel" to_port="in 1"/>
      <connect from_op="ApplyModel" from_port="out 1" to_op="Eval Model" to_port="in 1"/>
      <connect from_op="ApplyModel" from_port="out 3" to_op="Eval Model" to_port="in 2"/>
      <connect from_op="Eval Model" from_port="out 1" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>

Answers

  • MartinLiebigMartinLiebig Administrator, Moderator, Employee, RapidMiner Certified Analyst, RapidMiner Certified Expert, University Professor Posts: 3,503 RM Data Scientist
    You can use the Log operator as shown below; you then get the results as a log example set and can plot them.

    <?xml version="1.0" encoding="UTF-8" standalone="no"?>
    <!--
      RapidMiner process (version 6.3.000).
      Flow: retrieve Iris, split it 70/30, grid-optimise k of a k-NN learner on the
      first partition while a Log operator records k, performance and deviation for
      every iteration, then apply the remembered model to the second partition and
      score each prediction against the true label.
    -->
    <process version="6.3.000">
      <context>
        <input/>
        <output/>
        <macros/>
      </context>
      <operator activated="true" class="process" compatibility="6.3.000" expanded="true" name="Process">
        <process expanded="true">
          <!-- Load the Iris sample data set from the repository. -->
          <operator activated="true" class="retrieve" compatibility="6.3.000" expanded="true" height="60" name="Iris" width="90" x="45" y="75">
            <parameter key="repository_entry" value="//Samples/data/Iris"/>
          </operator>
          <!-- Stratified split: partition 1 (ratio 0.7) feeds the optimiser, partition 2 (ratio 0.3) is held out. -->
          <operator activated="true" class="split_data" compatibility="6.3.000" expanded="true" height="94" name="70/30" width="90" x="45" y="300">
            <enumeration key="partitions">
              <parameter key="ratio" value="0.7"/>
              <parameter key="ratio" value="0.3"/>
            </enumeration>
            <parameter key="sampling_type" value="stratified sampling"/>
          </operator>
          <!-- Grid search over the k parameter of the nested operator named k-NN. -->
          <operator activated="true" class="optimize_parameters_grid" compatibility="6.3.000" expanded="true" height="130" name="Optimize Parameters (Grid)" width="90" x="246" y="120">
            <list key="parameters">
              <!-- Value presumably reads as min;max;steps;scale. TODO confirm against the operator documentation. -->
              <parameter key="k-NN.k" value="[1.0;20;20;linear]"/>
            </list>
            <process expanded="true">
              <!-- Cross-validation with number_of_validations=5; first subprocess trains, second subprocess tests. -->
              <operator activated="true" class="x_validation" compatibility="6.3.000" expanded="true" height="112" name="Validation" width="90" x="112" y="30">
                <parameter key="number_of_validations" value="5"/>
                <!-- Training subprocess: fit k-NN on the training data and hand the model out. -->
                <process expanded="true">
                  <operator activated="true" class="k_nn" compatibility="6.3.000" expanded="true" height="76" name="k-NN" width="90" x="45" y="30">
                    <!-- k is the parameter targeted by the k-NN.k entry of the enclosing grid optimiser. -->
                    <parameter key="k" value="16"/>
                  </operator>
                  <connect from_port="training" to_op="k-NN" to_port="training set"/>
                  <connect from_op="k-NN" from_port="model" to_port="model"/>
                  <portSpacing port="source_training" spacing="0"/>
                  <portSpacing port="sink_model" spacing="0"/>
                  <portSpacing port="sink_through 1" spacing="0"/>
                </process>
                <!-- Testing subprocess: apply the model to the test set and compute classification performance. -->
                <process expanded="true">
                  <operator activated="true" class="apply_model" compatibility="6.3.000" expanded="true" height="76" name="Apply Model" width="90" x="45" y="30">
                    <list key="application_parameters"/>
                  </operator>
                  <!-- Enables classification_error, weighted_mean_recall and weighted_mean_precision criteria. -->
                  <operator activated="true" class="performance_classification" compatibility="6.3.000" expanded="true" height="76" name="Performance (2)" width="90" x="179" y="30">
                    <parameter key="classification_error" value="true"/>
                    <parameter key="weighted_mean_recall" value="true"/>
                    <parameter key="weighted_mean_precision" value="true"/>
                    <list key="class_weights"/>
                  </operator>
                  <connect from_port="model" to_op="Apply Model" to_port="model"/>
                  <connect from_port="test set" to_op="Apply Model" to_port="unlabelled data"/>
                  <connect from_op="Apply Model" from_port="labelled data" to_op="Performance (2)" to_port="labelled data"/>
                  <connect from_op="Performance (2)" from_port="performance" to_port="averagable 1"/>
                  <portSpacing port="source_model" spacing="0"/>
                  <portSpacing port="source_test set" spacing="0"/>
                  <portSpacing port="source_through 1" spacing="0"/>
                  <portSpacing port="sink_averagable 1" spacing="0"/>
                  <portSpacing port="sink_averagable 2" spacing="0"/>
                </process>
              </operator>
              <!--
                Stores the model coming out of Validation under the name "Mymodel".
                NOTE(review): this runs inside the optimiser, so the stored object is
                the model of the last iteration, not necessarily the best one.
              -->
              <operator activated="true" class="remember" compatibility="6.3.000" expanded="true" height="60" name="Remember" width="90" x="246" y="30">
                <parameter key="name" value="Mymodel"/>
                <parameter key="io_object" value="Model"/>
              </operator>
              <!--
                Log operator: records three columns per execution.
                  k           = the k parameter of the operator named k-NN
                  performance = the performance value of the operator named Validation
                  deviation   = the deviation value of the operator named Validation
                It sits inside the optimiser's subprocess, on the path from Validation's
                training output to result 2.
              -->
              <operator activated="true" class="log" compatibility="6.3.000" expanded="true" height="76" name="Log" width="90" x="447" y="75">
                <list key="log">
                  <parameter key="k" value="operator.k-NN.parameter.k"/>
                  <parameter key="performance" value="operator.Validation.value.performance"/>
                  <parameter key="deviation" value="operator.Validation.value.deviation"/>
                </list>
              </operator>
              <!-- Wiring: Validation performance goes to the optimiser's performance sink; model (via Remember) to result 1; training data (via Log) to result 2. -->
              <connect from_port="input 1" to_op="Validation" to_port="training"/>
              <connect from_op="Validation" from_port="model" to_op="Remember" to_port="store"/>
              <connect from_op="Validation" from_port="training" to_op="Log" to_port="through 1"/>
              <connect from_op="Validation" from_port="averagable 1" to_port="performance"/>
              <connect from_op="Remember" from_port="stored" to_port="result 1"/>
              <connect from_op="Log" from_port="through 1" to_port="result 2"/>
              <portSpacing port="source_input 1" spacing="0"/>
              <portSpacing port="source_input 2" spacing="0"/>
              <portSpacing port="sink_performance" spacing="0"/>
              <portSpacing port="sink_result 1" spacing="0"/>
              <portSpacing port="sink_result 2" spacing="0"/>
              <portSpacing port="sink_result 3" spacing="0"/>
            </process>
          </operator>
          <!--
            Holds back the 30% partition for a random delay between min_delay_amount (10)
            and max_delay_amount (1010) before it reaches ApplyModel; units are not stated here.
            NOTE(review): purpose is not evident from the process itself. TODO confirm it is needed.
          -->
          <operator activated="true" class="delay" compatibility="6.3.000" expanded="true" height="76" name="Delay" width="90" x="179" y="345">
            <parameter key="delay" value="random"/>
            <parameter key="min_delay_amount" value="10"/>
            <parameter key="max_delay_amount" value="1010"/>
          </operator>
          <!-- Applies the remembered model to the held-out partition. -->
          <operator activated="true" class="subprocess" compatibility="6.3.000" expanded="true" height="112" name="ApplyModel" width="90" x="313" y="345">
            <process expanded="true">
              <!-- Inverted single-attribute selection: keeps everything except the attribute named label. -->
              <operator activated="true" class="select_attributes" compatibility="6.3.000" expanded="true" height="76" name="Select Attributes" width="90" x="112" y="120">
                <parameter key="attribute_filter_type" value="single"/>
                <parameter key="attribute" value="label"/>
                <parameter key="invert_selection" value="true"/>
              </operator>
              <!-- Fetches the object stored as "Mymodel" by the Remember operator inside the optimiser. -->
              <operator activated="true" class="recall" compatibility="6.3.000" expanded="true" height="60" name="Recall" width="90" x="112" y="30">
                <parameter key="name" value="Mymodel"/>
                <parameter key="io_object" value="Model"/>
              </operator>
              <operator activated="true" class="apply_model" compatibility="6.3.000" expanded="true" height="76" name="Apply Model (2)" width="90" x="246" y="75">
                <list key="application_parameters"/>
              </operator>
              <!-- Outputs: out 1 = scored data, out 2 = model, out 3 = original example set (still containing label). -->
              <connect from_port="in 1" to_op="Select Attributes" to_port="example set input"/>
              <connect from_op="Select Attributes" from_port="example set output" to_op="Apply Model (2)" to_port="unlabelled data"/>
              <connect from_op="Select Attributes" from_port="original" to_port="out 3"/>
              <connect from_op="Recall" from_port="result" to_op="Apply Model (2)" to_port="model"/>
              <connect from_op="Apply Model (2)" from_port="labelled data" to_port="out 1"/>
              <connect from_op="Apply Model (2)" from_port="model" to_port="out 2"/>
              <portSpacing port="source_in 1" spacing="0"/>
              <portSpacing port="source_in 2" spacing="0"/>
              <portSpacing port="sink_out 1" spacing="0"/>
              <portSpacing port="sink_out 2" spacing="0"/>
              <portSpacing port="sink_out 3" spacing="0"/>
              <portSpacing port="sink_out 4" spacing="0"/>
            </process>
          </operator>
          <!-- Joins the scored data with the original data and flags each row as correctly predicted or not. -->
          <operator activated="true" class="subprocess" compatibility="6.3.000" expanded="true" height="94" name="Eval Model" width="90" x="447" y="345">
            <process expanded="true">
              <!-- Outer join of in 1 (left) and in 2 (right); no explicit key attributes are listed. -->
              <operator activated="true" class="join" compatibility="6.3.000" expanded="true" height="76" name="Join" width="90" x="45" y="30">
                <parameter key="join_type" value="outer"/>
                <list key="key_attributes"/>
              </operator>
              <!-- Renames attributes (special ones included) using the operator's default pattern; presumably this yields the name predictionlabel used below. TODO confirm. -->
              <operator activated="true" class="rename_by_replacing" compatibility="6.3.000" expanded="true" height="76" name="Rename by Replacing" width="90" x="179" y="30">
                <parameter key="include_special_attributes" value="true"/>
              </operator>
              <!-- Adds scoreEval: "true" when predictionlabel equals label, otherwise "false". -->
              <operator activated="true" class="generate_attributes" compatibility="6.3.000" expanded="true" height="76" name="Generate Attributes" width="90" x="313" y="30">
                <list key="function_descriptions">
                  <parameter key="scoreEval" value="if(predictionlabel==label,&quot;true&quot;,&quot;false&quot;)"/>
                </list>
                <parameter key="use_standard_constants" value="false"/>
              </operator>
              <connect from_port="in 1" to_op="Join" to_port="left"/>
              <connect from_port="in 2" to_op="Join" to_port="right"/>
              <connect from_op="Join" from_port="join" to_op="Rename by Replacing" to_port="example set input"/>
              <connect from_op="Rename by Replacing" from_port="example set output" to_op="Generate Attributes" to_port="example set input"/>
              <connect from_op="Generate Attributes" from_port="example set output" to_port="out 1"/>
              <portSpacing port="source_in 1" spacing="0"/>
              <portSpacing port="source_in 2" spacing="0"/>
              <portSpacing port="source_in 3" spacing="0"/>
              <portSpacing port="sink_out 1" spacing="0"/>
              <portSpacing port="sink_out 2" spacing="0"/>
            </process>
          </operator>
          <!-- Top-level wiring. Note that no output port of the optimiser is connected here; the model reaches ApplyModel only through Remember/Recall. -->
          <connect from_op="Iris" from_port="output" to_op="70/30" to_port="example set"/>
          <connect from_op="70/30" from_port="partition 1" to_op="Optimize Parameters (Grid)" to_port="input 1"/>
          <connect from_op="70/30" from_port="partition 2" to_op="Delay" to_port="through 1"/>
          <connect from_op="Delay" from_port="through 1" to_op="ApplyModel" to_port="in 1"/>
          <connect from_op="ApplyModel" from_port="out 1" to_op="Eval Model" to_port="in 1"/>
          <connect from_op="ApplyModel" from_port="out 3" to_op="Eval Model" to_port="in 2"/>
          <connect from_op="Eval Model" from_port="out 1" to_port="result 1"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
        </process>
      </operator>
    </process>
    Be careful with your Remember/Recall construct! It saves the model from the last iteration, not the best model, which might not be what you want. You can simply use the model port of Optimize to get the best model.

    And why do you need the delay?

    Cheers,

    Martin
    - Sr. Director Data Solutions, Altair RapidMiner -
    Dortmund, Germany
  • corkiecorkie Member Posts: 10 Contributor II
    martin, cheers for the response.
    Cheers for the heads-up on the Remember/Recall; I had missed picking up the port from the optimise as opposed to the validation.

    I thought I needed the delay to allow the optimise to complete. When I reorder the process execution, it works fine without it.

    thanks for the help.
Sign In or Register to comment.