Due to recent updates, all users are required to create an Altair One account to login to the RapidMiner community. Click the Register button to create your account using the same email that you have previously used to login to the RapidMiner community. This will ensure that any previously created content will be synced to your Altair One account. Once you login, you will be asked to provide a username that identifies you to other Community users. Email us at Community with questions.

parameter Optimization Problem

can_yucebascan_yucebas Member Posts: 7 Contributor II
Hi to all,

I use parameter optimization (grid search) to decide the optimum minimal gain to be used in the ID3 tree. After the optimization finished, the performance window showed that the optimum value for minimal gain is 4.107. But when I look at the log file, I can see other values with higher performance values.

In addition, when I build the ID3 tree with the same data set and parameters and use 4.107 for minimal gain, I cannot get the same performance results that the parameter optimization performance vector shows.

So how can I trust the optimization?

Answers

  • SkirzynskiSkirzynski Member Posts: 164 Maven
    I cannot reproduce your behavior. Here is my process setup:

    <?xml version="1.0" encoding="UTF-8" standalone="no"?>
    <!--
      RapidMiner 5.3 process posted as a reproduction attempt.
      Overview of what the operators below are wired to do:
        1. "Retrieve Iris" loads //Samples/data/Iris and feeds it to "Optimize Parameters (Grid)".
        2. The grid search varies Learner.minimal_gain and evaluates each setting with "X-Validation".
        3. Inside the grid search, the performance vector is converted to an example set,
           one value is extracted from it, and "Log" records it next to the minimal_gain value.
        4. "Set Parameters" maps the resulting parameter set from "Learner" to "OptimalLearner",
           a second decision tree trained on a separately retrieved copy of Iris.
    -->
    <process version="5.3.000">
      <context>
        <input/>
        <output/>
        <macros/>
      </context>
      <operator activated="true" class="process" compatibility="5.3.000" expanded="true" name="Root">
        <process expanded="true" height="431" width="748">
          <!-- Data source for the parameter optimization. -->
          <operator activated="true" class="retrieve" compatibility="5.3.000" expanded="true" height="60" name="Retrieve Iris" width="90" x="45" y="30">
            <parameter key="repository_entry" value="//Samples/data/Iris"/>
          </operator>
          <operator activated="true" class="optimize_parameters_grid" compatibility="5.3.000" expanded="true" height="94" name="Optimize Parameters (Grid)" width="90" x="179" y="30">
            <list key="parameters">
              <!-- Only one parameter is searched: minimal_gain of the operator named "Learner".
                   NOTE(review): the value presumably reads as [min;max;steps;scale], i.e. 0.1 to 20 in 100 linear steps; confirm against the operator documentation. -->
              <parameter key="Learner.minimal_gain" value="[0.1;20;100;linear]"/>
            </list>
            <process expanded="true" height="521" width="748">
              <!-- Cross-validation using shuffled sampling. The first subprocess trains the model,
                   the second one applies it to the test set and measures performance. -->
              <operator activated="true" class="x_validation" compatibility="5.3.000" expanded="true" height="112" name="X-Validation" width="90" x="45" y="30">
                <parameter key="sampling_type" value="shuffled sampling"/>
                <process expanded="true" height="398" width="327">
                  <operator activated="true" class="decision_tree" compatibility="5.3.000" expanded="true" height="76" name="Learner" width="90" x="112" y="30">
                    <!-- Stored value of the parameter being optimized.
                         NOTE(review): presumably overwritten by the grid search on every iteration; confirm. -->
                    <parameter key="minimal_gain" value="20.0"/>
                  </operator>
                  <connect from_port="training" to_op="Learner" to_port="training set"/>
                  <connect from_op="Learner" from_port="model" to_port="model"/>
                  <portSpacing port="source_training" spacing="0"/>
                  <portSpacing port="sink_model" spacing="0"/>
                  <portSpacing port="sink_through 1" spacing="0"/>
                </process>
                <process expanded="true" height="398" width="327">
                  <operator activated="true" class="apply_model" compatibility="5.3.000" expanded="true" height="76" name="Apply Model" width="90" x="45" y="30">
                    <list key="application_parameters"/>
                  </operator>
                  <!-- Performance operator without explicit parameters; its output becomes the validation's "averagable 1". -->
                  <operator activated="true" class="performance" compatibility="5.3.000" expanded="true" height="76" name="Performance" width="90" x="179" y="30"/>
                  <connect from_port="model" to_op="Apply Model" to_port="model"/>
                  <connect from_port="test set" to_op="Apply Model" to_port="unlabelled data"/>
                  <connect from_op="Apply Model" from_port="labelled data" to_op="Performance" to_port="labelled data"/>
                  <connect from_op="Performance" from_port="performance" to_port="averagable 1"/>
                  <portSpacing port="source_model" spacing="0"/>
                  <portSpacing port="source_test set" spacing="0"/>
                  <portSpacing port="source_through 1" spacing="0"/>
                  <portSpacing port="sink_averagable 1" spacing="0"/>
                  <portSpacing port="sink_averagable 2" spacing="0"/>
                </process>
              </operator>
              <!-- Turns the validation's performance vector into an example set so that a single value can be read from it. -->
              <operator activated="true" class="performance_to_data" compatibility="5.3.000" expanded="true" height="76" name="Performance to Data" width="90" x="179" y="75"/>
              <!-- Reads attribute "Value" at example index 1 of that example set. -->
              <operator activated="true" class="extract_log_value" compatibility="5.3.000" expanded="true" height="60" name="Extract Log Value" width="90" x="313" y="30">
                <parameter key="attribute_name" value="Value"/>
                <parameter key="example_index" value="1"/>
              </operator>
              <!-- Records the Learner's minimal_gain parameter together with the extracted value.
                   NOTE(review): the column is labelled "accuracy", but which criterion sits at example index 1
                   depends on the Performance operator's output; confirm it really is accuracy. -->
              <operator activated="true" class="log" compatibility="5.3.000" expanded="true" height="76" name="Log" width="90" x="447" y="120">
                <list key="log">
                  <parameter key="minimal_gain" value="operator.Learner.parameter.minimal_gain"/>
                  <parameter key="accuracy" value="operator.Extract Log Value.value.data_value"/>
                </list>
              </operator>
              <connect from_port="input 1" to_op="X-Validation" to_port="training"/>
              <connect from_op="X-Validation" from_port="averagable 1" to_op="Performance to Data" to_port="performance vector"/>
              <connect from_op="Performance to Data" from_port="example set" to_op="Extract Log Value" to_port="example set"/>
              <!-- The performance vector is passed through Log and returned to the grid search as its "performance" result. -->
              <connect from_op="Performance to Data" from_port="performance vector" to_op="Log" to_port="through 1"/>
              <connect from_op="Log" from_port="through 1" to_port="performance"/>
              <portSpacing port="source_input 1" spacing="0"/>
              <portSpacing port="source_input 2" spacing="0"/>
              <portSpacing port="sink_performance" spacing="36"/>
              <portSpacing port="sink_result 1" spacing="0"/>
            </process>
          </operator>
          <!-- Receives the grid search's parameter set and maps the entries for "Learner" onto "OptimalLearner". -->
          <operator activated="true" class="set_parameters" compatibility="5.3.000" expanded="true" height="76" name="Set Parameters" width="90" x="313" y="75">
            <list key="name_map">
              <parameter key="Learner" value="OptimalLearner"/>
            </list>
          </operator>
          <!-- Second, independent retrieval of the same Iris data for the final model. -->
          <operator activated="true" class="retrieve" compatibility="5.3.000" expanded="true" height="60" name="Retrieve Iris (2)" width="90" x="45" y="210">
            <parameter key="repository_entry" value="//Samples/data/Iris"/>
          </operator>
          <!-- Decision tree with no parameters set here; it is the target of the name_map in "Set Parameters".
               NOTE(review): there is no connection between Set Parameters and this operator, so this presumably relies
               on Set Parameters executing first; confirm the execution order. -->
          <operator activated="true" class="decision_tree" compatibility="5.3.000" expanded="true" height="76" name="OptimalLearner" width="90" x="179" y="210"/>
          <connect from_op="Retrieve Iris" from_port="output" to_op="Optimize Parameters (Grid)" to_port="input 1"/>
          <connect from_op="Optimize Parameters (Grid)" from_port="parameter" to_op="Set Parameters" to_port="parameter set"/>
          <connect from_op="Set Parameters" from_port="parameter set" to_port="result 1"/>
          <connect from_op="Retrieve Iris (2)" from_port="output" to_op="OptimalLearner" to_port="training set"/>
          <connect from_op="OptimalLearner" from_port="model" to_port="result 2"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
          <portSpacing port="sink_result 3" spacing="0"/>
        </process>
      </operator>
    </process>
    Can you provide a process with a minimal example of your problem?
Sign In or Register to comment.