Options

Optimize Selection

MuehliManMuehliMan Member Posts: 85 Maven
edited November 2018 in Help
Hello again,

I found another strange behavior, for which I hope to get an explanation from you:

In this workflow I log the performance, generation and feature names of an optimized feature selection that always keeps the best 5 attributes. The process should select the best 5 models for the first attribute. For each of the chosen attributes it then builds models with every possible second attribute, and so on. Is that correct?

What about the results from generation 0?
I assume that RM is looping through the attributes to find the top 5 attributes. Why does the log for the attribute names show "?" then? (Maybe it is showing the already chosen attributes, but then they would not match the performance values.)
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  RapidMiner process (version 5.0) used to reproduce the question above.
  Top-level chain, as wired by the connect elements at the end:
    Generate Data, Add Noise, Discretize, Nominal to Binominal, Optimize Selection.
  Inside Optimize Selection a Random Forest is trained, applied and evaluated,
  and a Log operator records values for every evaluation.
-->
<process version="5.0">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="5.0.11" expanded="true" name="Process">
    <process expanded="true" height="179" width="748">
      <!-- Data source: number_examples=300, number_of_attributes=100. -->
      <operator activated="true" class="generate_data" compatibility="5.0.11" expanded="true" height="60" name="Generate Data" width="90" x="45" y="30">
        <parameter key="number_examples" value="300"/>
        <parameter key="number_of_attributes" value="100"/>
      </operator>
      <!-- Add Noise with an empty "noise" list; no other parameters are set here. -->
      <operator activated="true" class="add_noise" compatibility="5.0.11" expanded="true" height="94" name="Add Noise" width="90" x="179" y="30">
        <list key="noise"/>
      </operator>
      <!-- Discretizes the single attribute "label" (special attributes included) into the
           classes "first" (value 0.5) and "last" (value Infinity).
           NOTE(review): the values are presumably upper limits of each class; confirm. -->
      <operator activated="true" class="discretize_by_user_specification" compatibility="5.0.11" expanded="true" height="94" name="Discretize" width="90" x="313" y="30">
        <parameter key="attribute_filter_type" value="single"/>
        <parameter key="attribute" value="label"/>
        <parameter key="include_special_attributes" value="true"/>
        <list key="classes">
          <parameter key="first" value="0.5"/>
          <parameter key="last" value="Infinity"/>
        </list>
      </operator>
      <!-- Converts the single attribute "label" (special attributes included) to binominal. -->
      <operator activated="true" class="nominal_to_binominal" compatibility="5.0.11" expanded="true" height="94" name="Nominal to Binominal" width="90" x="447" y="30">
        <parameter key="attribute_filter_type" value="single"/>
        <parameter key="attribute" value="label"/>
        <parameter key="include_special_attributes" value="true"/>
      </operator>
      <!-- Feature selection wrapper: keep_best=2, at most 5 generations,
           generations_without_improval=2, weights not normalized.
           NOTE(review): the accompanying post speaks of keeping the best 5 attributes,
           but keep_best is 2 here; confirm which value was intended. -->
      <operator activated="true" class="optimize_selection" compatibility="5.0.11" expanded="true" height="94" name="Optimize Selection" width="90" x="581" y="30">
        <parameter key="generations_without_improval" value="2"/>
        <parameter key="limit_number_of_generations" value="true"/>
        <parameter key="keep_best" value="2"/>
        <parameter key="maximum_number_of_generations" value="5"/>
        <parameter key="normalize_weights" value="false"/>
        <!-- Inner evaluation subprocess:
             Multiply, Random Forest, Apply Model, Performance, Log, performance sink. -->
        <process expanded="true" height="715" width="1094">
          <operator activated="true" class="multiply" compatibility="5.0.11" expanded="true" height="76" name="Multiply" width="90" x="45" y="30"/>
          <!-- Learner: information_gain criterion, minimal_gain=0.0, maximal_depth=5. -->
          <operator activated="true" class="random_forest" compatibility="5.0.11" expanded="true" height="76" name="Random Forest" width="90" x="179" y="30">
            <parameter key="criterion" value="information_gain"/>
            <parameter key="minimal_gain" value="0.0"/>
            <parameter key="maximal_depth" value="5"/>
          </operator>
          <operator activated="true" class="apply_model" compatibility="5.0.11" expanded="true" height="76" name="Apply Model" width="90" x="313" y="30">
            <list key="application_parameters"/>
          </operator>
          <!-- Binominal classification performance with f_measure and youden switched on. -->
          <operator activated="true" class="performance_binominal_classification" compatibility="5.0.11" expanded="true" height="76" name="Performance" width="90" x="447" y="30">
            <parameter key="f_measure" value="true"/>
            <parameter key="youden" value="true"/>
          </operator>
          <!-- Logged columns: feature_names and generation are read from Optimize Selection;
               accuracy, f_measure and youden are read from Performance. -->
          <operator activated="true" class="log" compatibility="5.0.11" expanded="true" height="76" name="Log" width="90" x="581" y="30">
            <list key="log">
              <parameter key="feature_names" value="operator.Optimize Selection.value.feature_names"/>
              <parameter key="generation" value="operator.Optimize Selection.value.generation"/>
              <parameter key="accuracy" value="operator.Performance.value.accuracy"/>
              <parameter key="f_measure" value="operator.Performance.value.f_measure"/>
              <parameter key="youden" value="operator.Performance.value.youden"/>
            </list>
          </operator>
          <connect from_port="example set" to_op="Multiply" to_port="input"/>
          <connect from_op="Multiply" from_port="output 1" to_op="Random Forest" to_port="training set"/>
          <connect from_op="Random Forest" from_port="model" to_op="Apply Model" to_port="model"/>
          <!-- Apply Model receives the exampleSet output of Random Forest.
               NOTE(review): this is presumably the same data the model was trained on,
               i.e. no separate test set; confirm. -->
          <connect from_op="Random Forest" from_port="exampleSet" to_op="Apply Model" to_port="unlabelled data"/>
          <connect from_op="Apply Model" from_port="labelled data" to_op="Performance" to_port="labelled data"/>
          <!-- The performance vector is passed through Log before reaching the sink. -->
          <connect from_op="Performance" from_port="performance" to_op="Log" to_port="through 1"/>
          <connect from_op="Log" from_port="through 1" to_port="performance"/>
          <portSpacing port="source_example set" spacing="0"/>
          <portSpacing port="source_through 1" spacing="0"/>
          <portSpacing port="sink_performance" spacing="0"/>
        </process>
      </operator>
      <!-- Top-level wiring of the preprocessing chain into Optimize Selection. -->
      <connect from_op="Generate Data" from_port="output" to_op="Add Noise" to_port="example set input"/>
      <connect from_op="Add Noise" from_port="example set output" to_op="Discretize" to_port="example set input"/>
      <connect from_op="Discretize" from_port="example set output" to_op="Nominal to Binominal" to_port="example set input"/>
      <connect from_op="Nominal to Binominal" from_port="example set output" to_op="Optimize Selection" to_port="example set in"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
    </process>
  </operator>
</process>

Best,
Markus

Answers

  • Options
    MuehliManMuehliMan Member Posts: 85 Maven
    Good Morning,

    I thought about my problem and found a workaround: using Loop Subsets (feature subset iteration) with all attributes and logging the feature names from there. You can see the result with the workflow quoted below:
    <?xml version="1.0" encoding="UTF-8" standalone="no"?>
    <!--
      RapidMiner process (version 5.0) showing the workaround described above.
      The top-level chain is Generate Data, Add Noise, Discretize, Nominal to Binominal,
      Optimize Selection. Inside Optimize Selection, a second Multiply output feeds
      Extract Macro and Loop Subsets so that Log can also record the feature names
      reported by Loop Subsets (column "chosen").
    -->
    <process version="5.0">
      <context>
        <input/>
        <output/>
        <macros/>
      </context>
      <operator activated="true" class="process" compatibility="5.0.11" expanded="true" name="Process">
        <process expanded="true" height="179" width="748">
          <!-- Data source: number_examples=300, number_of_attributes=100. -->
          <operator activated="true" class="generate_data" compatibility="5.0.11" expanded="true" height="60" name="Generate Data" width="90" x="45" y="30">
            <parameter key="number_examples" value="300"/>
            <parameter key="number_of_attributes" value="100"/>
          </operator>
          <!-- Add Noise with an empty "noise" list; no other parameters are set here. -->
          <operator activated="true" class="add_noise" compatibility="5.0.11" expanded="true" height="94" name="Add Noise" width="90" x="179" y="30">
            <list key="noise"/>
          </operator>
          <!-- Discretizes the single attribute "label" (special attributes included) into the
               classes "first" (value 0.5) and "last" (value Infinity).
               NOTE(review): the values are presumably upper limits of each class; confirm. -->
          <operator activated="true" class="discretize_by_user_specification" compatibility="5.0.11" expanded="true" height="94" name="Discretize" width="90" x="313" y="30">
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="label"/>
            <parameter key="include_special_attributes" value="true"/>
            <list key="classes">
              <parameter key="first" value="0.5"/>
              <parameter key="last" value="Infinity"/>
            </list>
          </operator>
          <!-- Converts the single attribute "label" (special attributes included) to binominal. -->
          <operator activated="true" class="nominal_to_binominal" compatibility="5.0.11" expanded="true" height="94" name="Nominal to Binominal" width="90" x="447" y="30">
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="label"/>
            <parameter key="include_special_attributes" value="true"/>
          </operator>
          <!-- Feature selection wrapper: keep_best=3, at most 5 generations,
               generations_without_improval=2, weights not normalized. -->
          <operator activated="true" class="optimize_selection" compatibility="5.0.11" expanded="true" height="94" name="Optimize Selection" width="90" x="581" y="30">
            <parameter key="generations_without_improval" value="2"/>
            <parameter key="limit_number_of_generations" value="true"/>
            <parameter key="keep_best" value="3"/>
            <parameter key="maximum_number_of_generations" value="5"/>
            <parameter key="normalize_weights" value="false"/>
            <!-- Inner evaluation subprocess with two branches fed by Multiply:
                 output 1: Random Forest, Apply Model, Performance, Log, performance sink.
                 output 2: Extract Macro, Loop Subsets (side branch used only for logging). -->
            <process expanded="true" height="715" width="1394">
              <operator activated="true" class="multiply" compatibility="5.0.11" expanded="true" height="94" name="Multiply" width="90" x="45" y="30"/>
              <!-- Stores the number of attributes of the incoming example set in macro "atts". -->
              <operator activated="true" class="extract_macro" compatibility="5.0.11" expanded="true" height="60" name="Extract Macro" width="90" x="179" y="120">
                <parameter key="macro" value="atts"/>
                <parameter key="macro_type" value="number_of_attributes"/>
              </operator>
              <!-- Loop Subsets with use_exact_number=true and the exact number taken from macro "atts".
                   NOTE(review): presumably this iterates only the one subset that contains all
                   attributes of the current example set; confirm.
                   The inner process holds no operators, only a portSpacing entry, and no output
                   of this operator is connected; it is referenced solely by the Log operator. -->
              <operator activated="true" class="loop_attribute_subsets" compatibility="5.0.11" expanded="true" height="60" name="Loop Subsets" width="90" x="313" y="120">
                <parameter key="use_exact_number" value="true"/>
                <parameter key="exact_number_of_attributes" value="%{atts}"/>
                <process expanded="true" height="422" width="643">
                  <portSpacing port="source_example set" spacing="0"/>
                </process>
              </operator>
              <!-- Learner: information_gain criterion, minimal_gain=0.0, maximal_depth=5. -->
              <operator activated="true" class="random_forest" compatibility="5.0.11" expanded="true" height="76" name="Random Forest" width="90" x="179" y="30">
                <parameter key="criterion" value="information_gain"/>
                <parameter key="minimal_gain" value="0.0"/>
                <parameter key="maximal_depth" value="5"/>
              </operator>
              <operator activated="true" class="apply_model" compatibility="5.0.11" expanded="true" height="76" name="Apply Model" width="90" x="313" y="30">
                <list key="application_parameters"/>
              </operator>
              <!-- Binominal classification performance with f_measure and youden switched on. -->
              <operator activated="true" class="performance_binominal_classification" compatibility="5.0.11" expanded="true" height="76" name="Performance" width="90" x="447" y="30">
                <parameter key="f_measure" value="true"/>
                <parameter key="youden" value="true"/>
              </operator>
              <!-- Logged columns: feature_names and generation are read from Optimize Selection;
                   accuracy, f_measure and youden from Performance; "chosen" is the feature_names
                   value of Loop Subsets, logged for comparison with feature_names. -->
              <operator activated="true" class="log" compatibility="5.0.11" expanded="true" height="76" name="Log" width="90" x="581" y="30">
                <list key="log">
                  <parameter key="feature_names" value="operator.Optimize Selection.value.feature_names"/>
                  <parameter key="generation" value="operator.Optimize Selection.value.generation"/>
                  <parameter key="accuracy" value="operator.Performance.value.accuracy"/>
                  <parameter key="f_measure" value="operator.Performance.value.f_measure"/>
                  <parameter key="youden" value="operator.Performance.value.youden"/>
                  <parameter key="chosen" value="operator.Loop Subsets.value.feature_names"/>
                </list>
              </operator>
              <connect from_port="example set" to_op="Multiply" to_port="input"/>
              <connect from_op="Multiply" from_port="output 1" to_op="Random Forest" to_port="training set"/>
              <!-- Side branch: second copy of the example set goes to Extract Macro and Loop Subsets. -->
              <connect from_op="Multiply" from_port="output 2" to_op="Extract Macro" to_port="example set"/>
              <connect from_op="Extract Macro" from_port="example set" to_op="Loop Subsets" to_port="example set"/>
              <connect from_op="Random Forest" from_port="model" to_op="Apply Model" to_port="model"/>
              <!-- Apply Model receives the exampleSet output of Random Forest.
                   NOTE(review): this is presumably the same data the model was trained on,
                   i.e. no separate test set; confirm. -->
              <connect from_op="Random Forest" from_port="exampleSet" to_op="Apply Model" to_port="unlabelled data"/>
              <connect from_op="Apply Model" from_port="labelled data" to_op="Performance" to_port="labelled data"/>
              <!-- The performance vector is passed through Log before reaching the sink. -->
              <connect from_op="Performance" from_port="performance" to_op="Log" to_port="through 1"/>
              <connect from_op="Log" from_port="through 1" to_port="performance"/>
              <portSpacing port="source_example set" spacing="0"/>
              <portSpacing port="source_through 1" spacing="0"/>
              <portSpacing port="sink_performance" spacing="0"/>
            </process>
          </operator>
          <!-- Top-level wiring of the preprocessing chain into Optimize Selection. -->
          <connect from_op="Generate Data" from_port="output" to_op="Add Noise" to_port="example set input"/>
          <connect from_op="Add Noise" from_port="example set output" to_op="Discretize" to_port="example set input"/>
          <connect from_op="Discretize" from_port="example set output" to_op="Nominal to Binominal" to_port="example set input"/>
          <connect from_op="Nominal to Binominal" from_port="example set output" to_op="Optimize Selection" to_port="example set in"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
        </process>
      </operator>
    </process>
    So the logged feature names and the "chosen" values should be the same, but obviously they are not.

    Best,
    Markus
  • Options
    MuehliManMuehliMan Member Posts: 85 Maven
    One additional note:

    If I use Decision Tree as learner it gives no split for the first generation (only 1 attribute) even if I reduce minimal gain to 0.
    If I replace the decision tree by a decision stump I get a split. Why is that? How do I set up a decision tree so that it also performs a split when there is only one attribute?

    Best,
    Markus
Sign In or Register to comment.