Options

Problem with optimization and the Select Subprocess operator

TillTill Member Posts: 6 Contributor II
edited November 2018 in Help
Hi,
in my process I use the parameter optimization inside a Select Subprocess operator in order to decide whether to optimize the parameters or not. Surprisingly, the results of the optimization differ widely from those obtained when the optimization operator (Evolutionary) is used without the Select Subprocess operator. Does anybody have an idea what is wrong, or is it a bug?

<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  RapidMiner process "classificationOfWeeds" as posted in the forum thread.
  Data flow wired by the connections below:
    Generate Data, then Work on Subset (label converted to binominal),
    then the select_subprocess operator "firstClassification", whose
    outputs 1 and 2 go to the process results 1 and 2.
  "firstClassification" holds two alternative subprocesses:
    1. evolutionary optimization of C and gamma of a LibSVM learner, followed
       by a cross validation that receives the resulting parameter set;
    2. a plain cross validation of a LibSVM learner with fixed C and gamma.
-->
<process version="5.0">
  <context>
    <input>
      <location/>
    </input>
    <output>
      <location/>
      <location/>
      <location/>
      <location/>
    </output>
    <macros/>
  </context>
  <operator activated="true" class="process" expanded="true" name="classificationOfWeeds">
    <process expanded="true" height="581" width="835">
      <!-- Reads the same .aml file that the write_aml operators further down write; its
           output is delivered to result 3. It has no input connection, so nothing in the
           wiring orders it after the writers. NOTE(review): confirm the execution order. -->
      <operator activated="true" class="read_aml" expanded="true" height="60" name="loadFirstPrediction" width="90" x="715" y="120">
        <parameter key="attributes" value="C:\Users\Till\Desktop\PredictionAllData.aml"/>
        <parameter key="local_random_seed" value="-1"/>
      </operator>
      <operator activated="true" class="generate_data" expanded="true" height="60" name="Generate Data" width="90" x="45" y="30"/>
      <!-- Applies the inner Numerical to Binominal conversion to the attribute subset
           "label" only (special attributes are included in the filter). -->
      <operator activated="true" class="work_on_subset" expanded="true" height="76" name="Work on Subset" width="90" x="179" y="30">
        <parameter key="attribute_filter_type" value="subset"/>
        <parameter key="attributes" value="label"/>
        <parameter key="include_special_attributes" value="true"/>
        <process expanded="true" height="691" width="909">
          <operator activated="true" class="numerical_to_binominal" expanded="true" height="76" name="Numerical to Binominal" width="90" x="380" y="30"/>
          <connect from_port="exampleSet" to_op="Numerical to Binominal" to_port="example set input"/>
          <connect from_op="Numerical to Binominal" from_port="example set output" to_port="example set"/>
          <portSpacing port="source_exampleSet" spacing="0"/>
          <portSpacing port="sink_example set" spacing="0"/>
          <portSpacing port="sink_through 1" spacing="0"/>
        </process>
      </operator>
      <!-- Select Subprocess with two alternatives. No selection parameter is set on this
           operator, so its default decides which alternative runs. TODO confirm the default. -->
      <operator activated="true" class="select_subprocess" expanded="true" height="94" name="firstClassification" width="90" x="313" y="30">
        <!-- Alternative 1: optimize, then validate with the optimized parameters. -->
        <process expanded="true" height="518" width="435">
          <!-- Duplicates the input: output 1 feeds "applyOptimalParameter (2)" (port in 2),
               output 2 feeds the optimization (port input 1). -->
          <operator activated="true" class="multiply" expanded="true" height="94" name="Multiply" width="90" x="45" y="30"/>
          <!-- Searches C in [0.0;300] and gamma in [0.0;1] of the operator "learnerTrainingFirst". -->
          <operator activated="true" class="optimize_parameters_evolutionary" expanded="true" height="130" name="Optimize Parameters (Evolutionary)" width="90" x="179" y="30">
            <list key="parameters">
              <parameter key="learnerTrainingFirst.C" value="[0.0;300]"/>
              <parameter key="learnerTrainingFirst.gamma" value="[0.0;1]"/>
            </list>
            <process expanded="true" height="691" width="909">
              <!-- NOTE(review): local_random_seed is -1 here; presumably this means the shared
                   global random generator is used. A reply in the thread suggests trying a
                   local random seed on the optimization to avoid an altered random number
                   sequence. Confirm the meaning of -1 for this process version. -->
              <operator activated="true" class="x_validation" expanded="true" height="112" name="XValidation (7)" width="90" x="380" y="30">
                <parameter key="create_complete_model" value="true"/>
                <parameter key="local_random_seed" value="-1"/>
                <process expanded="true" height="518" width="440">
                  <!-- The gamma and C values set here are the ones named in the optimization
                       parameter list above. -->
                  <operator activated="true" class="support_vector_machine_libsvm" expanded="true" height="76" name="learnerTrainingFirst" width="90" x="179" y="30">
                    <parameter key="gamma" value="0.5"/>
                    <parameter key="C" value="120.0"/>
                    <parameter key="cache_size" value="1000"/>
                    <list key="class_weights"/>
                    <parameter key="calculate_confidences" value="true"/>
                  </operator>
                  <connect from_port="training" to_op="learnerTrainingFirst" to_port="training set"/>
                  <connect from_op="learnerTrainingFirst" from_port="model" to_port="model"/>
                  <portSpacing port="source_training" spacing="0"/>
                  <portSpacing port="sink_model" spacing="0"/>
                  <portSpacing port="sink_through 1" spacing="0"/>
                </process>
                <process expanded="true" height="518" width="440">
                  <operator activated="true" class="apply_model" expanded="true" height="76" name="ModelApplier (6)" width="90" x="112" y="30">
                    <list key="application_parameters"/>
                  </operator>
                  <!-- Main criterion is kappa. This is the only performance operator in the
                       process with non-empty class weights (GALAP and CIRAR, both 2.0). -->
                  <operator activated="true" class="performance_classification" expanded="true" height="76" name="trainingPerformance (2)" width="90" x="242" y="30">
                    <parameter key="main_criterion" value="kappa"/>
                    <parameter key="accuracy" value="true"/>
                    <parameter key="kappa" value="true"/>
                    <list key="class_weights">
                      <parameter key="GALAP" value="2.0"/>
                      <parameter key="CIRAR" value="2.0"/>
                    </list>
                  </operator>
                  <connect from_port="model" to_op="ModelApplier (6)" to_port="model"/>
                  <connect from_port="test set" to_op="ModelApplier (6)" to_port="unlabelled data"/>
                  <connect from_op="ModelApplier (6)" from_port="labelled data" to_op="trainingPerformance (2)" to_port="labelled data"/>
                  <connect from_op="trainingPerformance (2)" from_port="performance" to_port="averagable 1"/>
                  <portSpacing port="source_model" spacing="0"/>
                  <portSpacing port="source_test set" spacing="0"/>
                  <portSpacing port="source_through 1" spacing="0"/>
                  <portSpacing port="sink_averagable 1" spacing="0"/>
                  <portSpacing port="sink_averagable 2" spacing="0"/>
                </process>
              </operator>
              <connect from_port="input 1" to_op="XValidation (7)" to_port="training"/>
              <connect from_op="XValidation (7)" from_port="model" to_port="result 1"/>
              <connect from_op="XValidation (7)" from_port="training" to_port="result 2"/>
              <connect from_op="XValidation (7)" from_port="averagable 1" to_port="performance"/>
              <portSpacing port="source_input 1" spacing="0"/>
              <portSpacing port="source_input 2" spacing="0"/>
              <portSpacing port="sink_performance" spacing="0"/>
              <portSpacing port="sink_result 1" spacing="0"/>
              <portSpacing port="sink_result 2" spacing="0"/>
              <portSpacing port="sink_result 3" spacing="0"/>
            </process>
          </operator>
          <!-- Receives the parameter set from the optimization on port in 1 and the
               duplicated data on port in 2. -->
          <operator activated="true" class="subprocess" expanded="true" height="94" name="applyOptimalParameter (2)" width="90" x="313" y="30">
            <process expanded="true" height="500" width="835">
              <!-- Maps the parameter set entries for "learnerTrainingFirst" onto the
                   operator "optimalLearnerFirst" inside "XValidation (9)". -->
              <operator activated="true" class="set_parameters" expanded="true" height="60" name="Set Parameters (2)" width="90" x="179" y="30">
                <list key="name_map">
                  <parameter key="learnerTrainingFirst" value="optimalLearnerFirst"/>
                </list>
              </operator>
              <operator activated="true" class="x_validation" expanded="true" height="112" name="XValidation (9)" width="90" x="179" y="120">
                <parameter key="create_complete_model" value="true"/>
                <parameter key="local_random_seed" value="-1"/>
                <process expanded="true" height="518" width="440">
                  <!-- The gamma and C values written here are presumably replaced by
                       "Set Parameters (2)" at run time. Verify. -->
                  <operator activated="true" class="support_vector_machine_libsvm" expanded="true" height="76" name="optimalLearnerFirst" width="90" x="179" y="30">
                    <parameter key="gamma" value="0.3"/>
                    <parameter key="C" value="40.0"/>
                    <parameter key="cache_size" value="2000"/>
                    <list key="class_weights"/>
                    <parameter key="calculate_confidences" value="true"/>
                    <parameter key="confidence_for_multiclass" value="false"/>
                  </operator>
                  <connect from_port="training" to_op="optimalLearnerFirst" to_port="training set"/>
                  <connect from_op="optimalLearnerFirst" from_port="model" to_port="model"/>
                  <portSpacing port="source_training" spacing="0"/>
                  <portSpacing port="sink_model" spacing="0"/>
                  <portSpacing port="sink_through 1" spacing="0"/>
                </process>
                <process expanded="true" height="518" width="440">
                  <operator activated="true" class="apply_model" expanded="true" height="76" name="ModelApplier (8)" width="90" x="45" y="30">
                    <list key="application_parameters"/>
                  </operator>
                  <!-- Unlike "trainingPerformance (2)", no main_criterion is set and the
                       class weights list is empty. -->
                  <operator activated="true" class="performance_classification" expanded="true" height="76" name="performanceFirst" width="90" x="179" y="30">
                    <parameter key="accuracy" value="true"/>
                    <parameter key="kappa" value="true"/>
                    <list key="class_weights"/>
                  </operator>
                  <!-- Writes the labelled examples from inside the testing subprocess;
                       presumably the files are rewritten on every validation iteration. Verify. -->
                  <operator activated="true" class="write_aml" expanded="true" height="60" name="write AML (3)" width="90" x="313" y="120">
                    <parameter key="example_set_file" value="C:\Users\Till\Desktop\PredictionAllData.dat"/>
                    <parameter key="attribute_description_file" value="C:\Users\Till\Desktop\PredictionAllData.aml"/>
                  </operator>
                  <connect from_port="model" to_op="ModelApplier (8)" to_port="model"/>
                  <connect from_port="test set" to_op="ModelApplier (8)" to_port="unlabelled data"/>
                  <connect from_op="ModelApplier (8)" from_port="labelled data" to_op="performanceFirst" to_port="labelled data"/>
                  <connect from_op="performanceFirst" from_port="performance" to_port="averagable 1"/>
                  <connect from_op="performanceFirst" from_port="example set" to_op="write AML (3)" to_port="input"/>
                  <portSpacing port="source_model" spacing="0"/>
                  <portSpacing port="source_test set" spacing="0"/>
                  <portSpacing port="source_through 1" spacing="0"/>
                  <portSpacing port="sink_averagable 1" spacing="0"/>
                  <portSpacing port="sink_averagable 2" spacing="0"/>
                </process>
              </operator>
              <connect from_port="in 1" to_op="Set Parameters (2)" to_port="parameter set"/>
              <connect from_port="in 2" to_op="XValidation (9)" to_port="training"/>
              <connect from_op="XValidation (9)" from_port="model" to_port="out 1"/>
              <connect from_op="XValidation (9)" from_port="averagable 1" to_port="out 2"/>
              <portSpacing port="source_in 1" spacing="0"/>
              <portSpacing port="source_in 2" spacing="0"/>
              <portSpacing port="source_in 3" spacing="0"/>
              <portSpacing port="sink_out 1" spacing="90"/>
              <portSpacing port="sink_out 2" spacing="18"/>
              <portSpacing port="sink_out 3" spacing="0"/>
            </process>
          </operator>
          <connect from_port="input 1" to_op="Multiply" to_port="input"/>
          <connect from_op="Multiply" from_port="output 1" to_op="applyOptimalParameter (2)" to_port="in 2"/>
          <connect from_op="Multiply" from_port="output 2" to_op="Optimize Parameters (Evolutionary)" to_port="input 1"/>
          <connect from_op="Optimize Parameters (Evolutionary)" from_port="parameter" to_op="applyOptimalParameter (2)" to_port="in 1"/>
          <connect from_op="applyOptimalParameter (2)" from_port="out 1" to_port="output 1"/>
          <connect from_op="applyOptimalParameter (2)" from_port="out 2" to_port="output 2"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="source_input 2" spacing="0"/>
          <portSpacing port="sink_output 1" spacing="0"/>
          <portSpacing port="sink_output 2" spacing="0"/>
          <portSpacing port="sink_output 3" spacing="0"/>
        </process>
        <!-- Alternative 2: cross validation of a LibSVM with fixed gamma and C, no optimization. -->
        <process expanded="true" height="518" width="299">
          <operator activated="true" class="x_validation" expanded="true" height="112" name="Validation (5)" width="90" x="112" y="30">
            <process expanded="true" height="551" width="299">
              <operator activated="true" class="support_vector_machine_libsvm" expanded="true" height="76" name="SVM" width="90" x="112" y="30">
                <parameter key="gamma" value="0.15"/>
                <parameter key="C" value="108.0"/>
                <list key="class_weights"/>
              </operator>
              <connect from_port="training" to_op="SVM" to_port="training set"/>
              <connect from_op="SVM" from_port="model" to_port="model"/>
              <portSpacing port="source_training" spacing="0"/>
              <portSpacing port="sink_model" spacing="0"/>
              <portSpacing port="sink_through 1" spacing="0"/>
            </process>
            <process expanded="true" height="551" width="480">
              <operator activated="true" class="apply_model" expanded="true" height="76" name="Apply Model (5)" width="90" x="112" y="30">
                <list key="application_parameters"/>
              </operator>
              <operator activated="true" class="performance_classification" expanded="true" height="76" name="performanceFirst (2)" width="90" x="246" y="30">
                <parameter key="main_criterion" value="kappa"/>
                <parameter key="accuracy" value="true"/>
                <parameter key="kappa" value="true"/>
                <list key="class_weights"/>
              </operator>
              <!-- Writes to the same two file paths as "write AML (3)" in alternative 1. -->
              <operator activated="true" class="write_aml" expanded="true" height="60" name="Write AML (3)" width="90" x="380" y="120">
                <parameter key="example_set_file" value="C:\Users\Till\Desktop\PredictionAllData.dat"/>
                <parameter key="attribute_description_file" value="C:\Users\Till\Desktop\PredictionAllData.aml"/>
              </operator>
              <connect from_port="model" to_op="Apply Model (5)" to_port="model"/>
              <connect from_port="test set" to_op="Apply Model (5)" to_port="unlabelled data"/>
              <connect from_op="Apply Model (5)" from_port="labelled data" to_op="performanceFirst (2)" to_port="labelled data"/>
              <connect from_op="performanceFirst (2)" from_port="performance" to_port="averagable 1"/>
              <connect from_op="performanceFirst (2)" from_port="example set" to_op="Write AML (3)" to_port="input"/>
              <portSpacing port="source_model" spacing="0"/>
              <portSpacing port="source_test set" spacing="0"/>
              <portSpacing port="source_through 1" spacing="0"/>
              <portSpacing port="sink_averagable 1" spacing="0"/>
              <portSpacing port="sink_averagable 2" spacing="0"/>
            </process>
          </operator>
          <connect from_port="input 1" to_op="Validation (5)" to_port="training"/>
          <connect from_op="Validation (5)" from_port="model" to_port="output 1"/>
          <connect from_op="Validation (5)" from_port="averagable 1" to_port="output 2"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="source_input 2" spacing="0"/>
          <portSpacing port="sink_output 1" spacing="0"/>
          <portSpacing port="sink_output 2" spacing="0"/>
          <portSpacing port="sink_output 3" spacing="0"/>
        </process>
      </operator>
      <!-- Top-level wiring: result 1 and result 2 come from "firstClassification",
           result 3 from "loadFirstPrediction". -->
      <connect from_op="loadFirstPrediction" from_port="output" to_port="result 3"/>
      <connect from_op="Generate Data" from_port="output" to_op="Work on Subset" to_port="example set"/>
      <connect from_op="Work on Subset" from_port="example set" to_op="firstClassification" to_port="input 1"/>
      <connect from_op="firstClassification" from_port="output 1" to_port="result 1"/>
      <connect from_op="firstClassification" from_port="output 2" to_port="result 2"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
      <portSpacing port="sink_result 4" spacing="0"/>
    </process>
  </operator>
</process>
Regards
Till

Answers

  • Options
    haddockhaddock Member Posts: 849 Maven
    Mmm,

    It could be useful to note that the numerical to binominal operator switches all the numerical values in the label attribute to "true".





  • Options
    TillTill Member Posts: 6 Contributor II
    Sorry, I only added the data generation and Numerical to Binominal operators in order to post a complete process. In the original process I use other data. I think the data is OK.
  • Options
    landland RapidMiner Certified Analyst, RapidMiner Certified Expert, Member Posts: 2,531 Unicorn
    Hi,
    if I understood you correctly, the problem is that the results of the optimization differ depending on whether you execute it inside a Select Subprocess operator or without one?

    Greetings,
      Sebastian
  • Options
    TillTill Member Posts: 6 Contributor II
    Yes, that's the problem. Why does the optimization depend on the Select Subprocess operator?

    Regards
    Till
  • Options
    landland RapidMiner Certified Analyst, RapidMiner Certified Expert, Member Posts: 2,531 Unicorn
    Hi Till,
    have you tried to use a local random seed on the optimization to avoid problems resulting from an altered random number sequence?

    Greetings,
      Sebastian
Sign In or Register to comment.