Different output results with grid parameter optimization

SaloSalo Member Posts: 6 Contributor II
edited November 2018 in Help
I am trying to find the best C and gamma combination with the grid parameter optimization, like this:

<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  Grid search over C and gamma for a sigmoid-kernel LibSVM, scored by a
  shuffled cross-validation whose main criterion is specificity.

  Operator references in the grid parameter list and in the log operator use
  the exact operator names defined in this process ("LibSVMLearner (2)",
  "XValidation (3)", "GridParameterOptimization (2)").

  The cross-validation uses a local random seed so that every grid point is
  evaluated on the same folds. A standalone run with the winning C/gamma only
  reproduces the reported performance when it uses the same seed and the same
  validation settings.
-->
<process version="5.3.013">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="5.3.013" expanded="true" name="Process">
    <parameter key="logverbosity" value="all"/>
    <parameter key="logfile" value="D:\testexrff.xrff"/>
    <process expanded="true">
      <operator activated="true" class="optimize_parameters_grid" compatibility="5.3.013" expanded="true" height="94" name="GridParameterOptimization (2)" width="90" x="313" y="255">
        <!-- Keys have the form "<operator name>.<parameter>"; the operator name must match the learner below exactly. -->
        <list key="parameters">
          <parameter key="LibSVMLearner (2).C" value="[1.0;200.0;20;quadratic]"/>
          <parameter key="LibSVMLearner (2).gamma" value="[0.0;1;20;quadratic]"/>
        </list>
        <process expanded="true">
          <operator activated="true" class="x_validation" compatibility="5.1.002" expanded="true" height="112" name="XValidation (3)" width="90" x="45" y="30">
            <parameter key="sampling_type" value="shuffled sampling"/>
            <!-- Fix the shuffle so repeated runs split the data identically and results are reproducible. -->
            <parameter key="use_local_random_seed" value="true"/>
            <process expanded="true">
              <operator activated="true" class="materialize_data" compatibility="5.3.013" expanded="true" height="76" name="Materialize Data (6)" width="90" x="45" y="30"/>
              <operator activated="true" class="support_vector_machine_libsvm" compatibility="5.3.013" expanded="true" height="76" name="LibSVMLearner (2)" width="90" x="200" y="30">
                <parameter key="kernel_type" value="sigmoid"/>
                <!-- gamma and C below are starting values; the enclosing grid optimization overrides them per iteration. -->
                <parameter key="gamma" value="1.0"/>
                <parameter key="C" value="200.0"/>
                <list key="class_weights"/>
              </operator>
              <connect from_port="training" to_op="Materialize Data (6)" to_port="example set input"/>
              <connect from_op="Materialize Data (6)" from_port="example set output" to_op="LibSVMLearner (2)" to_port="training set"/>
              <connect from_op="LibSVMLearner (2)" from_port="model" to_port="model"/>
              <portSpacing port="source_training" spacing="0"/>
              <portSpacing port="sink_model" spacing="0"/>
              <portSpacing port="sink_through 1" spacing="0"/>
            </process>
            <process expanded="true">
              <operator activated="true" class="apply_model" compatibility="5.3.013" expanded="true" height="76" name="ModelApplier (2)" width="90" x="45" y="30">
                <list key="application_parameters"/>
              </operator>
              <operator activated="true" class="performance_binominal_classification" compatibility="5.3.013" expanded="true" height="76" name="BinominalClassificationPerformance (2)" width="90" x="209" y="30">
                <!-- The optimizer ranks parameter combinations by this main criterion. -->
                <parameter key="main_criterion" value="specificity"/>
                <parameter key="precision" value="true"/>
                <parameter key="recall" value="true"/>
                <parameter key="f_measure" value="true"/>
                <parameter key="false_positive" value="true"/>
                <parameter key="false_negative" value="true"/>
                <parameter key="true_positive" value="true"/>
                <parameter key="true_negative" value="true"/>
                <parameter key="sensitivity" value="true"/>
                <parameter key="specificity" value="true"/>
              </operator>
              <connect from_port="model" to_op="ModelApplier (2)" to_port="model"/>
              <connect from_port="test set" to_op="ModelApplier (2)" to_port="unlabelled data"/>
              <connect from_op="ModelApplier (2)" from_port="labelled data" to_op="BinominalClassificationPerformance (2)" to_port="labelled data"/>
              <connect from_op="BinominalClassificationPerformance (2)" from_port="performance" to_port="averagable 1"/>
              <portSpacing port="source_model" spacing="0"/>
              <portSpacing port="source_test set" spacing="0"/>
              <portSpacing port="source_through 1" spacing="0"/>
              <portSpacing port="sink_averagable 1" spacing="0"/>
              <portSpacing port="sink_averagable 2" spacing="0"/>
            </process>
          </operator>
          <operator activated="true" class="log" compatibility="5.3.013" expanded="true" height="76" name="ProcessLog (3)" width="90" x="403" y="48">
            <parameter key="filename" value="fCoutputsig_%{a}.log"/>
            <list key="log">
              <!-- NOTE(review): no operator named "CSVExampleSource" is visible in this process; confirm it exists in the full process. -->
              <parameter key="File" value="operator.CSVExampleSource.parameter.filename"/>
              <parameter key="Iteration" value="operator.GridParameterOptimization (2).value.applycount"/>
              <parameter key="gamma" value="operator.LibSVMLearner (2).parameter.gamma"/>
              <parameter key="C" value="operator.LibSVMLearner (2).parameter.C"/>
              <parameter key="Performance_main criterion" value="operator.XValidation (3).value.performance"/>
              <parameter key="Deviation" value="operator.XValidation (3).value.deviation"/>
              <parameter key="sdfg" value="operator.GridParameterOptimization (2).value.performance"/>
            </list>
            <parameter key="sorting_dimension" value="3"/>
          </operator>
          <connect from_port="input 1" to_op="XValidation (3)" to_port="training"/>
          <connect from_op="XValidation (3)" from_port="averagable 1" to_op="ProcessLog (3)" to_port="through 1"/>
          <connect from_op="ProcessLog (3)" from_port="through 1" to_port="performance"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="source_input 2" spacing="0"/>
          <portSpacing port="sink_performance" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
        </process>
      </operator>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
    </process>
  </operator>
</process>

It returns these results:

ParameterSet
Parameter set:

Performance:
PerformanceVector [
-----accuracy: 52.81% +/- 2.81% (mikro: 52.81%)
ConfusionMatrix:
True: S N
S: 1716 1610
N: 254 370
-----precision: 58.91% +/- 7.12% (mikro: 59.29%) (positive class: N)
ConfusionMatrix:
True: S N
S: 1716 1610
N: 254 370
-----recall: 18.69% +/- 6.51% (mikro: 18.69%) (positive class: N)
ConfusionMatrix:
True: S N
S: 1716 1610
N: 254 370
-----f_measure: 27.84% +/- 8.62% (mikro: 28.42%) (positive class: N)
ConfusionMatrix:
True: S N
S: 1716 1610
N: 254 370
-----false_positive: 25.400 +/- 9.425 (mikro: 254.000) (positive class: N)
ConfusionMatrix:
True: S N
S: 1716 1610
N: 254 370
-----false_negative: 161.000 +/- 12.900 (mikro: 1610.000) (positive class: N)
ConfusionMatrix:
True: S N
S: 1716 1610
N: 254 370
-----true_positive: 37.000 +/- 12.900 (mikro: 370.000) (positive class: N)
ConfusionMatrix:
True: S N
S: 1716 1610
N: 254 370
-----true_negative: 171.600 +/- 9.425 (mikro: 1716.000) (positive class: N)
ConfusionMatrix:
True: S N
S: 1716 1610
N: 254 370
-----sensitivity: 18.69% +/- 6.51% (mikro: 18.69%) (positive class: N)
ConfusionMatrix:
True: S N
S: 1716 1610
N: 254 370
*****specificity: 87.11% +/- 4.78% (mikro: 87.11%) (positive class: N)
ConfusionMatrix:
True: S N
S: 1716 1610
N: 254 370
]
LibSVMLearner.C = 41.2975
LibSVMLearner.gamma = 1.125

Let's ignore that the results are not very good, since that is not the issue here. After obtaining C = 41.297 and gamma = 1.125, I tried to run the SVM individually with these parameters. However, instead of getting the expected 52.81%, I got 50.38%. Did I misunderstand something? Shouldn't using the same C and gamma given to me by the grid parameter optimization produce the same results? I am using RapidMiner version 5.3.

Answers

  • frasfras Member Posts: 93 Contributor II
    Nothing looks wrong to me. You have to take the reported error bars into account as well: 50.38% lies within 52.81% +/- 2.81%.
    Try enabling "use local random seed" in the X-Validation operator; then both results should be identical.
Sign In or Register to comment.