RapidMiner

RapidMiner

Different output results with grid parameter optimization

Contributor II

Different output results with grid parameter optimization

I am trying to get the best C and gamma combination with the grid parameter optimization like this



<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- RapidMiner 5.3 process: grid search over LibSVM C/gamma, each candidate
     evaluated by 10-fold cross-validation with specificity as the selection
     criterion. Log-operator references below must use the operators' EXACT
     display names (including the " (N)" suffix), otherwise the columns stay
     empty; the originals pointed at nonexistent names and have been fixed. -->
<process version="5.3.013">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="5.3.013" expanded="true" name="Process">
    <parameter key="logverbosity" value="all"/>
    <parameter key="logfile" value="D:\testexrff.xrff"/>
    <process expanded="true">
      <!-- Outer loop: evaluates every (C, gamma) combination on a 20x20
           quadratically spaced grid and returns the best parameter set. -->
      <operator activated="true" class="optimize_parameters_grid" compatibility="5.3.013" expanded="true" height="94" name="GridParameterOptimization (2)" width="90" x="313" y="255">
        <list key="parameters">
          <parameter key="LibSVMLearner.C" value="[1.0;200.0;20;quadratic]"/>
          <parameter key="LibSVMLearner.gamma" value="[0.0;1;20;quadratic]"/>
        </list>
        <process expanded="true">
          <!-- Inner evaluation: shuffled-sampling cross-validation.
               NOTE(review): no local random seed is set, so each run (and the
               later stand-alone run) draws different folds - enable
               "use local random seed" for reproducible results. -->
          <operator activated="true" class="x_validation" compatibility="5.1.002" expanded="true" height="112" name="XValidation (3)" width="90" x="45" y="30">
            <parameter key="sampling_type" value="shuffled sampling"/>
            <process expanded="true">
              <!-- Training subprocess: materialize the fold, then train the SVM. -->
              <operator activated="true" class="materialize_data" compatibility="5.3.013" expanded="true" height="76" name="Materialize Data (6)" width="90" x="45" y="30"/>
              <operator activated="true" class="support_vector_machine_libsvm" compatibility="5.3.013" expanded="true" height="76" name="LibSVMLearner (2)" width="90" x="200" y="30">
                <parameter key="kernel_type" value="sigmoid"/>
                <!-- gamma and C here are only starting values; the grid
                     optimizer overwrites them on every iteration. -->
                <parameter key="gamma" value="1.0"/>
                <parameter key="C" value="200.0"/>
                <list key="class_weights"/>
              </operator>
              <connect from_port="training" to_op="Materialize Data (6)" to_port="example set input"/>
              <connect from_op="Materialize Data (6)" from_port="example set output" to_op="LibSVMLearner (2)" to_port="training set"/>
              <connect from_op="LibSVMLearner (2)" from_port="model" to_port="model"/>
              <portSpacing port="source_training" spacing="0"/>
              <portSpacing port="sink_model" spacing="0"/>
              <portSpacing port="sink_through 1" spacing="0"/>
            </process>
            <process expanded="true">
              <!-- Testing subprocess: apply the model and measure binominal
                   performance; "specificity" is the main criterion the grid
                   search optimizes. -->
              <operator activated="true" class="apply_model" compatibility="5.3.013" expanded="true" height="76" name="ModelApplier (2)" width="90" x="45" y="30">
                <list key="application_parameters"/>
              </operator>
              <operator activated="true" class="performance_binominal_classification" compatibility="5.3.013" expanded="true" height="76" name="BinominalClassificationPerformance (2)" width="90" x="209" y="30">
                <parameter key="main_criterion" value="specificity"/>
                <parameter key="precision" value="true"/>
                <parameter key="recall" value="true"/>
                <parameter key="f_measure" value="true"/>
                <parameter key="false_positive" value="true"/>
                <parameter key="false_negative" value="true"/>
                <parameter key="true_positive" value="true"/>
                <parameter key="true_negative" value="true"/>
                <parameter key="sensitivity" value="true"/>
                <parameter key="specificity" value="true"/>
              </operator>
              <connect from_port="model" to_op="ModelApplier (2)" to_port="model"/>
              <connect from_port="test set" to_op="ModelApplier (2)" to_port="unlabelled data"/>
              <connect from_op="ModelApplier (2)" from_port="labelled data" to_op="BinominalClassificationPerformance (2)" to_port="labelled data"/>
              <connect from_op="BinominalClassificationPerformance (2)" from_port="performance" to_port="averagable 1"/>
              <portSpacing port="source_model" spacing="0"/>
              <portSpacing port="source_test set" spacing="0"/>
              <portSpacing port="source_through 1" spacing="0"/>
              <portSpacing port="sink_averagable 1" spacing="0"/>
              <portSpacing port="sink_averagable 2" spacing="0"/>
            </process>
          </operator>
          <!-- Per-iteration logging. References corrected to the actual
               operator names used in this process. -->
          <operator activated="true" class="log" compatibility="5.3.013" expanded="true" height="76" name="ProcessLog (3)" width="90" x="403" y="48">
            <parameter key="filename" value="fCoutputsig_%{a}.log"/>
            <list key="log">
              <!-- NOTE(review): no operator named "CSVExampleSource" exists in
                   this process, so this column will be empty - point it at the
                   real data-source operator or remove it. -->
              <parameter key="File" value="operator.CSVExampleSource.parameter.filename"/>
              <parameter key="Iteration" value="operator.GridParameterOptimization (2).value.applycount"/>
              <parameter key="gamma" value="operator.LibSVMLearner (2).parameter.gamma"/>
              <parameter key="C" value="operator.LibSVMLearner (2).parameter.C"/>
              <parameter key="Performance_main criterion" value="operator.XValidation (3).value.performance"/>
              <parameter key="Deviation" value="operator.XValidation (3).value.deviation"/>
              <parameter key="sdfg" value="operator.GridParameterOptimization (2).value.performance"/>
            </list>
            <parameter key="sorting_dimension" value="3"/>
          </operator>
          <connect from_port="input 1" to_op="XValidation (3)" to_port="training"/>
          <connect from_op="XValidation (3)" from_port="averagable 1" to_op="ProcessLog (3)" to_port="through 1"/>
          <connect from_op="ProcessLog (3)" from_port="through 1" to_port="performance"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="source_input 2" spacing="0"/>
          <portSpacing port="sink_performance" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
        </process>
      </operator>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
    </process>
  </operator>
</process>



It returns me this results


ParameterSet
Parameter set:

Performance:
PerformanceVector [
-----accuracy: 52.81% +/- 2.81% (mikro: 52.81%)
ConfusionMatrix:
True: S N
S: 1716 1610
N: 254 370
-----precision: 58.91% +/- 7.12% (mikro: 59.29%) (positive class: N)
ConfusionMatrix:
True: S N
S: 1716 1610
N: 254 370
-----recall: 18.69% +/- 6.51% (mikro: 18.69%) (positive class: N)
ConfusionMatrix:
True: S N
S: 1716 1610
N: 254 370
-----f_measure: 27.84% +/- 8.62% (mikro: 28.42%) (positive class: N)
ConfusionMatrix:
True: S N
S: 1716 1610
N: 254 370
-----false_positive: 25.400 +/- 9.425 (mikro: 254.000) (positive class: N)
ConfusionMatrix:
True: S N
S: 1716 1610
N: 254 370
-----false_negative: 161.000 +/- 12.900 (mikro: 1610.000) (positive class: N)
ConfusionMatrix:
True: S N
S: 1716 1610
N: 254 370
-----true_positive: 37.000 +/- 12.900 (mikro: 370.000) (positive class: N)
ConfusionMatrix:
True: S N
S: 1716 1610
N: 254 370
-----true_negative: 171.600 +/- 9.425 (mikro: 1716.000) (positive class: N)
ConfusionMatrix:
True: S N
S: 1716 1610
N: 254 370
-----sensitivity: 18.69% +/- 6.51% (mikro: 18.69%) (positive class: N)
ConfusionMatrix:
True: S N
S: 1716 1610
N: 254 370
*****specificity: 87.11% +/- 4.78% (mikro: 87.11%) (positive class: N)
ConfusionMatrix:
True: S N
S: 1716 1610
N: 254 370
]
LibSVMLearner.C = 41.2975
LibSVMLearner.gamma = 1.125



Now let's set aside that the results are not very good, since that is not the issue here. After obtaining C = 41.297 and gamma = 1.125, I tried to run the SVM individually with these parameters. However, instead of getting the expected 52.81%, I got 50.38%. Did I misunderstand something? By using the same C and gamma that the grid parameter optimization gave me, shouldn't I have gotten the same results? I am using RapidMiner version 5.3.
1 REPLY
Regular Contributor

Re: Different output results with grid parameter optimization

Nothing looks wrong to me. You also have to take the reported error bars into account.
Try enabling "use local random seed" in the X-Validation operator; then both results should be truly identical, because the cross-validation will split the data into the same folds on every run.