The RapidMiner community is on read-only mode until further notice. Technical support via cases will continue to work as is. For any urgent licensing related requests from Students/Faculty members, please use the Altair academic forum here.
Different output results with grid parameter optimization
I am trying to get the best C and gamma combination with the grid parameter optimization like this
It returns me this results
Now let's ignore that the results are not every good since it's not the issue here. After obtaining the C = 41.297 and gamma = 1.125 I tried to run the svm individually with this parameters. however instead of getting the expected 52.81% I got 50.38%. Did I misunderstood anything, shouldn't by using the same C and Gamma given me by the grid parameter optimization have got the same results? I am using the 5.3 rapidminer version.
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<process version="5.3.013">
<context>
<input/>
<output/>
<macros/>
</context>
<operator activated="true" class="process" compatibility="5.3.013" expanded="true" name="Process">
<parameter key="logverbosity" value="all"/>
<parameter key="logfile" value="D:\testexrff.xrff"/>
<process expanded="true">
<operator activated="true" class="optimize_parameters_grid" compatibility="5.3.013" expanded="true" height="94" name="GridParameterOptimization (2)" width="90" x="313" y="255">
<list key="parameters">
<parameter key="LibSVMLearner.C" value="[1.0;200.0;20;quadratic]"/>
<parameter key="LibSVMLearner.gamma" value="[0.0;1;20;quadratic]"/>
</list>
<process expanded="true">
<operator activated="true" class="x_validation" compatibility="5.1.002" expanded="true" height="112" name="XValidation (3)" width="90" x="45" y="30">
<parameter key="sampling_type" value="shuffled sampling"/>
<process expanded="true">
<operator activated="true" class="materialize_data" compatibility="5.3.013" expanded="true" height="76" name="Materialize Data (6)" width="90" x="45" y="30"/>
<operator activated="true" class="support_vector_machine_libsvm" compatibility="5.3.013" expanded="true" height="76" name="LibSVMLearner (2)" width="90" x="200" y="30">
<parameter key="kernel_type" value="sigmoid"/>
<parameter key="gamma" value="1.0"/>
<parameter key="C" value="200.0"/>
<list key="class_weights"/>
</operator>
<connect from_port="training" to_op="Materialize Data (6)" to_port="example set input"/>
<connect from_op="Materialize Data (6)" from_port="example set output" to_op="LibSVMLearner (2)" to_port="training set"/>
<connect from_op="LibSVMLearner (2)" from_port="model" to_port="model"/>
<portSpacing port="source_training" spacing="0"/>
<portSpacing port="sink_model" spacing="0"/>
<portSpacing port="sink_through 1" spacing="0"/>
</process>
<process expanded="true">
<operator activated="true" class="apply_model" compatibility="5.3.013" expanded="true" height="76" name="ModelApplier (2)" width="90" x="45" y="30">
<list key="application_parameters"/>
</operator>
<operator activated="true" class="performance_binominal_classification" compatibility="5.3.013" expanded="true" height="76" name="BinominalClassificationPerformance (2)" width="90" x="209" y="30">
<parameter key="main_criterion" value="specificity"/>
<parameter key="precision" value="true"/>
<parameter key="recall" value="true"/>
<parameter key="f_measure" value="true"/>
<parameter key="false_positive" value="true"/>
<parameter key="false_negative" value="true"/>
<parameter key="true_positive" value="true"/>
<parameter key="true_negative" value="true"/>
<parameter key="sensitivity" value="true"/>
<parameter key="specificity" value="true"/>
</operator>
<connect from_port="model" to_op="ModelApplier (2)" to_port="model"/>
<connect from_port="test set" to_op="ModelApplier (2)" to_port="unlabelled data"/>
<connect from_op="ModelApplier (2)" from_port="labelled data" to_op="BinominalClassificationPerformance (2)" to_port="labelled data"/>
<connect from_op="BinominalClassificationPerformance (2)" from_port="performance" to_port="averagable 1"/>
<portSpacing port="source_model" spacing="0"/>
<portSpacing port="source_test set" spacing="0"/>
<portSpacing port="source_through 1" spacing="0"/>
<portSpacing port="sink_averagable 1" spacing="0"/>
<portSpacing port="sink_averagable 2" spacing="0"/>
</process>
</operator>
<operator activated="true" class="log" compatibility="5.3.013" expanded="true" height="76" name="ProcessLog (3)" width="90" x="403" y="48">
<parameter key="filename" value="fCoutputsig_%{a}.log"/>
<list key="log">
<parameter key="File" value="operator.CSVExampleSource.parameter.filename"/>
<parameter key="Iteration" value="operator.GridParameterOptimization.value.applycount"/>
<parameter key="gamma" value="operator.LibSVMLearner.parameter.gamma"/>
<parameter key="C" value="operator.LibSVMLearner.parameter.C"/>
<parameter key="Performance_main criterion" value="operator.XValidation (2).value.performance"/>
<parameter key="Deviation" value="operator.XValidation (2).value.deviation"/>
<parameter key="sdfg" value="operator.GridParameterOptimization.value.performance"/>
</list>
<parameter key="sorting_dimension" value="3"/>
</operator>
<connect from_port="input 1" to_op="XValidation (3)" to_port="training"/>
<connect from_op="XValidation (3)" from_port="averagable 1" to_op="ProcessLog (3)" to_port="through 1"/>
<connect from_op="ProcessLog (3)" from_port="through 1" to_port="performance"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="source_input 2" spacing="0"/>
<portSpacing port="sink_performance" spacing="0"/>
<portSpacing port="sink_result 1" spacing="0"/>
</process>
</operator>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_result 1" spacing="0"/>
</process>
</operator>
</process>
It returns me this results
ParameterSet
Parameter set:
Performance:
PerformanceVector [
-----accuracy: 52.81% +/- 2.81% (mikro: 52.81%)
ConfusionMatrix:
True: S N
S: 1716 1610
N: 254 370
-----precision: 58.91% +/- 7.12% (mikro: 59.29%) (positive class: N)
ConfusionMatrix:
True: S N
S: 1716 1610
N: 254 370
-----recall: 18.69% +/- 6.51% (mikro: 18.69%) (positive class: N)
ConfusionMatrix:
True: S N
S: 1716 1610
N: 254 370
-----f_measure: 27.84% +/- 8.62% (mikro: 28.42%) (positive class: N)
ConfusionMatrix:
True: S N
S: 1716 1610
N: 254 370
-----false_positive: 25.400 +/- 9.425 (mikro: 254.000) (positive class: N)
ConfusionMatrix:
True: S N
S: 1716 1610
N: 254 370
-----false_negative: 161.000 +/- 12.900 (mikro: 1610.000) (positive class: N)
ConfusionMatrix:
True: S N
S: 1716 1610
N: 254 370
-----true_positive: 37.000 +/- 12.900 (mikro: 370.000) (positive class: N)
ConfusionMatrix:
True: S N
S: 1716 1610
N: 254 370
-----true_negative: 171.600 +/- 9.425 (mikro: 1716.000) (positive class: N)
ConfusionMatrix:
True: S N
S: 1716 1610
N: 254 370
-----sensitivity: 18.69% +/- 6.51% (mikro: 18.69%) (positive class: N)
ConfusionMatrix:
True: S N
S: 1716 1610
N: 254 370
*****specificity: 87.11% +/- 4.78% (mikro: 87.11%) (positive class: N)
ConfusionMatrix:
True: S N
S: 1716 1610
N: 254 370
]
LibSVMLearner.C = 41.2975
LibSVMLearner.gamma = 1.125
Now let's ignore that the results are not every good since it's not the issue here. After obtaining the C = 41.297 and gamma = 1.125 I tried to run the svm individually with this parameters. however instead of getting the expected 52.81% I got 50.38%. Did I misunderstood anything, shouldn't by using the same C and Gamma given me by the grid parameter optimization have got the same results? I am using the 5.3 rapidminer version.
0
Answers
Try "use local random seed" in the X-Validation operator then the both results should be really identically.