ROC-curve
Hello,
I want to compare ROC curves with the Golf data set. I've transformed its attributes to be binominal. When I get the chart of the comparison, there's no curve. Could someone please help?
<?xml version="1.0" encoding="UTF-8"?><process version="8.1.000">
<!-- Fragment: a single Retrieve operator that loads the repository entry //Samples/data/Golf. -->
<!-- This process element holds only this operator and no connect elements. -->
<operator activated="true" class="retrieve" compatibility="8.1.000" expanded="true" height="68" name="Retrieve Golf" width="90" x="112" y="136">
<parameter key="repository_entry" value="//Samples/data/Golf"/>
</operator>
</process>
<?xml version="1.0" encoding="UTF-8"?><process version="8.1.000">
<!-- Fragment: a single Nominal to Binominal operator with every parameter written out explicitly. -->
<!-- The attribute filter is "all" and include_special_attributes is "false". -->
<!-- NOTE(review): presumably special attributes such as the label are therefore left untouched; confirm against the operator documentation. -->
<!-- This process element holds only this operator and no connect elements. -->
<operator activated="true" class="nominal_to_binominal" compatibility="8.1.000" expanded="true" height="103" name="Nominal to Binominal" width="90" x="246" y="136">
<parameter key="return_preprocessing_model" value="false"/>
<parameter key="create_view" value="false"/>
<parameter key="attribute_filter_type" value="all"/>
<parameter key="attribute" value=""/>
<parameter key="attributes" value=""/>
<parameter key="use_except_expression" value="false"/>
<parameter key="value_type" value="nominal"/>
<parameter key="use_value_type_exception" value="false"/>
<parameter key="except_value_type" value="file_path"/>
<parameter key="block_type" value="single_value"/>
<parameter key="use_block_type_exception" value="false"/>
<parameter key="except_block_type" value="single_value"/>
<parameter key="invert_selection" value="false"/>
<parameter key="include_special_attributes" value="false"/>
<parameter key="transform_binominal" value="false"/>
<parameter key="use_underscore_in_name" value="false"/>
</operator>
</process>
<?xml version="1.0" encoding="UTF-8"?><process version="8.1.000">
<!-- Fragment: a single Numerical to Binominal operator applied to all attributes (attribute_filter_type "all"). -->
<!-- Both min and max are set to 0.0. -->
<!-- NOTE(review): presumably only values inside [min, max] map to one class and everything else to the other, so non-zero columns may become constant; confirm against the operator documentation. -->
<!-- This process element holds only this operator and no connect elements. -->
<operator activated="true" class="numerical_to_binominal" compatibility="8.1.000" expanded="true" height="82" name="Numerical to Binominal" width="90" x="380" y="136">
<parameter key="attribute_filter_type" value="all"/>
<parameter key="attribute" value=""/>
<parameter key="attributes" value=""/>
<parameter key="use_except_expression" value="false"/>
<parameter key="value_type" value="numeric"/>
<parameter key="use_value_type_exception" value="false"/>
<parameter key="except_value_type" value="real"/>
<parameter key="block_type" value="value_series"/>
<parameter key="use_block_type_exception" value="false"/>
<parameter key="except_block_type" value="value_series_end"/>
<parameter key="invert_selection" value="false"/>
<parameter key="include_special_attributes" value="false"/>
<parameter key="min" value="0.0"/>
<parameter key="max" value="0.0"/>
</operator>
</process>
<?xml version="1.0" encoding="UTF-8"?><process version="8.1.000">
<!-- Fragment: a Compare ROCs operator configured with number_of_folds 10, split_ratio 0.7, stratified sampling and an optimistic ROC bias. -->
<!-- Its inner process trains two learners: Naive Bayes on port "train 1" and Decision Tree on port "train 2". -->
<!-- The resulting models are delivered to the "model 1" and "model 2" sinks. -->
<!-- The outer process element has no connect elements, so the operator receives no example set in this fragment. -->
<operator activated="true" class="compare_rocs" compatibility="8.1.000" expanded="true" height="82" name="Compare ROCs" width="90" x="514" y="136">
<parameter key="number_of_folds" value="10"/>
<parameter key="split_ratio" value="0.7"/>
<parameter key="sampling_type" value="stratified sampling"/>
<parameter key="use_local_random_seed" value="false"/>
<parameter key="local_random_seed" value="1992"/>
<parameter key="use_example_weights" value="true"/>
<parameter key="roc_bias" value="optimistic"/>
<process expanded="true">
<operator activated="true" class="naive_bayes" compatibility="8.1.000" expanded="true" height="82" name="Naive Bayes" width="90" x="112" y="30">
<parameter key="laplace_correction" value="true"/>
</operator>
<operator activated="true" class="concurrency:parallel_decision_tree" compatibility="8.1.000" expanded="true" height="103" name="Decision Tree" width="90" x="112" y="136">
<parameter key="criterion" value="gain_ratio"/>
<parameter key="maximal_depth" value="20"/>
<parameter key="apply_pruning" value="true"/>
<parameter key="confidence" value="0.25"/>
<parameter key="apply_prepruning" value="true"/>
<parameter key="minimal_gain" value="0.1"/>
<parameter key="minimal_leaf_size" value="2"/>
<parameter key="minimal_size_for_split" value="4"/>
<parameter key="number_of_prepruning_alternatives" value="3"/>
</operator>
<connect from_port="train 1" to_op="Naive Bayes" to_port="training set"/>
<connect from_port="train 2" to_op="Decision Tree" to_port="training set"/>
<connect from_op="Naive Bayes" from_port="model" to_port="model 1"/>
<connect from_op="Decision Tree" from_port="model" to_port="model 2"/>
<portSpacing port="source_train 1" spacing="0"/>
<portSpacing port="source_train 2" spacing="0"/>
<portSpacing port="source_train 3" spacing="0"/>
<portSpacing port="sink_model 1" spacing="0"/>
<portSpacing port="sink_model 2" spacing="0"/>
<portSpacing port="sink_model 3" spacing="0"/>
</process>
</operator>
</process>
Best Answer
-
To see the Performance results, just connect the PER port from the Cross Validation to the RES output.
1
Answers
-
Your XML is not working for me. Please open the XML view and copy and paste from there.
0 -
@domi_wiese Did you run the tutorial for the Compare ROCs operator like I suggested? That should show you how you need to configure the operator to get what you want. If so, what are you trying to do differently with the Golf dataset that isn't working?
0 -
Hello,
Now I have my process.
<?xml version="1.0" encoding="UTF-8"?><process version="8.1.000">
<!-- Complete process: Retrieve Golf, then Nominal to Binominal, then Numerical to Binominal, then Compare ROCs. -->
<!-- Compare ROCs delivers its example set to "result 1" and the ROC comparison to "result 2". -->
<!-- No parameter elements are written for the two conversion operators or for Compare ROCs, so their settings are not visible here. -->
<!-- NOTE(review): a reply in this thread attributes the empty ROC chart to 10 folds on a 14-example data set; consider a lower number_of_folds or a larger data set, and confirm. -->
<context>
<input/>
<output/>
<macros/>
</context>
<operator activated="true" class="process" compatibility="6.0.002" expanded="true" name="Root">
<process expanded="true">
<operator activated="true" class="retrieve" compatibility="8.1.000" expanded="true" height="68" name="Retrieve Golf" width="90" x="112" y="136">
<parameter key="repository_entry" value="//Samples/data/Golf"/>
</operator>
<operator activated="true" class="nominal_to_binominal" compatibility="8.1.000" expanded="true" height="103" name="Nominal to Binominal" width="90" x="246" y="136"/>
<operator activated="true" class="numerical_to_binominal" compatibility="8.1.000" expanded="true" height="82" name="Numerical to Binominal" width="90" x="380" y="136"/>
<operator activated="true" class="compare_rocs" compatibility="8.1.000" expanded="true" height="82" name="Compare ROCs" width="90" x="514" y="136">
<!-- Inner process: one learner per training port; each model goes to the sink with the matching number. -->
<process expanded="true">
<operator activated="true" class="naive_bayes" compatibility="8.1.000" expanded="true" height="82" name="Naive Bayes" width="90" x="112" y="30"/>
<operator activated="true" class="concurrency:parallel_decision_tree" compatibility="8.1.000" expanded="true" height="103" name="Decision Tree" width="90" x="112" y="136"/>
<connect from_port="train 1" to_op="Naive Bayes" to_port="training set"/>
<connect from_port="train 2" to_op="Decision Tree" to_port="training set"/>
<connect from_op="Naive Bayes" from_port="model" to_port="model 1"/>
<connect from_op="Decision Tree" from_port="model" to_port="model 2"/>
<portSpacing port="source_train 1" spacing="0"/>
<portSpacing port="source_train 2" spacing="72"/>
<portSpacing port="source_train 3" spacing="72"/>
<portSpacing port="sink_model 1" spacing="0"/>
<portSpacing port="sink_model 2" spacing="72"/>
<portSpacing port="sink_model 3" spacing="72"/>
</process>
</operator>
<connect from_op="Retrieve Golf" from_port="output" to_op="Nominal to Binominal" to_port="example set input"/>
<connect from_op="Nominal to Binominal" from_port="example set output" to_op="Numerical to Binominal" to_port="example set input"/>
<connect from_op="Numerical to Binominal" from_port="example set output" to_op="Compare ROCs" to_port="example set"/>
<connect from_op="Compare ROCs" from_port="exampleSet" to_port="result 1"/>
<connect from_op="Compare ROCs" from_port="rocComparison" to_port="result 2"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_result 1" spacing="108"/>
<portSpacing port="sink_result 2" spacing="0"/>
<portSpacing port="sink_result 3" spacing="0"/>
</process>
</operator>
</process>0 -
@domi_wiese it doesn't work because you are using 10 folds and you only have 14 examples. The dataset is too small, try the sonar dataset.
0 -
Hi all,
For me, this process with the Golf dataset runs without raising an error, but, as for @domi_wiese, no ROC curve is displayed.
But you're right @Thomas_Ott: in this special case, where the number of folds is greater than the number of members in each class,
why doesn't RapidMiner raise an error (or a warning)? And how is the cross-validation then performed?
@domi_wiese, you can decrease the number of folds parameter of the Compare ROCs operator
to satisfy the condition above (for example, you can set number of folds = 5).
Regards,
Lionel
0 -
@lionelderkrikor it might be working for you because of your seed; mine is probably different. Why didn't RM throw an error? That's a more interesting question and I don't know the answer to that.
0 -
Hello @Thomas_Ott @lionelderkrikor @Telcontar120,
thank you for bringing my attention to this matter. Now I get what the problem was.
I've got just one more question. It's about creating a lift chart with a binominal performance operator.
I'm not sure if my process is even right, and why the performance operator isn't working at all?
I would be very thankful if someone could send me the correct process.
0 -
@Thomas_Ott @lionelderkrikor @Telcontar120
sorry, forgot to put the process online.
<?xml version="1.0" encoding="UTF-8"?><process version="8.1.000">
<!-- Fragment: a single Generate Direct Mailing Data operator producing 10000 examples with a fixed local random seed of 1992. -->
<!-- This process element holds only this operator and no connect elements. -->
<operator activated="true" class="generate_direct_mailing_data" compatibility="8.1.000" expanded="true" height="68" name="Generate Direct Mailing Data" width="90" x="45" y="34">
<parameter key="number_examples" value="10000"/>
<parameter key="use_local_random_seed" value="true"/>
<parameter key="local_random_seed" value="1992"/>
</operator>
</process>
<?xml version="1.0" encoding="UTF-8"?><process version="8.1.000">
<!-- Fragment: a single Nominal to Binominal operator with the attribute filter set to "all" and include_special_attributes set to "false". -->
<!-- This process element holds only this operator and no connect elements. -->
<operator activated="true" class="nominal_to_binominal" compatibility="8.1.000" expanded="true" height="103" name="Nominal to Binominal" width="90" x="179" y="34">
<parameter key="return_preprocessing_model" value="false"/>
<parameter key="create_view" value="false"/>
<parameter key="attribute_filter_type" value="all"/>
<parameter key="attribute" value=""/>
<parameter key="attributes" value=""/>
<parameter key="use_except_expression" value="false"/>
<parameter key="value_type" value="nominal"/>
<parameter key="use_value_type_exception" value="false"/>
<parameter key="except_value_type" value="file_path"/>
<parameter key="block_type" value="single_value"/>
<parameter key="use_block_type_exception" value="false"/>
<parameter key="except_block_type" value="single_value"/>
<parameter key="invert_selection" value="false"/>
<parameter key="include_special_attributes" value="false"/>
<parameter key="transform_binominal" value="false"/>
<parameter key="use_underscore_in_name" value="false"/>
</operator>
</process>
<?xml version="1.0" encoding="UTF-8"?><process version="8.1.000">
<!-- Fragment: a Split Validation operator with a relative split (split_ratio 0.7) and shuffled sampling. -->
<!-- The outer process element has no connect elements, so the operator receives no data in this fragment. -->
<operator activated="true" class="split_validation" compatibility="8.1.000" expanded="true" height="124" name="Validation" width="90" x="447" y="136">
<parameter key="create_complete_model" value="false"/>
<parameter key="split" value="relative"/>
<parameter key="split_ratio" value="0.7"/>
<parameter key="training_set_size" value="100"/>
<parameter key="test_set_size" value="-1"/>
<parameter key="sampling_type" value="shuffled sampling"/>
<parameter key="use_local_random_seed" value="false"/>
<parameter key="local_random_seed" value="2000"/>
<!-- Training subprocess: fits Naive Bayes on the training port and hands the model to the model sink. -->
<process expanded="true">
<operator activated="true" class="naive_bayes" compatibility="8.1.000" expanded="true" height="82" name="Naive Bayes" width="90" x="112" y="34">
<parameter key="laplace_correction" value="true"/>
</operator>
<connect from_port="training" to_op="Naive Bayes" to_port="training set"/>
<connect from_op="Naive Bayes" from_port="model" to_port="model"/>
<portSpacing port="source_training" spacing="0"/>
<portSpacing port="sink_model" spacing="0"/>
<portSpacing port="sink_through 1" spacing="0"/>
</process>
<!-- Testing subprocess: Create Lift Chart receives the model and the test set, then passes both on to Apply Model. -->
<!-- The labelled data goes to Performance (2), whose result feeds "averagable 1"; the lift chart itself is stored by Remember under the name "Lift Chart". -->
<process expanded="true">
<operator activated="true" class="create_lift_chart" compatibility="8.1.000" expanded="true" height="103" name="Create Lift Chart" width="90" x="45" y="34">
<parameter key="target_class" value="response"/>
<parameter key="binning_type" value="frequency"/>
<parameter key="number_of_bins" value="10"/>
<parameter key="size_of_bins" value="1000"/>
<parameter key="automatic_number_of_digits" value="true"/>
<parameter key="number_of_digits" value="-1"/>
<parameter key="show_bar_labels" value="true"/>
<parameter key="show_cumulative_labels" value="false"/>
<parameter key="rotate_labels" value="false"/>
</operator>
<operator activated="true" class="apply_model" compatibility="8.1.000" expanded="true" height="82" name="Apply Model" width="90" x="45" y="187">
<list key="application_parameters"/>
<parameter key="create_view" value="false"/>
</operator>
<!-- Only the accuracy criterion is enabled below; every other criterion is set to "false". -->
<operator activated="true" class="performance_binominal_classification" compatibility="8.1.000" expanded="true" height="82" name="Performance (2)" width="90" x="179" y="187">
<parameter key="main_criterion" value="first"/>
<parameter key="accuracy" value="true"/>
<parameter key="classification_error" value="false"/>
<parameter key="kappa" value="false"/>
<parameter key="AUC (optimistic)" value="false"/>
<parameter key="AUC" value="false"/>
<parameter key="AUC (pessimistic)" value="false"/>
<parameter key="precision" value="false"/>
<parameter key="recall" value="false"/>
<parameter key="lift" value="false"/>
<parameter key="fallout" value="false"/>
<parameter key="f_measure" value="false"/>
<parameter key="false_positive" value="false"/>
<parameter key="false_negative" value="false"/>
<parameter key="true_positive" value="false"/>
<parameter key="true_negative" value="false"/>
<parameter key="sensitivity" value="false"/>
<parameter key="specificity" value="false"/>
<parameter key="youden" value="false"/>
<parameter key="positive_predictive_value" value="false"/>
<parameter key="negative_predictive_value" value="false"/>
<parameter key="psep" value="false"/>
<parameter key="skip_undefined_labels" value="true"/>
<parameter key="use_example_weights" value="true"/>
</operator>
<operator activated="true" class="remember" compatibility="8.1.000" expanded="true" height="68" name="Remember" width="90" x="246" y="34">
<parameter key="name" value="Lift Chart"/>
<parameter key="io_object" value="LiftParetoChart"/>
<parameter key="store_which" value="1"/>
<parameter key="remove_from_process" value="true"/>
</operator>
<connect from_port="model" to_op="Create Lift Chart" to_port="model"/>
<connect from_port="test set" to_op="Create Lift Chart" to_port="example set"/>
<connect from_op="Create Lift Chart" from_port="example set" to_op="Apply Model" to_port="unlabelled data"/>
<connect from_op="Create Lift Chart" from_port="model" to_op="Apply Model" to_port="model"/>
<connect from_op="Create Lift Chart" from_port="lift pareto chart" to_op="Remember" to_port="store"/>
<connect from_op="Apply Model" from_port="labelled data" to_op="Performance (2)" to_port="labelled data"/>
<connect from_op="Performance (2)" from_port="performance" to_port="averagable 1"/>
<portSpacing port="source_model" spacing="0"/>
<portSpacing port="source_test set" spacing="0"/>
<portSpacing port="source_through 1" spacing="0"/>
<portSpacing port="sink_averagable 1" spacing="0"/>
<portSpacing port="sink_averagable 2" spacing="0"/>
</process>
</operator>
</process>
<?xml version="1.0" encoding="UTF-8"?><process version="8.1.000">
<!-- Fragment: a single Recall operator that fetches the LiftParetoChart object stored under the name "Lift Chart". -->
<!-- remove_from_store is "true". This process element holds only this operator and no connect elements. -->
<operator activated="true" class="recall" compatibility="8.1.000" expanded="true" height="68" name="Recall" width="90" x="447" y="34">
<parameter key="name" value="Lift Chart"/>
<parameter key="io_object" value="LiftParetoChart"/>
<parameter key="remove_from_store" value="true"/>
</operator>
</process>0 -
Hi @domi_wiese,
I can not load your last XML code in RapidMiner.
To build a lift chart, you can for example use the Create Lift Chart operator.
Best regards,
Lionel
0 -
Hi,
@lionelderkrikor @Thomas_Ott @Telcontar120
I'm really sorry for my mistakes. I hope it works now.
<?xml version="1.0" encoding="UTF-8"?><process version="8.1.000">
<!-- Complete lift-chart process: Generate Direct Mailing Data, then Nominal to Binominal, then Split Validation. -->
<!-- The lift chart built inside the testing subprocess is stored by Remember and fetched by Recall, which delivers it to "result 1". -->
<!-- The performance output of Validation ("averagable 1") is wired to "result 2" so that the Performance (2) result is also shown. -->
<context>
<input/>
<output/>
<macros/>
</context>
<operator activated="true" class="process" compatibility="8.1.000" expanded="true" name="Process">
<process expanded="true">
<operator activated="true" class="generate_direct_mailing_data" compatibility="8.1.000" expanded="true" height="68" name="Generate Direct Mailing Data" width="90" x="45" y="34">
<parameter key="number_examples" value="10000"/>
<parameter key="use_local_random_seed" value="true"/>
</operator>
<operator activated="true" class="nominal_to_binominal" compatibility="8.1.000" expanded="true" height="103" name="Nominal to Binominal" width="90" x="179" y="34"/>
<operator activated="true" class="split_validation" compatibility="8.1.000" expanded="true" height="124" name="Validation" width="90" x="447" y="136">
<parameter key="sampling_type" value="shuffled sampling"/>
<parameter key="local_random_seed" value="2000"/>
<!-- Training subprocess: fits Naive Bayes on the training port and hands the model to the model sink. -->
<process expanded="true">
<operator activated="true" class="naive_bayes" compatibility="8.1.000" expanded="true" height="82" name="Naive Bayes" width="90" x="112" y="34"/>
<connect from_port="training" to_op="Naive Bayes" to_port="training set"/>
<connect from_op="Naive Bayes" from_port="model" to_port="model"/>
<portSpacing port="source_training" spacing="0"/>
<portSpacing port="sink_model" spacing="0"/>
<portSpacing port="sink_through 1" spacing="0"/>
</process>
<!-- Testing subprocess: Create Lift Chart passes the model and test set on to Apply Model; the labelled data is scored by Performance (2). -->
<process expanded="true">
<operator activated="true" class="create_lift_chart" compatibility="8.1.000" expanded="true" height="103" name="Create Lift Chart" width="90" x="45" y="34">
<parameter key="target_class" value="response"/>
</operator>
<operator activated="true" class="apply_model" compatibility="8.1.000" expanded="true" height="82" name="Apply Model" width="90" x="45" y="187">
<list key="application_parameters"/>
</operator>
<operator activated="true" class="performance_binominal_classification" compatibility="8.1.000" expanded="true" height="82" name="Performance (2)" width="90" x="179" y="187"/>
<operator activated="true" class="remember" compatibility="8.1.000" expanded="true" height="68" name="Remember" width="90" x="246" y="34">
<parameter key="name" value="Lift Chart"/>
<parameter key="io_object" value="LiftParetoChart"/>
</operator>
<connect from_port="model" to_op="Create Lift Chart" to_port="model"/>
<connect from_port="test set" to_op="Create Lift Chart" to_port="example set"/>
<connect from_op="Create Lift Chart" from_port="example set" to_op="Apply Model" to_port="unlabelled data"/>
<connect from_op="Create Lift Chart" from_port="model" to_op="Apply Model" to_port="model"/>
<connect from_op="Create Lift Chart" from_port="lift pareto chart" to_op="Remember" to_port="store"/>
<connect from_op="Apply Model" from_port="labelled data" to_op="Performance (2)" to_port="labelled data"/>
<connect from_op="Performance (2)" from_port="performance" to_port="averagable 1"/>
<portSpacing port="source_model" spacing="0"/>
<portSpacing port="source_test set" spacing="0"/>
<portSpacing port="source_through 1" spacing="0"/>
<portSpacing port="sink_averagable 1" spacing="0"/>
<portSpacing port="sink_averagable 2" spacing="0"/>
</process>
</operator>
<operator activated="true" class="recall" compatibility="8.1.000" expanded="true" height="68" name="Recall" width="90" x="447" y="34">
<parameter key="name" value="Lift Chart"/>
<parameter key="io_object" value="LiftParetoChart"/>
</operator>
<connect from_op="Generate Direct Mailing Data" from_port="output" to_op="Nominal to Binominal" to_port="example set input"/>
<connect from_op="Nominal to Binominal" from_port="example set output" to_op="Validation" to_port="training"/>
<!-- Without this connection the performance vector computed inside Validation never reaches a result port. -->
<connect from_op="Validation" from_port="averagable 1" to_port="result 2"/>
<connect from_op="Recall" from_port="result" to_port="result 1"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_result 1" spacing="0"/>
<portSpacing port="sink_result 2" spacing="0"/>
<portSpacing port="sink_result 3" spacing="0"/>
</process>
</operator>
</process>0 -
To see the Performance results, just connect the PER port from the Cross Validation to the RES output.
1 -
Hi @domi_wiese,
You can use the Cross Validation operator instead of the Split Validation operator: it is considered a best practice.
Best regards,
Lionel
1 -
Hi,
Thank you very much! I got it!
1