generate scoring function from Contingency table values...
hi,
I have a process here:
What I want to do is extract the individual values of the contingency table from the Performance (Classification) operator. If that's not possible, maybe I can get them from the Performance (Binominal Classification) operator, because I have not seen any comparable fields for the contingency table in the Performance (Classification) operator.
Then I want to generate a scoring-function attribute, with different costs or profits assigned to each value of the contingency table, e.g. 43 * true positives - 4.77 * false positives, etc.
How can I do that? I don't know how to extract those values, and I have already played around a bit...
the whole dataset is in the zip file...
<?xml version="1.0" encoding="UTF-8"?>
<!--
  RapidMiner process (version 7.2.001).
  Training path: retrieve the training data, replace missing values, generate
  stratification weights, then run a 5-fold stratified cross-validation of a
  Weka J48 learner.
  Scoring path: apply the model delivered by the validation to the "class"
  data set, join the result with the "realclass" data set, measure the
  performance and finally try to derive a score attribute from it.
-->
<process version="7.2.001">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="7.2.001" expanded="true" name="Process">
    <process expanded="true">
      <!-- Training data preparation -->
      <operator activated="true" class="retrieve" compatibility="7.2.001" expanded="true" height="68" name="Retrieve data_dmc2002_train (2)" width="90" x="45" y="187">
        <parameter key="repository_entry" value="../data/data_dmc2002_train"/>
      </operator>
      <operator activated="true" class="replace_missing_values" compatibility="7.2.001" expanded="true" height="103" name="Replace Missing Values" width="90" x="179" y="187">
        <list key="columns"/>
      </operator>
      <operator activated="true" class="generate_weight_stratification" compatibility="7.2.001" expanded="true" height="82" name="Generate Weight (Stratification)" width="90" x="313" y="187">
        <parameter key="total_weight" value="500.0"/>
      </operator>
      <!-- 5-fold stratified cross-validation -->
      <operator activated="true" class="x_validation" compatibility="7.2.001" expanded="true" height="166" name="Validation" width="90" x="514" y="85">
        <parameter key="number_of_validations" value="5"/>
        <parameter key="sampling_type" value="stratified sampling"/>
        <!-- Training subprocess: learn a W-J48 model on the training fold -->
        <process expanded="true">
          <operator activated="true" class="weka:W-J48" compatibility="7.2.000" expanded="true" height="82" name="W-J48" width="90" x="45" y="34"/>
          <connect from_port="training" to_op="W-J48" to_port="training set"/>
          <connect from_op="W-J48" from_port="model" to_port="model"/>
          <portSpacing port="source_training" spacing="0"/>
          <portSpacing port="sink_model" spacing="0"/>
          <portSpacing port="sink_through 1" spacing="0"/>
        </process>
        <!-- Testing subprocess: apply the model to the test fold and measure performance -->
        <process expanded="true">
          <operator activated="true" class="apply_model" compatibility="7.2.001" expanded="true" height="82" name="Apply Model" width="90" x="45" y="34">
            <list key="application_parameters"/>
          </operator>
          <!-- Only "output 1" of Multiply is connected below -->
          <operator activated="true" class="multiply" compatibility="7.2.001" expanded="true" height="82" name="Multiply" width="90" x="112" y="187"/>
          <operator activated="true" class="performance_classification" compatibility="7.2.001" expanded="true" height="82" name="Performance" width="90" x="246" y="34">
            <parameter key="kappa" value="true"/>
            <list key="class_weights"/>
          </operator>
          <connect from_port="model" to_op="Apply Model" to_port="model"/>
          <connect from_port="test set" to_op="Apply Model" to_port="unlabelled data"/>
          <connect from_op="Apply Model" from_port="labelled data" to_op="Multiply" to_port="input"/>
          <connect from_op="Multiply" from_port="output 1" to_op="Performance" to_port="labelled data"/>
          <!-- Only "averagable 1" is fed from inside this subprocess -->
          <connect from_op="Performance" from_port="performance" to_port="averagable 1"/>
          <portSpacing port="source_model" spacing="0"/>
          <portSpacing port="source_test set" spacing="0"/>
          <portSpacing port="source_through 1" spacing="0"/>
          <portSpacing port="sink_averagable 1" spacing="0"/>
          <portSpacing port="sink_averagable 2" spacing="0"/>
          <portSpacing port="sink_averagable 3" spacing="0"/>
          <portSpacing port="sink_averagable 4" spacing="0"/>
        </process>
      </operator>
      <!-- Scoring data: the set to classify and the set that is joined to it afterwards -->
      <operator activated="true" class="retrieve" compatibility="7.2.001" expanded="true" height="68" name="Retrieve data_dmc2002_class" width="90" x="45" y="340">
        <parameter key="repository_entry" value="../data/data_dmc2002_class"/>
      </operator>
      <operator activated="true" class="retrieve" compatibility="7.2.001" expanded="true" height="68" name="Retrieve data_dmc2002_realclass" width="90" x="246" y="442">
        <parameter key="repository_entry" value="../data/data_dmc2002_realclass"/>
      </operator>
      <operator activated="true" class="apply_model" compatibility="7.2.001" expanded="true" height="82" name="Apply Model (2)" width="90" x="313" y="340">
        <list key="application_parameters"/>
      </operator>
      <operator activated="true" class="join" compatibility="7.2.001" expanded="true" height="82" name="Join" width="90" x="514" y="442">
        <list key="key_attributes"/>
        <parameter key="keep_both_join_attributes" value="true"/>
      </operator>
      <operator activated="true" class="performance_classification" compatibility="7.2.001" expanded="true" height="82" name="Performance (2)" width="90" x="715" y="289">
        <parameter key="kappa" value="true"/>
        <list key="class_weights"/>
      </operator>
      <!--
        The double quotes inside the expression are escaped as &quot; because a
        literal double quote would end the attribute value early and make the
        document ill-formed.
        NOTE(review): no operator in this process is named "perf"; if the first
        argument of param() is meant to be an operator name, confirm which
        operator is intended (TODO confirm).
      -->
      <operator activated="true" class="generate_attributes" compatibility="7.2.001" expanded="true" height="82" name="Generate Attributes" width="90" x="849" y="442">
        <list key="function_descriptions">
          <parameter key="tt" value="param(&quot;perf&quot;,&quot;true_positive&quot;)"/>
        </list>
      </operator>
      <!-- Wiring of the training path -->
      <connect from_op="Retrieve data_dmc2002_train (2)" from_port="output" to_op="Replace Missing Values" to_port="example set input"/>
      <connect from_op="Replace Missing Values" from_port="example set output" to_op="Generate Weight (Stratification)" to_port="example set input"/>
      <connect from_op="Generate Weight (Stratification)" from_port="example set output" to_op="Validation" to_port="training"/>
      <connect from_op="Validation" from_port="model" to_op="Apply Model (2)" to_port="model"/>
      <connect from_op="Validation" from_port="averagable 1" to_port="result 1"/>
      <!-- NOTE(review): the testing subprocess only connects "averagable 1"; verify that results 2 and 3 are needed -->
      <connect from_op="Validation" from_port="averagable 2" to_port="result 2"/>
      <connect from_op="Validation" from_port="averagable 3" to_port="result 3"/>
      <!-- Wiring of the scoring path -->
      <connect from_op="Retrieve data_dmc2002_class" from_port="output" to_op="Apply Model (2)" to_port="unlabelled data"/>
      <connect from_op="Retrieve data_dmc2002_realclass" from_port="output" to_op="Join" to_port="right"/>
      <connect from_op="Apply Model (2)" from_port="labelled data" to_op="Join" to_port="left"/>
      <connect from_op="Apply Model (2)" from_port="model" to_port="result 6"/>
      <connect from_op="Join" from_port="join" to_op="Performance (2)" to_port="labelled data"/>
      <connect from_op="Performance (2)" from_port="performance" to_port="result 5"/>
      <connect from_op="Performance (2)" from_port="example set" to_op="Generate Attributes" to_port="example set input"/>
      <connect from_op="Generate Attributes" from_port="example set output" to_port="result 4"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
      <portSpacing port="sink_result 4" spacing="0"/>
      <portSpacing port="sink_result 5" spacing="0"/>
      <portSpacing port="sink_result 6" spacing="0"/>
      <portSpacing port="sink_result 7" spacing="0"/>
    </process>
  </operator>
</process>
Best Answer
-
MartinLiebig Administrator, Moderator, Employee, RapidMiner Certified Analyst, RapidMiner Certified Expert, University Professor Posts: 3,529 RM Data Scientist
Puh,
I cannot find the exact process anymore. I only have something similar that I cannot post here. I will send it by PM.
~Martin
- Sr. Director Data Solutions, Altair RapidMiner -
Dortmund, Germany
Answers
Hi,
I think Performance (Costs) does what you want.
The other way is to do the fp/tp calculation by hand. Have a look at this video: https://www.youtube.com/watch?v=13krj-Hb1dI . That should explain the ideas.
~Martin
Dortmund, Germany
Thanks, can you provide the process as a file so that I can take a closer look at it?
hi,
I have now managed to use the correct scoring function with your tutorial and it works nicely; I used the sample process "Churn modeling" as a template.
However, my performance on this dataset is poor: only 50% and 1700 € profit for now, so I have to try feature selection or something similar.
I think the best scores for DMC 2002 were above 7000 €...