generate scoring function from Contingency table values...

Fred12Fred12 Member Posts: 344 Unicorn
edited November 2018 in Help

hi,

I have a process here:

What I want to do is extract the individual values of the contingency table from the Performance (Classification) operator or, if that's not possible, from the Performance (Binominal Classification) operator, because I have not seen any fields for the contingency table in the Performance (Classification) operator.

Then I want to generate a scoring-function attribute, with different costs or profits assigned to each value of the contingency table, e.g. 43 * true positives - 4.77 * false positives, etc.

How can I do that? I don't know how to extract those values, and I have already played around a bit...

 

the whole dataset is in the zip file...

 

<?xml version="1.0" encoding="UTF-8"?>
<!--
  RapidMiner process (version 7.2.001).
  Branch 1: cross-validates a W-J48 learner on data_dmc2002_train.
  Branch 2: applies the resulting model to data_dmc2002_class, joins the
  predictions with data_dmc2002_realclass, measures performance and then
  tries to derive a scoring attribute from the result.
-->
<process version="7.2.001">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="7.2.001" expanded="true" name="Process">
    <process expanded="true">

      <!-- Training data: load, replace missing values, add stratification weights. -->
      <operator activated="true" class="retrieve" compatibility="7.2.001" expanded="true" height="68" name="Retrieve data_dmc2002_train (2)" width="90" x="45" y="187">
        <parameter key="repository_entry" value="../data/data_dmc2002_train"/>
      </operator>
      <operator activated="true" class="replace_missing_values" compatibility="7.2.001" expanded="true" height="103" name="Replace Missing Values" width="90" x="179" y="187">
        <list key="columns"/>
      </operator>
      <operator activated="true" class="generate_weight_stratification" compatibility="7.2.001" expanded="true" height="82" name="Generate Weight (Stratification)" width="90" x="313" y="187">
        <parameter key="total_weight" value="500.0"/>
      </operator>

      <!-- Cross validation: 5 validations, stratified sampling. -->
      <operator activated="true" class="x_validation" compatibility="7.2.001" expanded="true" height="166" name="Validation" width="90" x="514" y="85">
        <parameter key="number_of_validations" value="5"/>
        <parameter key="sampling_type" value="stratified sampling"/>

        <!-- Training subprocess: the W-J48 learner produces the model. -->
        <process expanded="true">
          <operator activated="true" class="weka:W-J48" compatibility="7.2.000" expanded="true" height="82" name="W-J48" width="90" x="45" y="34"/>
          <connect from_port="training" to_op="W-J48" to_port="training set"/>
          <connect from_op="W-J48" from_port="model" to_port="model"/>
          <portSpacing port="source_training" spacing="0"/>
          <portSpacing port="sink_model" spacing="0"/>
          <portSpacing port="sink_through 1" spacing="0"/>
        </process>

        <!-- Testing subprocess: apply the model, then measure classification performance (kappa enabled). -->
        <process expanded="true">
          <operator activated="true" class="apply_model" compatibility="7.2.001" expanded="true" height="82" name="Apply Model" width="90" x="45" y="34">
            <list key="application_parameters"/>
          </operator>
          <!-- Only "output 1" of this Multiply is connected below. -->
          <operator activated="true" class="multiply" compatibility="7.2.001" expanded="true" height="82" name="Multiply" width="90" x="112" y="187"/>
          <operator activated="true" class="performance_classification" compatibility="7.2.001" expanded="true" height="82" name="Performance" width="90" x="246" y="34">
            <parameter key="kappa" value="true"/>
            <list key="class_weights"/>
          </operator>
          <connect from_port="model" to_op="Apply Model" to_port="model"/>
          <connect from_port="test set" to_op="Apply Model" to_port="unlabelled data"/>
          <connect from_op="Apply Model" from_port="labelled data" to_op="Multiply" to_port="input"/>
          <connect from_op="Multiply" from_port="output 1" to_op="Performance" to_port="labelled data"/>
          <connect from_op="Performance" from_port="performance" to_port="averagable 1"/>
          <portSpacing port="source_model" spacing="0"/>
          <portSpacing port="source_test set" spacing="0"/>
          <portSpacing port="source_through 1" spacing="0"/>
          <portSpacing port="sink_averagable 1" spacing="0"/>
          <portSpacing port="sink_averagable 2" spacing="0"/>
          <portSpacing port="sink_averagable 3" spacing="0"/>
          <portSpacing port="sink_averagable 4" spacing="0"/>
        </process>
      </operator>

      <!-- Scoring branch: data to classify plus the data set holding the real classes. -->
      <operator activated="true" class="retrieve" compatibility="7.2.001" expanded="true" height="68" name="Retrieve data_dmc2002_class" width="90" x="45" y="340">
        <parameter key="repository_entry" value="../data/data_dmc2002_class"/>
      </operator>
      <operator activated="true" class="retrieve" compatibility="7.2.001" expanded="true" height="68" name="Retrieve data_dmc2002_realclass" width="90" x="246" y="442">
        <parameter key="repository_entry" value="../data/data_dmc2002_realclass"/>
      </operator>
      <operator activated="true" class="apply_model" compatibility="7.2.001" expanded="true" height="82" name="Apply Model (2)" width="90" x="313" y="340">
        <list key="application_parameters"/>
      </operator>
      <!-- Joins the predictions (left) with the real classes (right); no explicit key attributes are listed. -->
      <operator activated="true" class="join" compatibility="7.2.001" expanded="true" height="82" name="Join" width="90" x="514" y="442">
        <list key="key_attributes"/>
        <parameter key="keep_both_join_attributes" value="true"/>
      </operator>
      <operator activated="true" class="performance_classification" compatibility="7.2.001" expanded="true" height="82" name="Performance (2)" width="90" x="715" y="289">
        <parameter key="kappa" value="true"/>
        <list key="class_weights"/>
      </operator>
      <!--
        NOTE(review): the expression for "tt" refers to "perf", but no operator
        with that name is defined in this process. Confirm whether param() can
        read contingency table values at all before relying on this attribute.
      -->
      <operator activated="true" class="generate_attributes" compatibility="7.2.001" expanded="true" height="82" name="Generate Attributes" width="90" x="849" y="442">
        <list key="function_descriptions">
          <parameter key="tt" value="param(&quot;perf&quot;,&quot;true_positive&quot;)"/>
        </list>
      </operator>

      <!-- Wiring: training branch. -->
      <connect from_op="Retrieve data_dmc2002_train (2)" from_port="output" to_op="Replace Missing Values" to_port="example set input"/>
      <connect from_op="Replace Missing Values" from_port="example set output" to_op="Generate Weight (Stratification)" to_port="example set input"/>
      <connect from_op="Generate Weight (Stratification)" from_port="example set output" to_op="Validation" to_port="training"/>
      <connect from_op="Validation" from_port="model" to_op="Apply Model (2)" to_port="model"/>
      <connect from_op="Validation" from_port="averagable 1" to_port="result 1"/>
      <connect from_op="Validation" from_port="averagable 2" to_port="result 2"/>
      <connect from_op="Validation" from_port="averagable 3" to_port="result 3"/>

      <!-- Wiring: scoring branch. -->
      <connect from_op="Retrieve data_dmc2002_class" from_port="output" to_op="Apply Model (2)" to_port="unlabelled data"/>
      <connect from_op="Retrieve data_dmc2002_realclass" from_port="output" to_op="Join" to_port="right"/>
      <connect from_op="Apply Model (2)" from_port="labelled data" to_op="Join" to_port="left"/>
      <connect from_op="Apply Model (2)" from_port="model" to_port="result 6"/>
      <connect from_op="Join" from_port="join" to_op="Performance (2)" to_port="labelled data"/>
      <connect from_op="Performance (2)" from_port="performance" to_port="result 5"/>
      <connect from_op="Performance (2)" from_port="example set" to_op="Generate Attributes" to_port="example set input"/>
      <connect from_op="Generate Attributes" from_port="example set output" to_port="result 4"/>

      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
      <portSpacing port="sink_result 4" spacing="0"/>
      <portSpacing port="sink_result 5" spacing="0"/>
      <portSpacing port="sink_result 6" spacing="0"/>
      <portSpacing port="sink_result 7" spacing="0"/>
    </process>
  </operator>
</process>

Best Answer

  • mschmitzmschmitz Administrator, Moderator, Employee, RapidMiner Certified Analyst, RapidMiner Certified Expert, University Professor Posts: 3,254 RM Data Scientist
    Solution Accepted

    Puh,

    I cannot find the exact process anymore. I only have something similar, which I cannot post here. I will send it by PM.

     

    ~Martin

    - Head of Data Science Services at RapidMiner -
    Dortmund, Germany

Answers

  • mschmitzmschmitz Administrator, Moderator, Employee, RapidMiner Certified Analyst, RapidMiner Certified Expert, University Professor Posts: 3,254 RM Data Scientist

    Hi,

     

    I think Performance (Costs) does what you want.

     

    The other way is to do the FP/TP calculation by hand. Have a look at this video: https://www.youtube.com/watch?v=13krj-Hb1dI . That should explain the ideas.

     

    ~Martin

    - Head of Data Science Services at RapidMiner -
    Dortmund, Germany
  • Fred12Fred12 Member Posts: 344 Unicorn

    Thanks, can you provide me with the process as a file so I can get a better look at it?

  • Fred12Fred12 Member Posts: 344 Unicorn

    hi,

    I have now managed to use the correct scoring function with your tutorial and it works nicely. I used the sample process "Churn modeling" as a template.

    However, my performance on this dataset is poor ... only 50% :( and 1700 € profit for now. I will have to try feature selection or something similar.

    I think best scores for dmc 2002 were above 7000€....

     

Sign In or Register to comment.