🦉🦉   WOOT WOOT!   RAPIDMINER WISDOM 2020 EARLY BIRD REGISTRATION ENDS FRIDAY DEC 13!   REGISTER NOW!   🦉🦉

can someone share the steps for calculating the MCC value

induindu Member Posts: 9 Newbie
Can someone share the steps for calculating the MCC value, or the average MCC value, in RapidMiner?

Answers

  • varunm1varunm1 Moderator, Member Posts: 965   Unicorn
    edited November 18
    Hello @indu

    Can you be more specific? What is MCC? Matthews Correlation Coefficient?
    Tghadially
  • lionelderkrikorlionelderkrikor Moderator, RapidMiner Certified Analyst, Member Posts: 909   Unicorn
    Hi @indu;

    If Varun is right, you can use:
     - a Confusion Matrix to ExampleSet operator,
     - 4 Extract Macro operators (to extract the TP, TN, FP and FN),
     - a Generate Macro operator to calculate the MCC from TP, TN, FP and FN,
     - a Create ExampleSet operator to display the MCC as an example set in the results panel.

    Here is a process implementing this method (adapt it to your own data):

    <?xml version="1.0" encoding="UTF-8"?><process version="9.5.000">
      <context>
        <input/>
        <output/>
        <macros/>
      </context>
      <operator activated="true" class="process" compatibility="9.4.000" expanded="true" name="Process" origin="GENERATED_TUTORIAL">
        <parameter key="logverbosity" value="init"/>
        <parameter key="random_seed" value="2001"/>
        <parameter key="send_mail" value="never"/>
        <parameter key="notification_email" value=""/>
        <parameter key="process_duration_for_mail" value="30"/>
        <parameter key="encoding" value="SYSTEM"/>
        <process expanded="true">
          <!-- Retrieves the repository entry //Samples/data/Golf; its output feeds the Validation operator. -->
          <operator name="Retrieve Golf"
                    class="retrieve"
                    activated="true"
                    compatibility="9.5.000"
                    origin="GENERATED_TUTORIAL"
                    expanded="true"
                    x="45" y="34" width="90" height="68">
            <parameter key="repository_entry" value="//Samples/data/Golf"/>
          </operator>
          <!-- Cross-validation with 10 folds and stratified sampling. The first nested process trains a
               decision tree; the second applies the model to the test set and measures binominal
               classification performance. -->
          <operator activated="true" class="concurrency:cross_validation" compatibility="8.2.000" expanded="true" height="145" name="Validation" origin="GENERATED_TUTORIAL" width="90" x="447" y="34">
            <parameter key="split_on_batch_attribute" value="false"/>
            <parameter key="leave_one_out" value="false"/>
            <parameter key="number_of_folds" value="10"/>
            <parameter key="sampling_type" value="stratified sampling"/>
            <parameter key="use_local_random_seed" value="false"/>
            <parameter key="local_random_seed" value="1992"/>
            <parameter key="enable_parallel_execution" value="true"/>
            <!-- Training subprocess: the training set goes into Decision Tree, whose model is passed on. -->
            <process expanded="true">
              <operator activated="true" class="concurrency:parallel_decision_tree" compatibility="9.4.000" expanded="true" height="103" name="Decision Tree" origin="GENERATED_TUTORIAL" width="90" x="179" y="34">
                <parameter key="criterion" value="gain_ratio"/>
                <parameter key="maximal_depth" value="10"/>
                <parameter key="apply_pruning" value="true"/>
                <parameter key="confidence" value="0.1"/>
                <parameter key="apply_prepruning" value="true"/>
                <parameter key="minimal_gain" value="0.01"/>
                <parameter key="minimal_leaf_size" value="2"/>
                <parameter key="minimal_size_for_split" value="4"/>
                <parameter key="number_of_prepruning_alternatives" value="3"/>
              </operator>
              <connect from_port="training set" to_op="Decision Tree" to_port="training set"/>
              <connect from_op="Decision Tree" from_port="model" to_port="model"/>
              <portSpacing port="source_training set" spacing="0"/>
              <portSpacing port="sink_model" spacing="0"/>
              <portSpacing port="sink_through 1" spacing="0"/>
              <description align="left" color="green" colored="true" height="80" resized="true" width="248" x="37" y="137">In the training phase, a model is built on the current training data set. (90 % of data by default, 10 times)</description>
            </process>
            <!-- Testing subprocess: Apply Model labels the test set, then Performance (2) evaluates it. -->
            <process expanded="true">
              <operator activated="true" class="apply_model" compatibility="9.5.000" expanded="true" height="82" name="Apply Model" origin="GENERATED_TUTORIAL" width="90" x="45" y="34">
                <list key="application_parameters"/>
                <parameter key="create_view" value="false"/>
              </operator>
              <!-- Only accuracy, AUC and AUC (pessimistic) are switched on below; every other criterion is false. -->
              <operator activated="true" class="performance_binominal_classification" compatibility="9.5.000" expanded="true" height="82" name="Performance (2)" origin="GENERATED_TUTORIAL" width="90" x="313" y="34">
                <parameter key="manually_set_positive_class" value="false"/>
                <parameter key="main_criterion" value="first"/>
                <parameter key="accuracy" value="true"/>
                <parameter key="classification_error" value="false"/>
                <parameter key="kappa" value="false"/>
                <parameter key="AUC (optimistic)" value="false"/>
                <parameter key="AUC" value="true"/>
                <parameter key="AUC (pessimistic)" value="true"/>
                <parameter key="precision" value="false"/>
                <parameter key="recall" value="false"/>
                <parameter key="lift" value="false"/>
                <parameter key="fallout" value="false"/>
                <parameter key="f_measure" value="false"/>
                <parameter key="false_positive" value="false"/>
                <parameter key="false_negative" value="false"/>
                <parameter key="true_positive" value="false"/>
                <parameter key="true_negative" value="false"/>
                <parameter key="sensitivity" value="false"/>
                <parameter key="specificity" value="false"/>
                <parameter key="youden" value="false"/>
                <parameter key="positive_predictive_value" value="false"/>
                <parameter key="negative_predictive_value" value="false"/>
                <parameter key="psep" value="false"/>
                <parameter key="skip_undefined_labels" value="true"/>
                <parameter key="use_example_weights" value="true"/>
              </operator>
              <connect from_port="model" to_op="Apply Model" to_port="model"/>
              <connect from_port="test set" to_op="Apply Model" to_port="unlabelled data"/>
              <connect from_op="Apply Model" from_port="labelled data" to_op="Performance (2)" to_port="labelled data"/>
              <connect from_op="Performance (2)" from_port="performance" to_port="performance 1"/>
              <connect from_op="Performance (2)" from_port="example set" to_port="test set results"/>
              <portSpacing port="source_model" spacing="0"/>
              <portSpacing port="source_test set" spacing="0"/>
              <portSpacing port="source_through 1" spacing="0"/>
              <portSpacing port="sink_test set results" spacing="0"/>
              <portSpacing port="sink_performance 1" spacing="0"/>
              <portSpacing port="sink_performance 2" spacing="0"/>
              <description align="left" color="blue" colored="true" height="103" resized="true" width="315" x="38" y="137">The model created in the Training step is applied to the current test set (10 %).&lt;br/&gt;The performance is evaluated and sent to the operator results.</description>
            </process>
            <description align="center" color="transparent" colored="false" width="126">A cross-validation evaluating a decision tree model.</description>
          </operator>
          <!-- Receives the performance output of Validation and delivers it as an example set, which the
               Extract Macro operators read cell by cell. An "after" breakpoint is set on this operator. -->
          <operator name="Confusion Matrix to ExampleSet"
                    class="converters:confusionmatrix_2_example_set"
                    activated="true"
                    breakpoints="after"
                    compatibility="0.6.000"
                    origin="GENERATED_TUTORIAL"
                    expanded="true"
                    x="648" y="136" width="90" height="82"/>
          <!-- Sets macro TP to the data value of attribute "true yes" at example index 2.
               NOTE(review): this presumably is the "predicted yes" row of the converted matrix; the
               attribute name and row index are specific to the Golf labels, so verify them for other data. -->
          <operator name="Extract Macro TP"
                    class="extract_macro"
                    activated="true"
                    compatibility="9.5.000"
                    expanded="true"
                    x="782" y="136" width="90" height="68">
            <parameter key="macro" value="TP"/>
            <parameter key="macro_type" value="data_value"/>
            <parameter key="statistics" value="average"/>
            <parameter key="attribute_name" value="true yes"/>
            <parameter key="example_index" value="2"/>
            <list key="additional_macros"/>
          </operator>
          <!-- Sets macro TN to the data value of attribute "true no" at example index 1.
               NOTE(review): this presumably is the "predicted no" row of the converted matrix; the
               attribute name and row index are specific to the Golf labels, so verify them for other data. -->
          <operator name="Extract Macro TN"
                    class="extract_macro"
                    activated="true"
                    compatibility="9.5.000"
                    expanded="true"
                    x="916" y="136" width="90" height="68">
            <parameter key="macro" value="TN"/>
            <parameter key="macro_type" value="data_value"/>
            <parameter key="statistics" value="average"/>
            <parameter key="attribute_name" value="true no"/>
            <parameter key="example_index" value="1"/>
            <list key="additional_macros"/>
          </operator>
          <!-- Sets macro FN to the data value of attribute "true yes" at example index 1.
               NOTE(review): this presumably is the "predicted no" row of the converted matrix; the
               attribute name and row index are specific to the Golf labels, so verify them for other data. -->
          <operator name="Extract Macro FN"
                    class="extract_macro"
                    activated="true"
                    compatibility="9.5.000"
                    expanded="true"
                    x="1050" y="136" width="90" height="68">
            <parameter key="macro" value="FN"/>
            <parameter key="macro_type" value="data_value"/>
            <parameter key="statistics" value="average"/>
            <parameter key="attribute_name" value="true yes"/>
            <parameter key="example_index" value="1"/>
            <list key="additional_macros"/>
          </operator>
          <!-- Sets macro FP to the data value of attribute "true no" at example index 2.
               NOTE(review): this presumably is the "predicted yes" row of the converted matrix; the
               attribute name and row index are specific to the Golf labels, so verify them for other data.
               An "after" breakpoint is set on this operator. -->
          <operator name="Extract Macro FP"
                    class="extract_macro"
                    activated="true"
                    breakpoints="after"
                    compatibility="9.5.000"
                    expanded="true"
                    x="1184" y="136" width="90" height="68">
            <parameter key="macro" value="FP"/>
            <parameter key="macro_type" value="data_value"/>
            <parameter key="statistics" value="average"/>
            <parameter key="attribute_name" value="true no"/>
            <parameter key="example_index" value="2"/>
            <list key="additional_macros"/>
          </operator>
          <!-- Defines macro MCC from the macros TP, TN, FP and FN:
                 MCC = (TP*TN - FP*FN) / sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN))
               When any factor under the square root is zero (for example, the model never predicts one
               of the classes), the quotient would be 0/0. The if(...) guard returns 0 in that case,
               which is the usual convention for an undefined MCC, instead of producing NaN. -->
          <operator activated="true" class="generate_macro" compatibility="9.5.000" expanded="true" height="82" name="Generate Macro" width="90" x="1318" y="136">
            <list key="function_descriptions">
              <parameter key="MCC" value="if((eval(%{TP})+eval(%{FP}))*(eval(%{TP})+eval(%{FN}))*(eval(%{TN})+eval(%{FP}))*(eval(%{TN})+eval(%{FN}))==0,0,(eval(%{TP})*eval(%{TN})-eval(%{FP})*eval(%{FN}))/(sqrt((eval(%{TP})+eval(%{FP}))*(eval(%{TP})+eval(%{FN}))*(eval(%{TN})+eval(%{FP}))*(eval(%{TN})+eval(%{FN})))))"/>
            </list>
          </operator>
          <!-- Builds an example set from the comma separated text in input_csv_text: a header line "MCC"
               followed by one line holding the value of macro MCC. Its output goes to result 3. -->
          <operator name="Create ExampleSet"
                    class="utility:create_exampleset"
                    activated="true"
                    compatibility="9.5.000"
                    expanded="true"
                    x="1117" y="238" width="90" height="68">
            <parameter key="generator_type" value="comma separated text"/>
            <parameter key="number_of_examples" value="100"/>
            <parameter key="use_stepsize" value="false"/>
            <list key="function_descriptions"/>
            <parameter key="add_id_attribute" value="false"/>
            <list key="numeric_series_configuration"/>
            <list key="date_series_configuration"/>
            <list key="date_series_configuration (interval)"/>
            <parameter key="date_format" value="yyyy-MM-dd HH:mm:ss"/>
            <parameter key="time_zone" value="SYSTEM"/>
            <parameter key="input_csv_text" value="MCC&#10;%{MCC}"/>
            <parameter key="column_separator" value=","/>
            <parameter key="parse_all_as_nominal" value="false"/>
            <parameter key="decimal_point_character" value="."/>
            <parameter key="trim_attribute_names" value="true"/>
          </operator>
          <!-- Data flow: Retrieve Golf, then Validation, then Confusion Matrix to ExampleSet. -->
          <connect from_op="Retrieve Golf" from_port="output" to_op="Validation" to_port="example set"/>
          <connect from_op="Validation" from_port="performance 1" to_op="Confusion Matrix to ExampleSet" to_port="per"/>
          <connect from_op="Confusion Matrix to ExampleSet" from_port="exa" to_op="Extract Macro TP" to_port="example set"/>
          <!-- The "ori" output of the converter is delivered as result 2. -->
          <connect from_op="Confusion Matrix to ExampleSet" from_port="ori" to_port="result 2"/>
          <!-- The converted example set is passed through the four Extract Macro operators in the order
               TP, TN, FN, FP, then through Generate Macro, and is delivered as result 1. -->
          <connect from_op="Extract Macro TP" from_port="example set" to_op="Extract Macro TN" to_port="example set"/>
          <connect from_op="Extract Macro TN" from_port="example set" to_op="Extract Macro FN" to_port="example set"/>
          <connect from_op="Extract Macro FN" from_port="example set" to_op="Extract Macro FP" to_port="example set"/>
          <connect from_op="Extract Macro FP" from_port="example set" to_op="Generate Macro" to_port="through 1"/>
          <connect from_op="Generate Macro" from_port="through 1" to_port="result 1"/>
          <!-- Create ExampleSet has no incoming connection; it only reads the MCC macro.
               NOTE(review): it presumably runs after Generate Macro because of its position in the
               operator list; confirm the execution order if operators are rearranged. -->
          <connect from_op="Create ExampleSet" from_port="output" to_port="result 3"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
          <portSpacing port="sink_result 3" spacing="0"/>
          <portSpacing port="sink_result 4" spacing="0"/>
        </process>
      </operator>
    </process>
    

    Hope this helps,

    Regards,

    Lionel



    varunm1sgenzerIngoRM
Sign In or Register to comment.