couldn't get top p% attributes

DavidRajuDavidRaju Member Posts: 18 Contributor II
edited November 2018 in Help

Could you please clarify how to get the top p% of attributes from the computed attribute weights, and then the corresponding accuracy?

code attached

<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<process version="5.3.013">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="5.3.013" expanded="true" name="Process">
    <process expanded="true">
      <operator activated="true" class="retrieve" compatibility="5.3.013" expanded="true" height="60" name="Retrieve Sonar" width="90" x="45" y="30">
        <parameter key="repository_entry" value="//Samples/data/Sonar"/>
      </operator>
      <operator activated="true" class="weight_by_information_gain_ratio" compatibility="5.3.013" expanded="true" height="76" name="Weight by Information Gain Ratio" width="90" x="179" y="30"/>
      <operator activated="true" class="select_by_weights" compatibility="5.3.013" expanded="true" height="94" name="Select by Weights" width="90" x="380" y="30">
        <parameter key="weight_relation" value="top p%"/>
        <parameter key="weight" value="0.0"/>
        <parameter key="p" value="10"/>
      </operator>
      <operator activated="true" class="x_validation" compatibility="5.3.013" expanded="true" height="112" name="Validation" width="90" x="581" y="30">
        <parameter key="use_local_random_seed" value="true"/>
        <process expanded="true">
          <operator activated="true" class="k_nn" compatibility="5.3.013" expanded="true" height="76" name="k-NN" width="90" x="112" y="30"/>
          <operator activated="false" class="naive_bayes" compatibility="5.3.013" expanded="true" height="76" name="Naive Bayes" width="90" x="112" y="300"/>
          <connect from_port="training" to_op="k-NN" to_port="training set"/>
          <connect from_op="k-NN" from_port="model" to_port="model"/>
          <portSpacing port="source_training" spacing="0"/>
          <portSpacing port="sink_model" spacing="0"/>
          <portSpacing port="sink_through 1" spacing="0"/>
        </process>
        <process expanded="true">
          <operator activated="true" class="apply_model" compatibility="5.3.013" expanded="true" height="76" name="Apply Model" width="90" x="45" y="30">
            <list key="application_parameters"/>
          </operator>
          <operator activated="true" class="performance_classification" compatibility="5.3.013" expanded="true" height="76" name="Performance" width="90" x="179" y="30">
            <list key="class_weights"/>
          </operator>
          <connect from_port="model" to_op="Apply Model" to_port="model"/>
          <connect from_port="test set" to_op="Apply Model" to_port="unlabelled data"/>
          <connect from_op="Apply Model" from_port="labelled data" to_op="Performance" to_port="labelled data"/>
          <connect from_op="Performance" from_port="performance" to_port="averagable 1"/>
          <portSpacing port="source_model" spacing="0"/>
          <portSpacing port="source_test set" spacing="0"/>
          <portSpacing port="source_through 1" spacing="0"/>
          <portSpacing port="sink_averagable 1" spacing="0"/>
          <portSpacing port="sink_averagable 2" spacing="0"/>
        </process>
      </operator>
      <operator activated="false" class="weight_by_information_gain_ratio" compatibility="5.3.013" expanded="true" height="76" name="Weight by Information Gain Ratio (9)" width="90" x="179" y="615"/>
      <operator activated="false" class="select_by_weights" compatibility="5.3.013" expanded="true" height="94" name="Select by Weights (9)" width="90" x="313" y="615">
        <parameter key="weight_relation" value="top p%"/>
        <parameter key="weight" value="0.126"/>
        <parameter key="k" value="18"/>
        <parameter key="p" value="10"/>
      </operator>
      <operator activated="false" class="multiply" compatibility="5.3.013" expanded="true" height="60" name="Multiply (10)" width="90" x="447" y="615"/>
      <operator activated="false" class="x_validation" compatibility="5.3.013" expanded="true" height="112" name="Validation (9)" width="90" x="581" y="615">
        <parameter key="use_local_random_seed" value="true"/>
        <process expanded="true">
          <operator activated="true" class="k_nn" compatibility="5.3.013" expanded="true" name="k-NN (9)"/>
          <operator activated="false" class="naive_bayes" compatibility="5.3.013" expanded="true" name="Naive Bayes (9)"/>
          <connect from_port="training" to_op="k-NN (9)" to_port="training set"/>
          <connect from_op="k-NN (9)" from_port="model" to_port="model"/>
          <portSpacing port="source_training" spacing="0"/>
          <portSpacing port="sink_model" spacing="0"/>
          <portSpacing port="sink_through 1" spacing="0"/>
        </process>
        <process expanded="true">
          <operator activated="true" class="apply_model" compatibility="5.3.013" expanded="true" name="Apply Model (9)">
            <list key="application_parameters"/>
          </operator>
          <operator activated="true" class="performance_classification" compatibility="5.3.013" expanded="true" name="Performance (9)">
            <list key="class_weights"/>
          </operator>
          <connect from_port="model" to_op="Apply Model (9)" to_port="model"/>
          <connect from_port="test set" to_op="Apply Model (9)" to_port="unlabelled data"/>
          <connect from_op="Apply Model (9)" from_port="labelled data" to_op="Performance (9)" to_port="labelled data"/>
          <connect from_op="Performance (9)" from_port="performance" to_port="averagable 1"/>
          <portSpacing port="source_model" spacing="0"/>
          <portSpacing port="source_test set" spacing="0"/>
          <portSpacing port="source_through 1" spacing="0"/>
          <portSpacing port="sink_averagable 1" spacing="0"/>
          <portSpacing port="sink_averagable 2" spacing="0"/>
        </process>
      </operator>
      <operator activated="false" class="weight_by_information_gain_ratio" compatibility="5.3.013" expanded="true" height="76" name="Weight by Information Gain Ratio (10)" width="90" x="179" y="705"/>
      <operator activated="false" class="select_by_weights" compatibility="5.3.013" expanded="true" height="94" name="Select by Weights (10)" width="90" x="313" y="705">
        <parameter key="weight_relation" value="top p%"/>
        <parameter key="weight" value="0.164"/>
        <parameter key="k" value="15"/>
        <parameter key="p" value="100"/>
      </operator>
      <operator activated="false" class="x_validation" compatibility="5.3.013" expanded="true" height="112" name="Validation (10)" width="90" x="581" y="705">
        <parameter key="use_local_random_seed" value="true"/>
        <process expanded="true">
          <operator activated="true" class="k_nn" compatibility="5.3.013" expanded="true" height="76" name="k-NN (10)" width="90" x="106" y="30"/>
          <operator activated="false" class="naive_bayes" compatibility="5.3.013" expanded="true" height="76" name="Naive Bayes (10)" width="90" x="179" y="300"/>
          <connect from_port="training" to_op="k-NN (10)" to_port="training set"/>
          <connect from_op="k-NN (10)" from_port="model" to_port="model"/>
          <portSpacing port="source_training" spacing="0"/>
          <portSpacing port="sink_model" spacing="0"/>
          <portSpacing port="sink_through 1" spacing="0"/>
        </process>
        <process expanded="true">
          <operator activated="true" class="apply_model" compatibility="5.3.013" expanded="true" height="76" name="Apply Model (10)" width="90" x="45" y="30">
            <list key="application_parameters"/>
          </operator>
          <operator activated="true" class="performance_classification" compatibility="5.3.013" expanded="true" height="76" name="Performance (10)" width="90" x="175" y="30">
            <list key="class_weights"/>
          </operator>
          <connect from_port="model" to_op="Apply Model (10)" to_port="model"/>
          <connect from_port="test set" to_op="Apply Model (10)" to_port="unlabelled data"/>
          <connect from_op="Apply Model (10)" from_port="labelled data" to_op="Performance (10)" to_port="labelled data"/>
          <connect from_op="Performance (10)" from_port="performance" to_port="averagable 1"/>
          <portSpacing port="source_model" spacing="0"/>
          <portSpacing port="source_test set" spacing="0"/>
          <portSpacing port="source_through 1" spacing="0"/>
          <portSpacing port="sink_averagable 1" spacing="0"/>
          <portSpacing port="sink_averagable 2" spacing="0"/>
        </process>
      </operator>
      <connect from_op="Retrieve Sonar" from_port="output" to_op="Weight by Information Gain Ratio" to_port="example set"/>
      <connect from_op="Weight by Information Gain Ratio" from_port="weights" to_op="Select by Weights" to_port="weights"/>
      <connect from_op="Weight by Information Gain Ratio" from_port="example set" to_op="Select by Weights" to_port="example set input"/>
      <connect from_op="Select by Weights" from_port="example set output" to_op="Validation" to_port="training"/>
      <connect from_op="Select by Weights" from_port="original" to_port="result 1"/>
      <connect from_op="Select by Weights" from_port="weights" to_port="result 2"/>
      <connect from_op="Validation" from_port="averagable 1" to_port="result 3"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
      <portSpacing port="sink_result 4" spacing="0"/>
    </process>
  </operator>
</process>


Furthermore, whatever value I enter for the p parameter, it is reset to the default value 1.0 as soon as the parameter field is deselected (becomes inactive). As a workaround, I switched to the XML view and changed the value there from the default 1.0 to the desired p value. How can I enter the desired p value for the p parameter directly in the graphical process view?



Answers

  • MariusHelfMariusHelf RapidMiner Certified Expert, Member Posts: 1,869 Unicorn
    Hi,

    actually the label of the parameter is wrong: it does not take the top p%, but you have to specify a fraction, i.e. if you want the top 10% of the attributes the value should be 0.1. That's probably also the reason for your second issue.

    Anyway, I have created an internal ticket requesting to either fix the label or the behavior of the parameter.

    Best regards,
    Marius
  • DavidRajuDavidRaju Member Posts: 18 Contributor II
    Thank you, I will try it.
Sign In or Register to comment.