Options

how to calculate variable importance

yogafireyogafire Member Posts: 43 Contributor II
hello,

I use RM5.
I wonder how to calculate the variable importance for each variable in my dataset.

do you have any idea?

if you could share the xml process it will be great!!!  ;D

thank you

Answers

  • Options
    landland RapidMiner Certified Analyst, RapidMiner Certified Expert, Member Posts: 2,531 Unicorn
    Hi,
    there are different approaches available. You could either use one of the weighting schemes for estimating the importance of an attribute (as we call variables in RapidMiner). This will use a heuristic of some kind. The other option is to optimize the feature set for a specific learning algorithm. There are several operators available for doing this, for example the Backward Elimination or the Optimize Selection operators.

    This process is a very simple example of how to compute weights for attributes.
    <?xml version="1.0" encoding="UTF-8" standalone="no"?>
    <!--
      Example process: three ways to obtain attribute weights on generated data.
        result 1: weights delivered by "Weight by Relief"
        result 2: attribute weights delivered by "Forward Selection"
        result 3: weights delivered by "Optimize Selection (Evolutionary)"
      Both selection operators evaluate candidate attribute sets with the same
      inner setup: a cross-validation that trains and tests a Decision Tree.
    -->
    <process version="5.0">
      <context>
        <input>
          <location/>
        </input>
        <output>
          <location/>
          <location/>
          <location/>
          <location/>
        </output>
        <macros/>
      </context>
      <operator activated="true" class="process" expanded="true" name="Process">
        <process expanded="true" height="449" width="346">
          <!-- Data source: generated example set using the "random classification" target function. -->
          <operator activated="true" class="generate_data" expanded="true" height="60" name="Generate Data" width="90" x="45" y="30">
            <parameter key="target_function" value="random classification"/>
          </operator>
          <!-- Hands the same example set to each of the three weighting/selection operators. -->
          <operator activated="true" class="multiply" expanded="true" height="112" name="Multiply" width="90" x="45" y="120"/>
          <!-- Wrapper approach 1: evolutionary search over attribute subsets.
               The inner process must deliver a performance vector for each candidate subset;
               it mirrors the evaluation used inside "Forward Selection" below. -->
          <operator activated="true" class="optimize_selection_evolutionary" expanded="true" height="94" name="Optimize Selection (Evolutionary)" width="90" x="179" y="300">
            <process expanded="true" height="489" width="782">
              <operator activated="true" class="x_validation" expanded="true" height="112" name="Validation (2)" width="90" x="45" y="30">
                <description>A cross-validation evaluating a decision tree model.</description>
                <!-- Training side: learn a decision tree on the training split. -->
                <process expanded="true" height="489" width="366">
                  <operator activated="true" class="decision_tree" expanded="true" height="76" name="Decision Tree (2)" width="90" x="45" y="30"/>
                  <connect from_port="training" to_op="Decision Tree (2)" to_port="training set"/>
                  <connect from_op="Decision Tree (2)" from_port="model" to_port="model"/>
                  <portSpacing port="source_training" spacing="0"/>
                  <portSpacing port="sink_model" spacing="0"/>
                  <portSpacing port="sink_through 1" spacing="0"/>
                </process>
                <!-- Testing side: apply the model to the test split and measure performance. -->
                <process expanded="true" height="489" width="366">
                  <operator activated="true" class="apply_model" expanded="true" height="76" name="Apply Model (2)" width="90" x="45" y="30">
                    <list key="application_parameters"/>
                  </operator>
                  <operator activated="true" class="performance" expanded="true" height="76" name="Performance (2)" width="90" x="179" y="30"/>
                  <connect from_port="model" to_op="Apply Model (2)" to_port="model"/>
                  <connect from_port="test set" to_op="Apply Model (2)" to_port="unlabelled data"/>
                  <connect from_op="Apply Model (2)" from_port="labelled data" to_op="Performance (2)" to_port="labelled data"/>
                  <connect from_op="Performance (2)" from_port="performance" to_port="averagable 1"/>
                  <portSpacing port="source_model" spacing="0"/>
                  <portSpacing port="source_test set" spacing="0"/>
                  <portSpacing port="source_through 1" spacing="0"/>
                  <portSpacing port="sink_averagable 1" spacing="0"/>
                  <portSpacing port="sink_averagable 2" spacing="0"/>
                </process>
              </operator>
              <connect from_port="example set" to_op="Validation (2)" to_port="training"/>
              <connect from_op="Validation (2)" from_port="averagable 1" to_port="performance"/>
              <portSpacing port="source_example set" spacing="0"/>
              <portSpacing port="source_through 1" spacing="0"/>
              <portSpacing port="sink_performance" spacing="0"/>
            </process>
          </operator>
          <!-- Wrapper approach 2: forward selection, evaluated by a cross-validated decision tree. -->
          <operator activated="true" class="optimize_selection_forward" expanded="true" height="94" name="Forward Selection" width="90" x="179" y="165">
            <process expanded="true" height="489" width="782">
              <operator activated="true" class="x_validation" expanded="true" height="112" name="Validation" width="90" x="45" y="30">
                <description>A cross-validation evaluating a decision tree model.</description>
                <!-- Training side: learn a decision tree on the training split. -->
                <process expanded="true" height="489" width="366">
                  <operator activated="true" class="decision_tree" expanded="true" height="76" name="Decision Tree" width="90" x="45" y="30"/>
                  <connect from_port="training" to_op="Decision Tree" to_port="training set"/>
                  <connect from_op="Decision Tree" from_port="model" to_port="model"/>
                  <portSpacing port="source_training" spacing="0"/>
                  <portSpacing port="sink_model" spacing="0"/>
                  <portSpacing port="sink_through 1" spacing="0"/>
                </process>
                <!-- Testing side: apply the model to the test split and measure performance. -->
                <process expanded="true" height="489" width="366">
                  <operator activated="true" class="apply_model" expanded="true" height="76" name="Apply Model" width="90" x="45" y="30">
                    <list key="application_parameters"/>
                  </operator>
                  <operator activated="true" class="performance" expanded="true" height="76" name="Performance" width="90" x="179" y="30"/>
                  <connect from_port="model" to_op="Apply Model" to_port="model"/>
                  <connect from_port="test set" to_op="Apply Model" to_port="unlabelled data"/>
                  <connect from_op="Apply Model" from_port="labelled data" to_op="Performance" to_port="labelled data"/>
                  <connect from_op="Performance" from_port="performance" to_port="averagable 1"/>
                  <portSpacing port="source_model" spacing="0"/>
                  <portSpacing port="source_test set" spacing="0"/>
                  <portSpacing port="source_through 1" spacing="0"/>
                  <portSpacing port="sink_averagable 1" spacing="0"/>
                  <portSpacing port="sink_averagable 2" spacing="0"/>
                </process>
              </operator>
              <connect from_port="example set" to_op="Validation" to_port="training"/>
              <connect from_op="Validation" from_port="averagable 1" to_port="performance"/>
              <portSpacing port="source_example set" spacing="0"/>
              <portSpacing port="sink_performance" spacing="0"/>
            </process>
          </operator>
          <!-- Weighting-scheme approach: no inner learner, delivers weights directly. -->
          <operator activated="true" class="weight_by_relief" expanded="true" height="76" name="Weight by Relief" width="90" x="179" y="30"/>
          <connect from_op="Generate Data" from_port="output" to_op="Multiply" to_port="input"/>
          <connect from_op="Multiply" from_port="output 1" to_op="Weight by Relief" to_port="example set"/>
          <connect from_op="Multiply" from_port="output 2" to_op="Forward Selection" to_port="example set"/>
          <connect from_op="Multiply" from_port="output 3" to_op="Optimize Selection (Evolutionary)" to_port="example set in"/>
          <connect from_op="Optimize Selection (Evolutionary)" from_port="weights" to_port="result 3"/>
          <connect from_op="Forward Selection" from_port="attribute weights" to_port="result 2"/>
          <connect from_op="Weight by Relief" from_port="weights" to_port="result 1"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
          <portSpacing port="sink_result 3" spacing="0"/>
          <portSpacing port="sink_result 4" spacing="0"/>
        </process>
      </operator>
    </process>

    Greetings,
      Sebastian
Sign In or Register to comment.