Due to recent updates, all users are required to create an Altair One account to log in to the RapidMiner community. Click the Register button to create your account using the same email that you previously used to log in to the RapidMiner community. This will ensure that any previously created content is synced to your Altair One account. Once you log in, you will be asked to provide a username that identifies you to other Community users. Email us at Community with questions.

Boosting, Bagging and Random Forest

anyavery1anyavery1 Member Posts: 1 Learner III
edited November 2018 in Help
Hi
I ran AdaBoost, Bagging and Random Forest on my data. The performance vector results were not really different from the decision tree's performance vector result. So I used the Sonar data and ran Decision Tree followed by AdaBoost, Bagging and Random Forest. Although the tutorial mentions that the performance vector accuracy improves, I still got similar results.
Require inputs.

Regards
Anya

Answers

  • JEdwardJEdward RapidMiner Certified Analyst, RapidMiner Certified Expert, Member Posts: 578 Unicorn
    Here's an example of using the Stacking operator for Ensemble modelling.  It creates a Forest of Forests to produce greater mine detection accuracy. 
    <?xml version="1.0" encoding="UTF-8" standalone="no"?>
    <!--
      RapidMiner process definition (process version 7.0.001).
      Data flow, as wired by the connect elements below:
        Retrieve Sonar > Generate ID > Sample (2) > StackingMagic > result 1 / result 2
      "Sample (2)" feeds its sampled output to StackingMagic "in 1" (used for training)
      and its "original" output to StackingMagic "in 2" (used to derive the test data).
    -->
    <process version="7.0.001">
      <context>
        <input/>
        <output/>
        <macros/>
      </context>
      <operator activated="true" class="process" compatibility="7.0.001" expanded="true" name="Process">
        <process expanded="true">
          <!-- Loads the Sonar example set from the Samples repository. -->
          <operator activated="true" class="retrieve" compatibility="7.0.001" expanded="true" height="68" name="Retrieve Sonar" width="90" x="45" y="85">
            <parameter key="repository_entry" value="//Samples/data/Sonar"/>
          </operator>
          <!-- Adds an ID to every example; presumably this is what lets "Set Minus" further down match rows (TODO confirm). -->
          <operator activated="true" class="generate_id" compatibility="7.0.001" expanded="true" height="82" name="Generate ID" width="90" x="45" y="187"/>
          <!-- Probability sampling with per-class probabilities: 0.2 for 'Mine' and 0.5 for 'Rock'. -->
          <operator activated="true" class="sample" compatibility="7.0.001" expanded="true" height="82" name="Sample (2)" width="90" x="179" y="85">
            <parameter key="sample" value="probability"/>
            <parameter key="balance_data" value="true"/>
            <list key="sample_size_per_class"/>
            <list key="sample_ratio_per_class"/>
            <list key="sample_probability_per_class">
              <parameter key="Mine" value="0.2"/>
              <parameter key="Rock" value="0.5"/>
            </list>
            <description align="center" color="transparent" colored="false" width="126">This undersamples the class 'Mine' to make it even more difficult to detect mines</description>
          </operator>
          <!--
            Subprocess that trains the stacking model on "in 1", builds the test set as
            "in 2" minus "in 1" via Set Minus, applies the model and measures performance.
            Outputs: "out 1" = model passed through Apply Model, "out 2" = performance vector.
          -->
          <operator activated="true" class="subprocess" compatibility="7.0.001" expanded="true" height="103" name="StackingMagic" width="90" x="380" y="34">
            <process expanded="true">
              <!-- Duplicates the training sample: output 1 goes to Stacking, output 2 is the subtrahend of Set Minus. -->
              <operator activated="true" class="multiply" compatibility="7.0.001" expanded="true" height="124" name="Multiply" width="90" x="45" y="85"/>
              <operator activated="true" class="stacking" compatibility="7.0.001" expanded="true" height="68" name="Stacking" width="90" x="179" y="34">
                <parameter key="keep_all_attributes" value="false"/>
                <!-- First Stacking subprocess: produces the base model(s). -->
                <process expanded="true">
                  <!-- Stores the "count" statistic of value 'Mine' in attribute 'class' as macro posClass. -->
                  <operator activated="true" class="extract_macro" compatibility="7.0.001" expanded="true" height="68" name="Extract Macro" width="90" x="45" y="34">
                    <parameter key="macro" value="posClass"/>
                    <parameter key="macro_type" value="statistics"/>
                    <parameter key="statistics" value="count"/>
                    <parameter key="attribute_name" value="class"/>
                    <parameter key="attribute_value" value="Mine"/>
                    <list key="additional_macros"/>
                  </operator>
                  <!-- Runs the inner process 20 times; set_iteration_macro is enabled and %{iteration} is used inside as the random seed. -->
                  <operator activated="true" class="loop" compatibility="7.0.001" expanded="true" height="82" name="Loop Trees" width="90" x="179" y="34">
                    <parameter key="set_iteration_macro" value="true"/>
                    <parameter key="iterations" value="20"/>
                    <process expanded="true">
                      <!-- Balanced sample: both classes are sized to %{posClass}; the local random seed is the loop iteration. -->
                      <operator activated="true" class="sample" compatibility="7.0.001" expanded="true" height="82" name="Sample" width="90" x="45" y="34">
                        <parameter key="balance_data" value="true"/>
                        <list key="sample_size_per_class">
                          <parameter key="Mine" value="%{posClass}"/>
                          <parameter key="Rock" value="%{posClass}"/>
                        </list>
                        <list key="sample_ratio_per_class"/>
                        <list key="sample_probability_per_class"/>
                        <parameter key="use_local_random_seed" value="true"/>
                        <parameter key="local_random_seed" value="%{iteration}"/>
                      </operator>
                      <!-- Only "output 1" of this Multiply is connected (to Random Forest); it is the extension point for further learners. -->
                      <operator activated="true" class="multiply" compatibility="7.0.001" expanded="true" height="82" name="Multiply (2)" width="90" x="112" y="136">
                        <description align="center" color="transparent" colored="false" width="126">You can add other models as you like here.</description>
                      </operator>
                      <!-- Random Forest learner with no parameters set in this file. -->
                      <operator activated="true" class="parallel_random_forest" compatibility="7.0.001" expanded="true" height="82" name="Random Forest" width="90" x="246" y="136"/>
                      <connect from_port="input 1" to_op="Sample" to_port="example set input"/>
                      <connect from_op="Sample" from_port="example set output" to_op="Multiply (2)" to_port="input"/>
                      <connect from_op="Multiply (2)" from_port="output 1" to_op="Random Forest" to_port="training set"/>
                      <connect from_op="Random Forest" from_port="model" to_port="output 1"/>
                      <portSpacing port="source_input 1" spacing="0"/>
                      <portSpacing port="source_input 2" spacing="0"/>
                      <portSpacing port="sink_output 1" spacing="0"/>
                      <portSpacing port="sink_output 2" spacing="0"/>
                    </process>
                    <description align="center" color="transparent" colored="false" width="126">Each loop builds a new tree using a different balanced data sample. The loop iteration is used as the RandomSeed so the more loops, the more trees.</description>
                  </operator>
                  <connect from_port="training set 1" to_op="Extract Macro" to_port="example set"/>
                  <connect from_op="Extract Macro" from_port="example set" to_op="Loop Trees" to_port="input 1"/>
                  <connect from_op="Loop Trees" from_port="output 1" to_port="base model 1"/>
                  <portSpacing port="source_training set 1" spacing="0"/>
                  <portSpacing port="source_training set 2" spacing="0"/>
                  <portSpacing port="sink_base model 1" spacing="0"/>
                  <portSpacing port="sink_base model 2" spacing="0"/>
                </process>
                <!-- Second Stacking subprocess: trains the stacking model on the "stacking examples" port. -->
                <process expanded="true">
                  <!-- Deactivated (activated="false") and not connected. -->
                  <operator activated="false" class="naive_bayes" compatibility="7.0.001" expanded="true" height="82" name="Naive Bayes" width="90" x="45" y="85"/>
                  <!-- NOTE(review): the "weka:" class prefix suggests this operator comes from the Weka extension; confirm it is installed. -->
                  <operator activated="true" class="weka:W-Logistic" compatibility="7.0.000" expanded="true" height="82" name="W-Logistic" width="90" x="45" y="187"/>
                  <connect from_port="stacking examples" to_op="W-Logistic" to_port="training set"/>
                  <connect from_op="W-Logistic" from_port="model" to_port="stacking model"/>
                  <portSpacing port="source_stacking examples" spacing="0"/>
                  <portSpacing port="sink_stacking model" spacing="0"/>
                </process>
                <description align="center" color="transparent" colored="false" width="126">This is the operator where things are interesting.</description>
              </operator>
              <!-- Input is the "original" set from "Sample (2)" (via "in 2"); the subtrahend is the training sample, so the result is the held-out examples. -->
              <operator activated="true" class="set_minus" compatibility="7.0.001" expanded="true" height="82" name="Set Minus" width="90" x="179" y="289">
                <description align="center" color="transparent" colored="false" width="126">Keeps it fair (ineffect it's a split test, but I went the complicated way)</description>
              </operator>
              <!-- Applies the stacking model to the Set Minus output. -->
              <operator activated="true" class="apply_model" compatibility="7.0.001" expanded="true" height="82" name="Apply Model" width="90" x="313" y="136">
                <list key="application_parameters"/>
              </operator>
              <!-- Performance operator with no parameters set in this file; evaluates the labelled data from Apply Model. -->
              <operator activated="true" class="performance" compatibility="7.0.001" expanded="true" height="82" name="Performance Loop Stacking" width="90" x="380" y="34"/>
              <connect from_port="in 1" to_op="Multiply" to_port="input"/>
              <connect from_port="in 2" to_op="Set Minus" to_port="example set input"/>
              <connect from_op="Multiply" from_port="output 1" to_op="Stacking" to_port="training set"/>
              <connect from_op="Multiply" from_port="output 2" to_op="Set Minus" to_port="subtrahend"/>
              <connect from_op="Stacking" from_port="model" to_op="Apply Model" to_port="model"/>
              <connect from_op="Set Minus" from_port="example set output" to_op="Apply Model" to_port="unlabelled data"/>
              <connect from_op="Apply Model" from_port="labelled data" to_op="Performance Loop Stacking" to_port="labelled data"/>
              <connect from_op="Apply Model" from_port="model" to_port="out 1"/>
              <connect from_op="Performance Loop Stacking" from_port="performance" to_port="out 2"/>
              <portSpacing port="source_in 1" spacing="0"/>
              <portSpacing port="source_in 2" spacing="189"/>
              <portSpacing port="source_in 3" spacing="0"/>
              <portSpacing port="sink_out 1" spacing="0"/>
              <portSpacing port="sink_out 2" spacing="0"/>
              <portSpacing port="sink_out 3" spacing="0"/>
            </process>
            <description align="center" color="transparent" colored="false" width="126">This uses a loop to generate many decision trees using different samples of the data for a more complete picture.</description>
          </operator>
          <!-- Top-level wiring: the model is delivered on "result 1" and the performance vector on "result 2". -->
          <connect from_op="Retrieve Sonar" from_port="output" to_op="Generate ID" to_port="example set input"/>
          <connect from_op="Generate ID" from_port="example set output" to_op="Sample (2)" to_port="example set input"/>
          <connect from_op="Sample (2)" from_port="example set output" to_op="StackingMagic" to_port="in 1"/>
          <connect from_op="Sample (2)" from_port="original" to_op="StackingMagic" to_port="in 2"/>
          <connect from_op="StackingMagic" from_port="out 1" to_port="result 1"/>
          <connect from_op="StackingMagic" from_port="out 2" to_port="result 2"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
          <portSpacing port="sink_result 3" spacing="0"/>
        </process>
      </operator>
    </process>
Sign In or Register to comment.