The Altair Community is migrating to a new platform to provide a better experience for you. The RapidMiner Community will merge with the Altair Community at the same time. In preparation for the migration, both communities are on read-only mode from July 15th - July 24th, 2024. Technical support via cases will continue to work as is. For any urgent requests from Students/Faculty members, please submit the form linked here.
Options

Boosting, Bagging and Random Forest

anyavery1 Member Posts: 1 Contributor I
edited November 2018 in Help
Hi
I ran AdaBoost, Bagging and Random Forest on my data. The performance vector results were not really different from the performance vector of a single Decision Tree. So I used the Sonar data and ran a Decision Tree, followed by AdaBoost, Bagging and Random Forest. Although the tutorial mentions that the performance vector accuracy improves, I still got similar results.
I would appreciate any input.

Regards
Anya

Answers

  • Options
    JEdward RapidMiner Certified Analyst, RapidMiner Certified Expert, Member Posts: 578 Unicorn
    Here's an example of using the Stacking operator for Ensemble modelling.  It creates a Forest of Forests to produce greater mine detection accuracy. 
    <?xml version="1.0" encoding="UTF-8" standalone="no"?>
    <!--
      RapidMiner process (file version 7.0.001): stacking example on the Sonar sample data.

      Top-level flow, as wired by the <connect> elements at the end of the root process:
        Retrieve Sonar -> Generate ID -> Sample (2) -> StackingMagic -> result 1 / result 2
      "Sample (2)" feeds StackingMagic twice: its sampled output goes to "in 1" and its
      "original" output goes to "in 2".
      result 1 receives the model passed through Apply Model; result 2 receives the
      performance vector.
    -->
    <process version="7.0.001">
      <context>
        <input/>
        <output/>
        <macros/>
      </context>
      <operator activated="true" class="process" compatibility="7.0.001" expanded="true" name="Process">
        <process expanded="true">
          <!-- Loads the Sonar example set from the Samples repository. -->
          <operator activated="true" class="retrieve" compatibility="7.0.001" expanded="true" height="68" name="Retrieve Sonar" width="90" x="45" y="85">
            <parameter key="repository_entry" value="//Samples/data/Sonar"/>
          </operator>
          <!-- NOTE(review): presumably the generated id is what lets Set Minus (inside StackingMagic) match examples between the sampled and original sets; confirm. -->
          <operator activated="true" class="generate_id" compatibility="7.0.001" expanded="true" height="82" name="Generate ID" width="90" x="45" y="187"/>
          <!-- Per-class probability sampling: Mine is kept with probability 0.2, Rock with 0.5. -->
          <operator activated="true" class="sample" compatibility="7.0.001" expanded="true" height="82" name="Sample (2)" width="90" x="179" y="85">
            <parameter key="sample" value="probability"/>
            <parameter key="balance_data" value="true"/>
            <list key="sample_size_per_class"/>
            <list key="sample_ratio_per_class"/>
            <list key="sample_probability_per_class">
              <parameter key="Mine" value="0.2"/>
              <parameter key="Rock" value="0.5"/>
            </list>
            <description align="center" color="transparent" colored="false" width="126">This undersamples the class 'Mine' to make it even more difficult to detect mines</description>
          </operator>
          <!--
            Subprocess wrapping training and evaluation.
              in 1 (sampled set)  -> Multiply -> Stacking "training set" and Set Minus "subtrahend"
              in 2 (original set) -> Set Minus "example set input"
            Set Minus output is scored by Apply Model with the Stacking model, then measured by
            "Performance Loop Stacking".
          -->
          <operator activated="true" class="subprocess" compatibility="7.0.001" expanded="true" height="103" name="StackingMagic" width="90" x="380" y="34">
            <process expanded="true">
              <operator activated="true" class="multiply" compatibility="7.0.001" expanded="true" height="124" name="Multiply" width="90" x="45" y="85"/>
              <!--
                Stacking has two inner processes:
                  1) first <process>:  produces the base model(s) delivered to "base model 1"
                  2) second <process>: trains the stacking model from "stacking examples"
              -->
              <operator activated="true" class="stacking" compatibility="7.0.001" expanded="true" height="68" name="Stacking" width="90" x="179" y="34">
                <parameter key="keep_all_attributes" value="false"/>
                <process expanded="true">
                  <!-- Stores the "count" statistic for value "Mine" of attribute "class" in the macro posClass; it is used below as the per-class sample size. -->
                  <operator activated="true" class="extract_macro" compatibility="7.0.001" expanded="true" height="68" name="Extract Macro" width="90" x="45" y="34">
                    <parameter key="macro" value="posClass"/>
                    <parameter key="macro_type" value="statistics"/>
                    <parameter key="statistics" value="count"/>
                    <parameter key="attribute_name" value="class"/>
                    <parameter key="attribute_value" value="Mine"/>
                    <list key="additional_macros"/>
                  </operator>
                  <!-- Runs the inner process 20 times with the iteration macro enabled; the inner Sample reads it as %{iteration}. -->
                  <!-- NOTE(review): "output 1" of this loop is wired straight to "base model 1"; presumably the loop delivers the per-iteration models as a collection. Confirm. -->
                  <operator activated="true" class="loop" compatibility="7.0.001" expanded="true" height="82" name="Loop Trees" width="90" x="179" y="34">
                    <parameter key="set_iteration_macro" value="true"/>
                    <parameter key="iterations" value="20"/>
                    <process expanded="true">
                      <!-- Draws %{posClass} examples for each of Mine and Rock, seeded with the current %{iteration} so every pass uses a different seed. -->
                      <operator activated="true" class="sample" compatibility="7.0.001" expanded="true" height="82" name="Sample" width="90" x="45" y="34">
                        <parameter key="balance_data" value="true"/>
                        <list key="sample_size_per_class">
                          <parameter key="Mine" value="%{posClass}"/>
                          <parameter key="Rock" value="%{posClass}"/>
                        </list>
                        <list key="sample_ratio_per_class"/>
                        <list key="sample_probability_per_class"/>
                        <parameter key="use_local_random_seed" value="true"/>
                        <parameter key="local_random_seed" value="%{iteration}"/>
                      </operator>
                      <!-- Only "output 1" of this Multiply is connected; it is the extension point for additional learners. -->
                      <operator activated="true" class="multiply" compatibility="7.0.001" expanded="true" height="82" name="Multiply (2)" width="90" x="112" y="136">
                        <description align="center" color="transparent" colored="false" width="126">You can add other models as you like here.</description>
                      </operator>
                      <!-- NOTE(review): the descriptions on "Loop Trees" and "StackingMagic" speak of trees, but the learner trained per iteration is a parallel_random_forest with no parameters overridden. -->
                      <operator activated="true" class="parallel_random_forest" compatibility="7.0.001" expanded="true" height="82" name="Random Forest" width="90" x="246" y="136"/>
                      <connect from_port="input 1" to_op="Sample" to_port="example set input"/>
                      <connect from_op="Sample" from_port="example set output" to_op="Multiply (2)" to_port="input"/>
                      <connect from_op="Multiply (2)" from_port="output 1" to_op="Random Forest" to_port="training set"/>
                      <connect from_op="Random Forest" from_port="model" to_port="output 1"/>
                      <portSpacing port="source_input 1" spacing="0"/>
                      <portSpacing port="source_input 2" spacing="0"/>
                      <portSpacing port="sink_output 1" spacing="0"/>
                      <portSpacing port="sink_output 2" spacing="0"/>
                    </process>
                    <description align="center" color="transparent" colored="false" width="126">Each loop builds a new tree using a different balanced data sample. The loop iteration is used as the RandomSeed so the more loops, the more trees.</description>
                  </operator>
                  <connect from_port="training set 1" to_op="Extract Macro" to_port="example set"/>
                  <connect from_op="Extract Macro" from_port="example set" to_op="Loop Trees" to_port="input 1"/>
                  <connect from_op="Loop Trees" from_port="output 1" to_port="base model 1"/>
                  <portSpacing port="source_training set 1" spacing="0"/>
                  <portSpacing port="source_training set 2" spacing="0"/>
                  <portSpacing port="sink_base model 1" spacing="0"/>
                  <portSpacing port="sink_base model 2" spacing="0"/>
                </process>
                <!-- Stacking model learner: Naive Bayes is deactivated and unconnected; W-Logistic is the learner actually wired in. -->
                <!-- NOTE(review): the "weka:" class prefix suggests W-Logistic needs the Weka extension installed; confirm before importing. -->
                <process expanded="true">
                  <operator activated="false" class="naive_bayes" compatibility="7.0.001" expanded="true" height="82" name="Naive Bayes" width="90" x="45" y="85"/>
                  <operator activated="true" class="weka:W-Logistic" compatibility="7.0.000" expanded="true" height="82" name="W-Logistic" width="90" x="45" y="187"/>
                  <connect from_port="stacking examples" to_op="W-Logistic" to_port="training set"/>
                  <connect from_op="W-Logistic" from_port="model" to_port="stacking model"/>
                  <portSpacing port="source_stacking examples" spacing="0"/>
                  <portSpacing port="sink_stacking model" spacing="0"/>
                </process>
                <description align="center" color="transparent" colored="false" width="126">This is the operator where things are interesting.</description>
              </operator>
              <!-- Input is the original set (in 2); subtrahend is the sampled training copy (Multiply output 2). -->
              <!-- NOTE(review): presumably this leaves only examples that were not used for training, matched on the generated id; confirm. -->
              <operator activated="true" class="set_minus" compatibility="7.0.001" expanded="true" height="82" name="Set Minus" width="90" x="179" y="289">
                <description align="center" color="transparent" colored="false" width="126">Keeps it fair (ineffect it's a split test, but I went the complicated way)</description>
              </operator>
              <operator activated="true" class="apply_model" compatibility="7.0.001" expanded="true" height="82" name="Apply Model" width="90" x="313" y="136">
                <list key="application_parameters"/>
              </operator>
              <operator activated="true" class="performance" compatibility="7.0.001" expanded="true" height="82" name="Performance Loop Stacking" width="90" x="380" y="34"/>
              <!-- Wiring of the subprocess: out 1 carries the model passed through Apply Model, out 2 the performance vector. -->
              <connect from_port="in 1" to_op="Multiply" to_port="input"/>
              <connect from_port="in 2" to_op="Set Minus" to_port="example set input"/>
              <connect from_op="Multiply" from_port="output 1" to_op="Stacking" to_port="training set"/>
              <connect from_op="Multiply" from_port="output 2" to_op="Set Minus" to_port="subtrahend"/>
              <connect from_op="Stacking" from_port="model" to_op="Apply Model" to_port="model"/>
              <connect from_op="Set Minus" from_port="example set output" to_op="Apply Model" to_port="unlabelled data"/>
              <connect from_op="Apply Model" from_port="labelled data" to_op="Performance Loop Stacking" to_port="labelled data"/>
              <connect from_op="Apply Model" from_port="model" to_port="out 1"/>
              <connect from_op="Performance Loop Stacking" from_port="performance" to_port="out 2"/>
              <portSpacing port="source_in 1" spacing="0"/>
              <portSpacing port="source_in 2" spacing="189"/>
              <portSpacing port="source_in 3" spacing="0"/>
              <portSpacing port="sink_out 1" spacing="0"/>
              <portSpacing port="sink_out 2" spacing="0"/>
              <portSpacing port="sink_out 3" spacing="0"/>
            </process>
            <description align="center" color="transparent" colored="false" width="126">This uses a loop to generate many decision trees using different samples of the data for a more complete picture.</description>
          </operator>
          <!-- Root-level wiring: linear chain into StackingMagic, whose two outputs become the process results. -->
          <connect from_op="Retrieve Sonar" from_port="output" to_op="Generate ID" to_port="example set input"/>
          <connect from_op="Generate ID" from_port="example set output" to_op="Sample (2)" to_port="example set input"/>
          <connect from_op="Sample (2)" from_port="example set output" to_op="StackingMagic" to_port="in 1"/>
          <connect from_op="Sample (2)" from_port="original" to_op="StackingMagic" to_port="in 2"/>
          <connect from_op="StackingMagic" from_port="out 1" to_port="result 1"/>
          <connect from_op="StackingMagic" from_port="out 2" to_port="result 2"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
          <portSpacing port="sink_result 3" spacing="0"/>
        </process>
      </operator>
    </process>
Sign In or Register to comment.