Due to recent updates, all users are required to create an Altair One account to login to the RapidMiner community. Click the Register button to create your account using the same email that you have previously used to login to the RapidMiner community. This will ensure that any previously created content will be synced to your Altair One account. Once you login, you will be asked to provide a username that identifies you to other Community users. Email us at Community with questions.

How to aggregate attribute values by percentage

MarceloC Member Posts: 2 Contributor I
edited November 2018 in Help
Hello there!  :)

I have a data set that I need to transform from nominal to binominal for subsequent mining. However, in order to reduce the number of columns after the transformation, I want to take each attribute and count the occurrences of each of its values across all my examples (the same thing RapidMiner does in the Results -> Statistics tab), so that I can take all the values below a certain threshold and aggregate them into a single value called "other_values".
I tried to figure this out using the Loop Attributes and Loop Values operators, but I could not find a solution. Has anyone done something like this before, or does anyone have an idea of how this can be accomplished?

Best regards  ;)

Marcelo.

Answers

  • MarceloC Member Posts: 2 Contributor I
    Hey guys, I believe I found a solution, but maybe there is an easier way. Hope this helps someone one day.  :D
    <!-- Subprocess: for every attribute, each value whose share of the examples is below the threshold macro is replaced by the single value "other_values". -->
    <operator activated="true" class="subprocess" compatibility="6.5.000" expanded="true" height="76" name="Aggregate Attribute Values by Percentage" width="90" x="648" y="300">
            <process expanded="true">
              <!-- Stores a macro named total_number_of_examples for the incoming example set. No macro_type is set here, so the operator default applies; presumably that is the example count (verify). -->
              <operator activated="true" class="extract_macro" compatibility="6.5.000" expanded="true" height="60" name="Extract Macro (2)" width="90" x="45" y="30">
                <parameter key="macro" value="total_number_of_examples"/>
                <list key="additional_macros"/>
              </operator>
              <!-- Outer loop. No attribute filter is configured, so the operator default selection applies; the inner process reads the current attribute name from %{loop_attribute}. -->
              <operator activated="true" class="loop_attributes" compatibility="6.5.000" expanded="true" height="76" name="Loop Attributes" width="90" x="246" y="30">
                <process expanded="true">
                  <!-- Inner loop over the values of the current attribute; the current value is read from %{loop_value}. -->
                  <operator activated="true" class="loop_values" compatibility="6.5.000" expanded="true" height="76" name="Loop Values" width="90" x="112" y="30">
                    <parameter key="attribute" value="%{loop_attribute}"/>
                    <process expanded="true">
                      <!-- Threshold as a fraction (0.1 = 10 percent). The value is constant; change it here to tune the cut-off. -->
                      <operator activated="true" class="set_macro" compatibility="6.5.000" expanded="true" height="76" name="Set Macro" width="90" x="45" y="120">
                        <parameter key="macro" value="threshold"/>
                        <parameter key="value" value="0.1"/>
                      </operator>
                      <!-- Keeps only the examples whose current attribute equals the current value. The same condition is spelled out in three parameter forms; which one is evaluated depends on condition_class (verify against the operator documentation). -->
                      <operator activated="true" class="filter_examples" compatibility="6.5.000" expanded="true" height="94" name="Filter Examples" width="90" x="179" y="120">
                        <parameter key="parameter_string" value="%{loop_attribute} = %{loop_value}"/>
                        <parameter key="parameter_expression" value="%{loop_attribute} = %{loop_value}"/>
                        <parameter key="condition_class" value="attribute_value_filter"/>
                        <list key="filters_list">
                          <parameter key="filters_entry_key" value="%{loop_attribute}.equals.%{loop_value}"/>
                        </list>
                      </operator>
                      <!-- Stores a macro named number_of_examples for the filtered subset (same default-dependent caveat as the first Extract Macro). -->
                      <operator activated="true" class="extract_macro" compatibility="6.5.000" expanded="true" height="60" name="Extract Macro" width="90" x="313" y="120">
                        <parameter key="macro" value="number_of_examples"/>
                        <list key="additional_macros"/>
                      </operator>
                      <!-- rename_value becomes "1" when number_of_examples / total_number_of_examples is less than threshold, otherwise "2". -->
                      <operator activated="true" class="generate_macro" compatibility="6.5.000" expanded="true" height="76" name="Generate Macro" width="90" x="447" y="120">
                        <list key="function_descriptions">
                          <parameter key="rename_value" value="if((eval(%{number_of_examples}) / eval(%{total_number_of_examples})) &lt; eval(%{threshold}), str(1), str(2))"/>
                        </list>
                      </operator>
                      <!-- Runs the first inner process when rename_value is 1 (rare value) and the second when it is 2 (frequent value). -->
                      <operator activated="true" class="select_subprocess" compatibility="6.5.000" expanded="true" height="76" name="Select Subprocess" width="90" x="581" y="120">
                        <parameter key="select_which" value="%{rename_value}"/>
                        <!-- Branch 1: the subset only holds the rare value, so the pattern .+ rewrites the current attribute to "other_values" for all of its rows. -->
                        <process expanded="true">
                          <operator activated="true" class="replace" compatibility="6.5.000" expanded="true" height="76" name="Replace (3)" width="90" x="112" y="30">
                            <parameter key="attribute_filter_type" value="single"/>
                            <parameter key="attribute" value="%{loop_attribute}"/>
                            <parameter key="replace_what" value=".+"/>
                            <parameter key="replace_by" value="other_values"/>
                          </operator>
                          <connect from_port="input 1" to_op="Replace (3)" to_port="example set input"/>
                          <connect from_op="Replace (3)" from_port="example set output" to_port="output 1"/>
                          <portSpacing port="source_input 1" spacing="0"/>
                          <portSpacing port="source_input 2" spacing="0"/>
                          <portSpacing port="sink_output 1" spacing="0"/>
                          <portSpacing port="sink_output 2" spacing="0"/>
                        </process>
                        <!-- Branch 2: pass the subset through unchanged. -->
                        <process expanded="true">
                          <connect from_port="input 1" to_port="output 1"/>
                          <portSpacing port="source_input 1" spacing="0"/>
                          <portSpacing port="source_input 2" spacing="0"/>
                          <portSpacing port="sink_output 1" spacing="0"/>
                          <portSpacing port="sink_output 2" spacing="0"/>
                        </process>
                      </operator>
                      <connect from_port="example set" to_op="Set Macro" to_port="through 1"/>
                      <connect from_op="Set Macro" from_port="through 1" to_op="Filter Examples" to_port="example set input"/>
                      <connect from_op="Filter Examples" from_port="example set output" to_op="Extract Macro" to_port="example set"/>
                      <connect from_op="Extract Macro" from_port="example set" to_op="Generate Macro" to_port="through 1"/>
                      <connect from_op="Generate Macro" from_port="through 1" to_op="Select Subprocess" to_port="input 1"/>
                      <connect from_op="Select Subprocess" from_port="output 1" to_port="out 1"/>
                      <portSpacing port="source_example set" spacing="0"/>
                      <portSpacing port="sink_out 1" spacing="0"/>
                      <portSpacing port="sink_out 2" spacing="0"/>
                    </process>
                  </operator>
                  <!-- Merges what Loop Values delivers on "out 1" back into one example set, which is handed to the "example set" sink of Loop Attributes. -->
                  <operator activated="true" class="append" compatibility="6.5.000" expanded="true" height="76" name="Append" width="90" x="313" y="30"/>
                  <connect from_port="example set" to_op="Loop Values" to_port="example set"/>
                  <connect from_op="Loop Values" from_port="out 1" to_op="Append" to_port="example set 1"/>
                  <connect from_op="Append" from_port="merged set" to_port="example set"/>
                  <portSpacing port="source_example set" spacing="0"/>
                  <portSpacing port="sink_example set" spacing="0"/>
                  <portSpacing port="sink_result 1" spacing="0"/>
                </process>
              </operator>
              <connect from_port="in 1" to_op="Extract Macro (2)" to_port="example set"/>
              <connect from_op="Extract Macro (2)" from_port="example set" to_op="Loop Attributes" to_port="example set"/>
              <connect from_op="Loop Attributes" from_port="example set" to_port="out 1"/>
              <portSpacing port="source_in 1" spacing="0"/>
              <portSpacing port="source_in 2" spacing="0"/>
              <portSpacing port="sink_out 1" spacing="0"/>
              <portSpacing port="sink_out 2" spacing="0"/>
            </process>
          </operator>
    Best regards!

    Marcelo.
Sign In or Register to comment.