RapidMiner 9.7 is Now Available

Lots of amazing new improvements including true version control! Learn more about what's new here.

CLICK HERE TO DOWNLOAD

Generate Attribute to find 2nd and 3rd minimums

ZKuiperZKuiper Member Posts: 11 Contributor II
edited June 28 in Help
I have a large data set: 17 attributes, ~1.6 million rows. I am looking to generate 3 attributes: the minimum, 2nd minimum, and 3rd minimum of each row. The first minimum is simple using the Generate Aggregation operator, but after that it gets messy. I can think of a few ways to do this: setting a macro to the min and building 2 huge sets of if statements in Generate Attributes that check every attribute to see whether it matches the min value, returning missing if it does and the value otherwise, etc.; or a loop with transpose and sorting. But these seem a bit inelegant. Are there any simpler methods I am missing?

Best Answer

  • hbajpaihbajpai Posts: 87   Unicorn
    Solution Accepted
    Hey @ZKuiper

    I can think of a leaner way to do this: a single sort per row lets you extract all three minimums together as macros. Check out the XML below.

    <?xml version="1.0" encoding="UTF-8"?><process version="9.7.001">
      <!--
        Purpose: add three numeric attributes (min_1, min_2, min_3) to every example of the
        Sonar sample data, filled from the first three entries of that example's values after
        an increasing sort.

        Flow:
          1. Retrieve Sonar, then create min_1..min_3 as missing numeric placeholders.
          2. Store the number of examples in the macro total_rows.
          3. Loop total_rows times (iteration macro i); each iteration handles example i:
             a. keep only example i and duplicate it,
             b. copy 1: Transpose, Sort by att_1 (increasing), read examples 1, 2 and 3 of
                att_1 into the macros min_1, min_2 and min_3,
             c. copy 2: overwrite min_1..min_3 with the macro values.
          4. Append the one-example results of all iterations.

        NOTE(review): the subprocess runs once per example, so the run time grows with the
        number of rows; confirm this is acceptable for large inputs.
      -->
      <context>
        <input/>
        <output/>
        <macros/>
      </context>
      <operator activated="true" class="process" compatibility="9.7.001" expanded="true" name="Process">
        <parameter key="logverbosity" value="init"/>
        <parameter key="random_seed" value="2001"/>
        <parameter key="send_mail" value="never"/>
        <parameter key="notification_email" value=""/>
        <parameter key="process_duration_for_mail" value="30"/>
        <parameter key="encoding" value="SYSTEM"/>
        <process expanded="true">
          <operator activated="true" class="retrieve" compatibility="9.7.001" expanded="true" height="68" name="Retrieve Sonar" width="90" x="45" y="34">
            <parameter key="repository_entry" value="//Samples/data/Sonar"/>
          </operator>
          <!-- Create the three result attributes up front as missing numeric values; they are filled inside the loop. -->
          <operator activated="true" class="generate_attributes" compatibility="9.7.001" expanded="true" height="82" name="Generate Attributes (2)" width="90" x="179" y="136">
            <list key="function_descriptions">
              <parameter key="min_1" value="MISSING_NUMERIC"/>
              <parameter key="min_2" value="MISSING_NUMERIC"/>
              <parameter key="min_3" value="MISSING_NUMERIC"/>
            </list>
            <parameter key="keep_all" value="true"/>
          </operator>
          <!-- total_rows = number of examples; it drives the iteration count of the loop below. -->
          <operator activated="true" class="extract_macro" compatibility="9.7.001" expanded="true" height="68" name="Extract Macro" width="90" x="313" y="136">
            <parameter key="macro" value="total_rows"/>
            <parameter key="macro_type" value="number_of_examples"/>
            <parameter key="statistics" value="average"/>
            <parameter key="attribute_name" value=""/>
            <list key="additional_macros"/>
          </operator>
          <!-- One iteration per example; the macro i selects the example handled in that iteration. -->
          <operator activated="true" class="concurrency:loop" compatibility="9.7.001" expanded="true" height="82" name="Loop" width="90" x="447" y="136">
            <parameter key="number_of_iterations" value="%{total_rows}"/>
            <parameter key="iteration_macro" value="i"/>
            <parameter key="reuse_results" value="false"/>
            <parameter key="enable_parallel_execution" value="true"/>
            <process expanded="true">
              <!-- first_example = last_example = i keeps exactly one example. -->
              <operator activated="true" class="filter_example_range" compatibility="9.7.001" expanded="true" height="82" name="Filter Example Range" width="90" x="112" y="85">
                <parameter key="first_example" value="%{i}"/>
                <parameter key="last_example" value="%{i}"/>
                <parameter key="invert_filter" value="false"/>
              </operator>
              <!-- Two copies: output 1 is transposed and sorted to find the minimums, output 2 receives them. -->
              <operator activated="true" class="multiply" compatibility="9.7.001" expanded="true" height="103" name="Multiply" width="90" x="246" y="85"/>
              <!--
                NOTE(review): the min_1..min_3 placeholders are still part of the example that is
                transposed here; presumably their missing values sort after the real values and so
                never land in the first three positions. Confirm against the Sort operator's
                handling of missing values.
              -->
              <operator activated="true" class="transpose" compatibility="9.7.001" expanded="true" height="82" name="Transpose" width="90" x="380" y="340"/>
              <operator activated="true" class="sort" compatibility="9.7.001" expanded="true" height="82" name="Sort" width="90" x="514" y="340">
                <parameter key="attribute_name" value="att_1"/>
                <parameter key="sorting_direction" value="increasing"/>
              </operator>
              <!--
                The next three operators read examples 1, 2 and 3 of the sorted att_1 column into
                the macros min_1, min_2 and min_3.
                format_of_numericals must keep enough fraction digits for the macro to carry the
                value unrounded: a short pattern such as #.## would cut every value to two
                decimals, and the attributes written back below would no longer equal the row's
                actual values. NOTE(review): this assumes the pattern follows
                java.text.DecimalFormat rules; confirm in the Operator Toolbox documentation.
              -->
              <operator activated="true" class="operator_toolbox:extract_macro_enhanced" compatibility="2.6.000" expanded="true" height="68" name="Extract Macro (Format)" width="90" x="648" y="340">
                <parameter key="macro" value="min_1"/>
                <parameter key="macro_type" value="data_value"/>
                <parameter key="statistics" value="average"/>
                <parameter key="attribute_name" value="att_1"/>
                <parameter key="example_index" value="1"/>
                <list key="additional_macros"/>
                <parameter key="format_of_numericals" value="#.################"/>
                <parameter key="date_format" value=""/>
                <parameter key="time_zone" value="SYSTEM"/>
                <parameter key="locale" value="English (United States)"/>
              </operator>
              <operator activated="true" class="operator_toolbox:extract_macro_enhanced" compatibility="2.6.000" expanded="true" height="68" name="Extract Macro (Format) (2)" width="90" x="782" y="340">
                <parameter key="macro" value="min_2"/>
                <parameter key="macro_type" value="data_value"/>
                <parameter key="statistics" value="average"/>
                <parameter key="attribute_name" value="att_1"/>
                <parameter key="example_index" value="2"/>
                <list key="additional_macros"/>
                <parameter key="format_of_numericals" value="#.################"/>
                <parameter key="date_format" value=""/>
                <parameter key="time_zone" value="SYSTEM"/>
                <parameter key="locale" value="English (United States)"/>
              </operator>
              <operator activated="true" class="operator_toolbox:extract_macro_enhanced" compatibility="2.6.000" expanded="true" height="68" name="Extract Macro (Format) (3)" width="90" x="916" y="340">
                <parameter key="macro" value="min_3"/>
                <parameter key="macro_type" value="data_value"/>
                <parameter key="statistics" value="average"/>
                <parameter key="attribute_name" value="att_1"/>
                <parameter key="example_index" value="3"/>
                <list key="additional_macros"/>
                <parameter key="format_of_numericals" value="#.################"/>
                <parameter key="date_format" value=""/>
                <parameter key="time_zone" value="SYSTEM"/>
                <parameter key="locale" value="English (United States)"/>
              </operator>
              <!--
                Listed after the three macro extractions although it sits on the other Multiply
                branch: it consumes the macros they set, so it must stay below them in this file.
                eval() turns the macro text back into a number.
              -->
              <operator activated="true" class="generate_attributes" compatibility="9.7.001" expanded="true" height="82" name="Generate Attributes (3)" width="90" x="447" y="85">
                <list key="function_descriptions">
                  <parameter key="min_1" value="eval(%{min_1})"/>
                  <parameter key="min_2" value="eval(%{min_2})"/>
                  <parameter key="min_3" value="eval(%{min_3})"/>
                </list>
                <parameter key="keep_all" value="true"/>
              </operator>
              <connect from_port="input 1" to_op="Filter Example Range" to_port="example set input"/>
              <connect from_op="Filter Example Range" from_port="example set output" to_op="Multiply" to_port="input"/>
              <connect from_op="Multiply" from_port="output 1" to_op="Transpose" to_port="example set input"/>
              <connect from_op="Multiply" from_port="output 2" to_op="Generate Attributes (3)" to_port="example set input"/>
              <connect from_op="Transpose" from_port="example set output" to_op="Sort" to_port="example set input"/>
              <connect from_op="Sort" from_port="example set output" to_op="Extract Macro (Format)" to_port="example set"/>
              <connect from_op="Extract Macro (Format)" from_port="example set" to_op="Extract Macro (Format) (2)" to_port="example set"/>
              <connect from_op="Extract Macro (Format) (2)" from_port="example set" to_op="Extract Macro (Format) (3)" to_port="example set"/>
              <connect from_op="Generate Attributes (3)" from_port="example set output" to_port="output 1"/>
              <portSpacing port="source_input 1" spacing="0"/>
              <portSpacing port="source_input 2" spacing="0"/>
              <portSpacing port="sink_output 1" spacing="0"/>
              <portSpacing port="sink_output 2" spacing="0"/>
            </process>
          </operator>
          <!-- Merge the one-example results of all loop iterations back into a single example set. -->
          <operator activated="true" class="append" compatibility="9.7.001" expanded="true" height="82" name="Append" width="90" x="581" y="136">
            <parameter key="datamanagement" value="double_array"/>
            <parameter key="data_management" value="auto"/>
            <parameter key="merge_type" value="all"/>
          </operator>
          <connect from_op="Retrieve Sonar" from_port="output" to_op="Generate Attributes (2)" to_port="example set input"/>
          <connect from_op="Generate Attributes (2)" from_port="example set output" to_op="Extract Macro" to_port="example set"/>
          <connect from_op="Extract Macro" from_port="example set" to_op="Loop" to_port="input 1"/>
          <connect from_op="Loop" from_port="output 1" to_op="Append" to_port="example set 1"/>
          <connect from_op="Append" from_port="merged set" to_port="result 1"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
        </process>
      </operator>
    </process>


    Best,
    Harshit

Answers

  • ZKuiperZKuiper Member Posts: 11 Contributor II
    For the time being I have used Transpose, Loop Attributes, and Sort to get the job done. I still feel like there is a better way, but this will do for now.

    <?xml version="1.0" encoding="UTF-8"?><process version="9.7.000">
      <!--
        Purpose: for an incoming example set, derive three attributes (distance1, distance2,
        distance3) and join them back onto the original examples by id.

        Flow:
          1. Generate ID (117) adds nominal ids; Transpose (82) transposes the data.
          2. Loop Attributes (5) handles each transposed attribute on its own: select it, sort it
             in increasing order and number the sorted examples with a fresh numeric id.
          3. Loop Collection (3) folds the resulting collection into one example set, joining
             each element onto the remembered "dataset" by id.
          4. Recall (6) fetches that set; it is re-id'ed, transposed back, reduced to the
             attributes id_1, id_2 and id_3, renamed to distance1..distance3 and inner-joined
             with the original data.

        NOTE(review): this is a fragment. No connection in this file feeds Generate ID (117),
        and the output of Join (42) is not wired to a result port (it carries an "after"
        breakpoint instead), so a data source has to be connected before it can run.
        NOTE(review): presumably each attribute after Transpose (82) corresponds to one original
        example, so sorting an attribute orders that example's values. Confirm against the
        Transpose operator documentation.
      -->
      <context>
        <input/>
        <output/>
        <macros/>
      </context>
      <operator activated="true" class="process" compatibility="9.7.000" expanded="true" name="Process">
        <parameter key="logverbosity" value="init"/>
        <parameter key="random_seed" value="2001"/>
        <parameter key="send_mail" value="never"/>
        <parameter key="notification_email" value=""/>
        <parameter key="process_duration_for_mail" value="30"/>
        <parameter key="encoding" value="SYSTEM"/>
        <process expanded="true">
          <!-- Nominal ids for the incoming examples; no connection in this file feeds this operator's input. -->
          <operator activated="true" class="generate_id" compatibility="9.7.000" expanded="true" height="82" name="Generate ID (117)" width="90" x="313" y="187">
            <parameter key="create_nominal_ids" value="true"/>
            <parameter key="offset" value="0"/>
          </operator>
          <!-- Only output 1 of this Multiply is connected. -->
          <operator activated="true" class="multiply" compatibility="9.7.000" expanded="true" height="82" name="Multiply (20)" width="90" x="447" y="187"/>
          <!-- The transposed set goes to the attribute loop; the "original" port feeds the final Join (42). -->
          <operator activated="true" class="transpose" compatibility="9.7.000" expanded="true" height="82" name="Transpose (82)" width="90" x="581" y="187"/>
          <!-- Runs the subprocess once per attribute; the current attribute name is in the macro loop_attribute. -->
          <operator activated="true" class="concurrency:loop_attributes" compatibility="9.7.000" expanded="true" height="82" name="Loop Attributes (5)" width="90" x="715" y="187">
            <parameter key="attribute_filter_type" value="all"/>
            <parameter key="attribute" value=""/>
            <parameter key="attributes" value=""/>
            <parameter key="use_except_expression" value="false"/>
            <parameter key="value_type" value="attribute_value"/>
            <parameter key="use_value_type_exception" value="false"/>
            <parameter key="except_value_type" value="time"/>
            <parameter key="block_type" value="attribute_block"/>
            <parameter key="use_block_type_exception" value="false"/>
            <parameter key="except_block_type" value="value_matrix_row_start"/>
            <parameter key="invert_selection" value="false"/>
            <parameter key="include_special_attributes" value="false"/>
            <parameter key="attribute_name_macro" value="loop_attribute"/>
            <parameter key="reuse_results" value="false"/>
            <parameter key="enable_parallel_execution" value="true"/>
            <process expanded="true">
              <!-- Keep only the attribute of the current iteration. -->
              <operator activated="true" class="select_attributes" compatibility="9.7.000" expanded="true" height="82" name="Select Attributes (153)" width="90" x="45" y="34">
                <parameter key="attribute_filter_type" value="single"/>
                <parameter key="attribute" value="%{loop_attribute}"/>
                <parameter key="attributes" value=""/>
                <parameter key="use_except_expression" value="false"/>
                <parameter key="value_type" value="attribute_value"/>
                <parameter key="use_value_type_exception" value="false"/>
                <parameter key="except_value_type" value="time"/>
                <parameter key="block_type" value="attribute_block"/>
                <parameter key="use_block_type_exception" value="false"/>
                <parameter key="except_block_type" value="value_matrix_row_start"/>
                <parameter key="invert_selection" value="false"/>
                <parameter key="include_special_attributes" value="false"/>
              </operator>
              <!-- Stores the first value of the attribute in the macro "center"; nothing else in this file reads that macro. -->
              <operator activated="true" class="operator_toolbox:extract_macro_enhanced" compatibility="2.6.000" expanded="true" height="68" name="Extract Macro (Format) (3)" width="90" x="179" y="34">
                <parameter key="macro" value="center"/>
                <parameter key="macro_type" value="data_value"/>
                <parameter key="statistics" value="average"/>
                <parameter key="attribute_name" value="%{loop_attribute}"/>
                <parameter key="example_index" value="1"/>
                <list key="additional_macros"/>
                <parameter key="format_of_numericals" value="#.##"/>
                <parameter key="date_format" value=""/>
                <parameter key="time_zone" value="SYSTEM"/>
                <parameter key="locale" value="English (United States)"/>
              </operator>
              <!-- Smallest values first. -->
              <operator activated="true" class="sort" compatibility="9.7.000" expanded="true" height="82" name="Sort (2)" width="90" x="313" y="34">
                <parameter key="attribute_name" value="%{loop_attribute}"/>
                <parameter key="sorting_direction" value="increasing"/>
              </operator>
              <!-- Numeric ids assigned after sorting; these ids are the join key used in Loop Collection (3). -->
              <operator activated="true" class="generate_id" compatibility="9.7.000" expanded="true" height="82" name="Generate ID (119)" width="90" x="447" y="34">
                <parameter key="create_nominal_ids" value="false"/>
                <parameter key="offset" value="0"/>
              </operator>
              <connect from_port="input 1" to_op="Select Attributes (153)" to_port="example set input"/>
              <connect from_op="Select Attributes (153)" from_port="example set output" to_op="Extract Macro (Format) (3)" to_port="example set"/>
              <connect from_op="Extract Macro (Format) (3)" from_port="example set" to_op="Sort (2)" to_port="example set input"/>
              <connect from_op="Sort (2)" from_port="example set output" to_op="Generate ID (119)" to_port="example set input"/>
              <connect from_op="Generate ID (119)" from_port="example set output" to_port="output 1"/>
              <portSpacing port="source_input 1" spacing="0"/>
              <portSpacing port="source_input 2" spacing="0"/>
              <portSpacing port="sink_output 1" spacing="0"/>
              <portSpacing port="sink_output 2" spacing="0"/>
            </process>
          </operator>
          <!--
            Folds the collection of sorted single-attribute sets into one example set kept under
            the Remember/Recall name "dataset". The macro "iteration" starts at 1.
          -->
          <operator activated="true" class="loop_collection" compatibility="9.7.000" expanded="true" height="68" name="Loop Collection (3)" width="90" x="849" y="187">
            <parameter key="set_iteration_macro" value="true"/>
            <parameter key="macro_name" value="iteration"/>
            <parameter key="macro_start_value" value="1"/>
            <parameter key="unfold" value="false"/>
            <process expanded="true">
              <operator activated="true" class="branch" compatibility="9.7.000" expanded="true" height="82" name="Branch (7)" width="90" x="45" y="34">
                <parameter key="condition_type" value="expression"/>
                <parameter key="expression" value="%{iteration}==1"/>
                <parameter key="io_object" value="ANOVAMatrix"/>
                <parameter key="return_inner_output" value="true"/>
                <!-- First subprocess (first element, iteration is 1): store the element as "dataset". -->
                <process expanded="true">
                  <operator activated="true" class="remember" compatibility="9.7.000" expanded="true" height="68" name="Remember (2)" width="90" x="45" y="34">
                    <parameter key="name" value="dataset"/>
                    <parameter key="io_object" value="ExampleSet"/>
                    <parameter key="store_which" value="1"/>
                    <parameter key="remove_from_process" value="true"/>
                  </operator>
                  <connect from_port="condition" to_op="Remember (2)" to_port="store"/>
                  <portSpacing port="source_condition" spacing="0"/>
                  <portSpacing port="source_input 1" spacing="0"/>
                  <portSpacing port="sink_input 1" spacing="0"/>
                </process>
                <!-- Second subprocess (all later elements): recall "dataset", left-join the element onto it by id, store it again. -->
                <process expanded="true">
                  <operator activated="true" class="recall" compatibility="9.7.000" expanded="true" height="68" name="Recall (5)" width="90" x="45" y="34">
                    <parameter key="name" value="dataset"/>
                    <parameter key="io_object" value="ExampleSet"/>
                    <parameter key="remove_from_store" value="true"/>
                  </operator>
                  <operator activated="true" class="concurrency:join" compatibility="9.7.000" expanded="true" height="82" name="Join (96)" width="90" x="179" y="85">
                    <parameter key="remove_double_attributes" value="true"/>
                    <parameter key="join_type" value="left"/>
                    <parameter key="use_id_attribute_as_key" value="true"/>
                    <list key="key_attributes"/>
                    <parameter key="keep_both_join_attributes" value="false"/>
                  </operator>
                  <operator activated="true" class="remember" compatibility="9.7.000" expanded="true" height="68" name="Remember (6)" width="90" x="313" y="85">
                    <parameter key="name" value="dataset"/>
                    <parameter key="io_object" value="ExampleSet"/>
                    <parameter key="store_which" value="1"/>
                    <parameter key="remove_from_process" value="true"/>
                  </operator>
                  <connect from_port="condition" to_op="Join (96)" to_port="right"/>
                  <connect from_op="Recall (5)" from_port="result" to_op="Join (96)" to_port="left"/>
                  <connect from_op="Join (96)" from_port="join" to_op="Remember (6)" to_port="store"/>
                  <portSpacing port="source_condition" spacing="0"/>
                  <portSpacing port="source_input 1" spacing="0"/>
                  <portSpacing port="sink_input 1" spacing="0"/>
                </process>
              </operator>
              <connect from_port="single" to_op="Branch (7)" to_port="condition"/>
              <portSpacing port="source_single" spacing="0"/>
              <portSpacing port="sink_output 1" spacing="0"/>
            </process>
          </operator>
          <!--
            Fetches the joined set built by Loop Collection (3). No connection orders the two
            operators, so this operator must stay below the loop in this file.
          -->
          <operator activated="true" class="recall" compatibility="9.7.000" expanded="true" height="68" name="Recall (6)" width="90" x="179" y="289">
            <parameter key="name" value="dataset"/>
            <parameter key="io_object" value="ExampleSet"/>
            <parameter key="remove_from_store" value="true"/>
          </operator>
          <!-- NOTE(review): presumably these nominal ids (id_1, id_2, ...) become the attribute names after Transpose (83); confirm. -->
          <operator activated="true" class="generate_id" compatibility="9.7.000" expanded="true" height="82" name="Generate ID (118)" width="90" x="313" y="289">
            <parameter key="create_nominal_ids" value="true"/>
            <parameter key="offset" value="0"/>
          </operator>
          <operator activated="true" class="transpose" compatibility="9.7.000" expanded="true" height="82" name="Transpose (83)" width="90" x="447" y="289"/>
          <!-- Keep only id_1, id_2 and id_3; presumably the three smallest values per original example. -->
          <operator activated="true" class="select_attributes" compatibility="9.7.000" expanded="true" height="82" name="Select Attributes (154)" width="90" x="581" y="289">
            <parameter key="attribute_filter_type" value="subset"/>
            <parameter key="attribute" value=""/>
            <parameter key="attributes" value="id_1|id_2|id_3"/>
            <parameter key="use_except_expression" value="false"/>
            <parameter key="value_type" value="attribute_value"/>
            <parameter key="use_value_type_exception" value="false"/>
            <parameter key="except_value_type" value="time"/>
            <parameter key="block_type" value="attribute_block"/>
            <parameter key="use_block_type_exception" value="false"/>
            <parameter key="except_block_type" value="value_matrix_row_start"/>
            <parameter key="invert_selection" value="false"/>
            <parameter key="include_special_attributes" value="false"/>
          </operator>
          <!-- id_1, id_2, id_3 become distance1, distance2, distance3. -->
          <operator activated="true" class="rename" compatibility="9.7.000" expanded="true" height="82" name="Rename (4)" width="90" x="715" y="289">
            <parameter key="old_name" value="id_1"/>
            <parameter key="new_name" value="distance1"/>
            <list key="rename_additional_attributes">
              <parameter key="id_2" value="distance2"/>
              <parameter key="id_3" value="distance3"/>
            </list>
          </operator>
          <!--
            Inner join on id of the original data (left) with the three renamed attributes
            (right). Execution pauses here (breakpoints="after"); the join output is not
            connected to a result port.
          -->
          <operator activated="true" breakpoints="after" class="concurrency:join" compatibility="9.7.000" expanded="true" height="82" name="Join (42)" width="90" x="849" y="289">
            <parameter key="remove_double_attributes" value="true"/>
            <parameter key="join_type" value="inner"/>
            <parameter key="use_id_attribute_as_key" value="true"/>
            <list key="key_attributes"/>
            <parameter key="keep_both_join_attributes" value="false"/>
          </operator>
          <connect from_op="Generate ID (117)" from_port="example set output" to_op="Multiply (20)" to_port="input"/>
          <connect from_op="Multiply (20)" from_port="output 1" to_op="Transpose (82)" to_port="example set input"/>
          <connect from_op="Transpose (82)" from_port="example set output" to_op="Loop Attributes (5)" to_port="input 1"/>
          <connect from_op="Transpose (82)" from_port="original" to_op="Join (42)" to_port="left"/>
          <connect from_op="Loop Attributes (5)" from_port="output 1" to_op="Loop Collection (3)" to_port="collection"/>
          <connect from_op="Recall (6)" from_port="result" to_op="Generate ID (118)" to_port="example set input"/>
          <connect from_op="Generate ID (118)" from_port="example set output" to_op="Transpose (83)" to_port="example set input"/>
          <connect from_op="Transpose (83)" from_port="example set output" to_op="Select Attributes (154)" to_port="example set input"/>
          <connect from_op="Select Attributes (154)" from_port="example set output" to_op="Rename (4)" to_port="example set input"/>
          <connect from_op="Rename (4)" from_port="example set output" to_op="Join (42)" to_port="right"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
        </process>
      </operator>
    </process>
    


Sign In or Register to comment.