Generate Attribute to find 2nd and 3rd minimums

ZKuiperZKuiper Member Posts: 11 Contributor II
edited June 2020 in Help
I have a large data set: 17 attributes and ~1.6 million rows. I'm looking to generate 3 attributes for each row: the minimum, the 2nd minimum, and the 3rd minimum. The first minimum is simple using the Generate Aggregation operator, but after that it gets messy. I can think of a few ways to do this: setting a macro to the min and writing two huge sets of if statements in Generate Attributes, where every attribute is checked against the min value and returns missing if it matches or its own value otherwise, etc.; or a loop with transpose and sorting. But these seem a bit inelegant. Are there any simpler methods I am missing?

Best Answer

  • hbajpaihbajpai Member Posts: 102 Unicorn
    Solution Accepted
    Hey @ZKuiper

    I can think of a leaner way to do this utilizing one sorting that can help you extract all the mins together as macros. Check out the XML.

    <?xml version="1.0" encoding="UTF-8"?><process version="9.7.001">
      <!-- Row-wise minimum extraction: every example is transposed into a single column, that column is sorted ascending, its first three values are read into macros and written back to the example as min_1, min_2 and min_3. -->
      <context>
        <input/>
        <output/>
        <macros/>
      </context>
      <operator activated="true" class="process" compatibility="9.7.001" expanded="true" name="Process">
        <parameter key="logverbosity" value="init"/>
        <parameter key="random_seed" value="2001"/>
        <parameter key="send_mail" value="never"/>
        <parameter key="notification_email" value=""/>
        <parameter key="process_duration_for_mail" value="30"/>
        <parameter key="encoding" value="SYSTEM"/>
        <process expanded="true">
          <!-- Demo input; replace the repository entry with your own data set. -->
          <operator activated="true" class="retrieve" compatibility="9.7.001" expanded="true" height="68" name="Retrieve Sonar" width="90" x="45" y="34">
            <parameter key="repository_entry" value="//Samples/data/Sonar"/>
          </operator>
          <!-- Creates min_1, min_2 and min_3 as missing-valued placeholders; the loop below generates attributes of the same names with the real values. -->
          <operator activated="true" class="generate_attributes" compatibility="9.7.001" expanded="true" height="82" name="Generate Attributes (2)" width="90" x="179" y="136">
            <list key="function_descriptions">
              <parameter key="min_1" value="MISSING_NUMERIC"/>
              <parameter key="min_2" value="MISSING_NUMERIC"/>
              <parameter key="min_3" value="MISSING_NUMERIC"/>
            </list>
            <parameter key="keep_all" value="true"/>
          </operator>
          <!-- Stores the number of examples in the macro total_rows; it drives the iteration count of the loop. -->
          <operator activated="true" class="extract_macro" compatibility="9.7.001" expanded="true" height="68" name="Extract Macro" width="90" x="313" y="136">
            <parameter key="macro" value="total_rows"/>
            <parameter key="macro_type" value="number_of_examples"/>
            <parameter key="statistics" value="average"/>
            <parameter key="attribute_name" value=""/>
            <list key="additional_macros"/>
          </operator>
          <!-- One iteration per example; the macro i is used as the index of the example handled in that iteration. -->
          <operator activated="true" class="concurrency:loop" compatibility="9.7.001" expanded="true" height="82" name="Loop" width="90" x="447" y="136">
            <parameter key="number_of_iterations" value="%{total_rows}"/>
            <parameter key="iteration_macro" value="i"/>
            <parameter key="reuse_results" value="false"/>
            <parameter key="enable_parallel_execution" value="true"/>
            <process expanded="true">
              <!-- Keeps only example number i. -->
              <operator activated="true" class="filter_example_range" compatibility="9.7.001" expanded="true" height="82" name="Filter Example Range" width="90" x="112" y="85">
                <parameter key="first_example" value="%{i}"/>
                <parameter key="last_example" value="%{i}"/>
                <parameter key="invert_filter" value="false"/>
              </operator>
              <!-- Copy 1 goes to the transpose/sort branch, copy 2 is the example that receives the results. -->
              <operator activated="true" class="multiply" compatibility="9.7.001" expanded="true" height="103" name="Multiply" width="90" x="246" y="85"/>
              <!-- The single example becomes a single column, which the following operators address as att_1. -->
              <operator activated="true" class="transpose" compatibility="9.7.001" expanded="true" height="82" name="Transpose" width="90" x="380" y="340"/>
              <operator activated="true" class="sort" compatibility="9.7.001" expanded="true" height="82" name="Sort" width="90" x="514" y="340">
                <parameter key="attribute_name" value="att_1"/>
                <parameter key="sorting_direction" value="increasing"/>
              </operator>
              <!-- The three operators below read rows 1, 2 and 3 of the sorted column into the macros min_1, min_2 and min_3. -->
              <!-- The numeric format keeps up to 15 fraction digits and always prints a leading integer digit. The former pattern "#.##" rounded every value to two decimals before eval() wrote it back, which lost precision and could create false ties. -->
              <operator activated="true" class="operator_toolbox:extract_macro_enhanced" compatibility="2.6.000" expanded="true" height="68" name="Extract Macro (Format)" width="90" x="648" y="340">
                <parameter key="macro" value="min_1"/>
                <parameter key="macro_type" value="data_value"/>
                <parameter key="statistics" value="average"/>
                <parameter key="attribute_name" value="att_1"/>
                <parameter key="example_index" value="1"/>
                <list key="additional_macros"/>
                <parameter key="format_of_numericals" value="0.###############"/>
                <parameter key="date_format" value=""/>
                <parameter key="time_zone" value="SYSTEM"/>
                <parameter key="locale" value="English (United States)"/>
              </operator>
              <operator activated="true" class="operator_toolbox:extract_macro_enhanced" compatibility="2.6.000" expanded="true" height="68" name="Extract Macro (Format) (2)" width="90" x="782" y="340">
                <parameter key="macro" value="min_2"/>
                <parameter key="macro_type" value="data_value"/>
                <parameter key="statistics" value="average"/>
                <parameter key="attribute_name" value="att_1"/>
                <parameter key="example_index" value="2"/>
                <list key="additional_macros"/>
                <parameter key="format_of_numericals" value="0.###############"/>
                <parameter key="date_format" value=""/>
                <parameter key="time_zone" value="SYSTEM"/>
                <parameter key="locale" value="English (United States)"/>
              </operator>
              <operator activated="true" class="operator_toolbox:extract_macro_enhanced" compatibility="2.6.000" expanded="true" height="68" name="Extract Macro (Format) (3)" width="90" x="916" y="340">
                <parameter key="macro" value="min_3"/>
                <parameter key="macro_type" value="data_value"/>
                <parameter key="statistics" value="average"/>
                <parameter key="attribute_name" value="att_1"/>
                <parameter key="example_index" value="3"/>
                <list key="additional_macros"/>
                <parameter key="format_of_numericals" value="0.###############"/>
                <parameter key="date_format" value=""/>
                <parameter key="time_zone" value="SYSTEM"/>
                <parameter key="locale" value="English (United States)"/>
              </operator>
              <!-- Writes the macro values into the untransposed copy of the example. This operator has no data connection to the Extract Macro chain, so it must stay listed after it to see the macros of the current iteration. -->
              <operator activated="true" class="generate_attributes" compatibility="9.7.001" expanded="true" height="82" name="Generate Attributes (3)" width="90" x="447" y="85">
                <list key="function_descriptions">
                  <parameter key="min_1" value="eval(%{min_1})"/>
                  <parameter key="min_2" value="eval(%{min_2})"/>
                  <parameter key="min_3" value="eval(%{min_3})"/>
                </list>
                <parameter key="keep_all" value="true"/>
              </operator>
              <connect from_port="input 1" to_op="Filter Example Range" to_port="example set input"/>
              <connect from_op="Filter Example Range" from_port="example set output" to_op="Multiply" to_port="input"/>
              <connect from_op="Multiply" from_port="output 1" to_op="Transpose" to_port="example set input"/>
              <connect from_op="Multiply" from_port="output 2" to_op="Generate Attributes (3)" to_port="example set input"/>
              <connect from_op="Transpose" from_port="example set output" to_op="Sort" to_port="example set input"/>
              <connect from_op="Sort" from_port="example set output" to_op="Extract Macro (Format)" to_port="example set"/>
              <connect from_op="Extract Macro (Format)" from_port="example set" to_op="Extract Macro (Format) (2)" to_port="example set"/>
              <connect from_op="Extract Macro (Format) (2)" from_port="example set" to_op="Extract Macro (Format) (3)" to_port="example set"/>
              <connect from_op="Generate Attributes (3)" from_port="example set output" to_port="output 1"/>
              <portSpacing port="source_input 1" spacing="0"/>
              <portSpacing port="source_input 2" spacing="0"/>
              <portSpacing port="sink_output 1" spacing="0"/>
              <portSpacing port="sink_output 2" spacing="0"/>
            </process>
          </operator>
          <!-- Stacks the single-example results of all iterations back into one example set. -->
          <operator activated="true" class="append" compatibility="9.7.001" expanded="true" height="82" name="Append" width="90" x="581" y="136">
            <parameter key="datamanagement" value="double_array"/>
            <parameter key="data_management" value="auto"/>
            <parameter key="merge_type" value="all"/>
          </operator>
          <connect from_op="Retrieve Sonar" from_port="output" to_op="Generate Attributes (2)" to_port="example set input"/>
          <connect from_op="Generate Attributes (2)" from_port="example set output" to_op="Extract Macro" to_port="example set"/>
          <connect from_op="Extract Macro" from_port="example set" to_op="Loop" to_port="input 1"/>
          <connect from_op="Loop" from_port="output 1" to_op="Append" to_port="example set 1"/>
          <connect from_op="Append" from_port="merged set" to_port="result 1"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
        </process>
      </operator>
    </process>


    Best,
    Harshit

Answers

  • ZKuiperZKuiper Member Posts: 11 Contributor II
    For the time being I have used a transpose, loop atts, and sort to get the job done. I still feel like there is a better way but this will do for now.

    <?xml version="1.0" encoding="UTF-8"?><process version="9.7.000">
      <!-- Process fragment: tags the input with ids, transposes it, sorts every transposed column on its own, joins the sorted columns on a numeric rank id, transposes back and keeps the first three ranks as distance1, distance2 and distance3. -->
      <!-- NOTE(review): in this snippet nothing is connected to the input of Generate ID (117), and the output of Join (42) is not wired to a result port; presumably it was cut out of a larger process. -->
      <context>
        <input/>
        <output/>
        <macros/>
      </context>
      <operator activated="true" class="process" compatibility="9.7.000" expanded="true" name="Process">
        <parameter key="logverbosity" value="init"/>
        <parameter key="random_seed" value="2001"/>
        <parameter key="send_mail" value="never"/>
        <parameter key="notification_email" value=""/>
        <parameter key="process_duration_for_mail" value="30"/>
        <parameter key="encoding" value="SYSTEM"/>
        <process expanded="true">
          <!-- Gives every input example a nominal id; the final Join (42) uses the id attribute as its key. -->
          <operator activated="true" class="generate_id" compatibility="9.7.000" expanded="true" height="82" name="Generate ID (117)" width="90" x="313" y="187">
            <parameter key="create_nominal_ids" value="true"/>
            <parameter key="offset" value="0"/>
          </operator>
          <!-- Only output 1 of this Multiply is connected in the snippet. -->
          <operator activated="true" class="multiply" compatibility="9.7.000" expanded="true" height="82" name="Multiply (20)" width="90" x="447" y="187"/>
          <!-- The transposed set feeds the attribute loop; the "original" port feeds the left input of Join (42). -->
          <operator activated="true" class="transpose" compatibility="9.7.000" expanded="true" height="82" name="Transpose (82)" width="90" x="581" y="187"/>
          <!-- Iterates over all regular attributes of the transposed set; the current attribute name is available as the macro loop_attribute. Produces one single-column example set per attribute. -->
          <operator activated="true" class="concurrency:loop_attributes" compatibility="9.7.000" expanded="true" height="82" name="Loop Attributes (5)" width="90" x="715" y="187">
            <parameter key="attribute_filter_type" value="all"/>
            <parameter key="attribute" value=""/>
            <parameter key="attributes" value=""/>
            <parameter key="use_except_expression" value="false"/>
            <parameter key="value_type" value="attribute_value"/>
            <parameter key="use_value_type_exception" value="false"/>
            <parameter key="except_value_type" value="time"/>
            <parameter key="block_type" value="attribute_block"/>
            <parameter key="use_block_type_exception" value="false"/>
            <parameter key="except_block_type" value="value_matrix_row_start"/>
            <parameter key="invert_selection" value="false"/>
            <parameter key="include_special_attributes" value="false"/>
            <parameter key="attribute_name_macro" value="loop_attribute"/>
            <parameter key="reuse_results" value="false"/>
            <parameter key="enable_parallel_execution" value="true"/>
            <process expanded="true">
              <!-- Reduces the set to the attribute of the current iteration. -->
              <operator activated="true" class="select_attributes" compatibility="9.7.000" expanded="true" height="82" name="Select Attributes (153)" width="90" x="45" y="34">
                <parameter key="attribute_filter_type" value="single"/>
                <parameter key="attribute" value="%{loop_attribute}"/>
                <parameter key="attributes" value=""/>
                <parameter key="use_except_expression" value="false"/>
                <parameter key="value_type" value="attribute_value"/>
                <parameter key="use_value_type_exception" value="false"/>
                <parameter key="except_value_type" value="time"/>
                <parameter key="block_type" value="attribute_block"/>
                <parameter key="use_block_type_exception" value="false"/>
                <parameter key="except_block_type" value="value_matrix_row_start"/>
                <parameter key="invert_selection" value="false"/>
                <parameter key="include_special_attributes" value="false"/>
              </operator>
              <!-- Stores the first value of the still unsorted column in the macro center. NOTE(review): center is not referenced anywhere else in this snippet; presumably it is used by a part of the process that is not shown. -->
              <operator activated="true" class="operator_toolbox:extract_macro_enhanced" compatibility="2.6.000" expanded="true" height="68" name="Extract Macro (Format) (3)" width="90" x="179" y="34">
                <parameter key="macro" value="center"/>
                <parameter key="macro_type" value="data_value"/>
                <parameter key="statistics" value="average"/>
                <parameter key="attribute_name" value="%{loop_attribute}"/>
                <parameter key="example_index" value="1"/>
                <list key="additional_macros"/>
                <parameter key="format_of_numericals" value="#.##"/>
                <parameter key="date_format" value=""/>
                <parameter key="time_zone" value="SYSTEM"/>
                <parameter key="locale" value="English (United States)"/>
              </operator>
              <!-- Sorts the single column ascending. -->
              <operator activated="true" class="sort" compatibility="9.7.000" expanded="true" height="82" name="Sort (2)" width="90" x="313" y="34">
                <parameter key="attribute_name" value="%{loop_attribute}"/>
                <parameter key="sorting_direction" value="increasing"/>
              </operator>
              <!-- Numbers the sorted rows with a numeric id, so the id acts as the rank of the value within its column. -->
              <operator activated="true" class="generate_id" compatibility="9.7.000" expanded="true" height="82" name="Generate ID (119)" width="90" x="447" y="34">
                <parameter key="create_nominal_ids" value="false"/>
                <parameter key="offset" value="0"/>
              </operator>
              <connect from_port="input 1" to_op="Select Attributes (153)" to_port="example set input"/>
              <connect from_op="Select Attributes (153)" from_port="example set output" to_op="Extract Macro (Format) (3)" to_port="example set"/>
              <connect from_op="Extract Macro (Format) (3)" from_port="example set" to_op="Sort (2)" to_port="example set input"/>
              <connect from_op="Sort (2)" from_port="example set output" to_op="Generate ID (119)" to_port="example set input"/>
              <connect from_op="Generate ID (119)" from_port="example set output" to_port="output 1"/>
              <portSpacing port="source_input 1" spacing="0"/>
              <portSpacing port="source_input 2" spacing="0"/>
              <portSpacing port="sink_output 1" spacing="0"/>
              <portSpacing port="sink_output 2" spacing="0"/>
            </process>
          </operator>
          <!-- Folds the collection of sorted columns into one example set kept in the store under the name dataset; the macro iteration starts at 1. -->
          <operator activated="true" class="loop_collection" compatibility="9.7.000" expanded="true" height="68" name="Loop Collection (3)" width="90" x="849" y="187">
            <parameter key="set_iteration_macro" value="true"/>
            <parameter key="macro_name" value="iteration"/>
            <parameter key="macro_start_value" value="1"/>
            <parameter key="unfold" value="false"/>
            <process expanded="true">
              <!-- First subprocess runs when iteration equals 1, the second one otherwise. -->
              <operator activated="true" class="branch" compatibility="9.7.000" expanded="true" height="82" name="Branch (7)" width="90" x="45" y="34">
                <parameter key="condition_type" value="expression"/>
                <parameter key="expression" value="%{iteration}==1"/>
                <parameter key="io_object" value="ANOVAMatrix"/>
                <parameter key="return_inner_output" value="true"/>
                <process expanded="true">
                  <!-- First column: stored as the initial dataset. -->
                  <operator activated="true" class="remember" compatibility="9.7.000" expanded="true" height="68" name="Remember (2)" width="90" x="45" y="34">
                    <parameter key="name" value="dataset"/>
                    <parameter key="io_object" value="ExampleSet"/>
                    <parameter key="store_which" value="1"/>
                    <parameter key="remove_from_process" value="true"/>
                  </operator>
                  <connect from_port="condition" to_op="Remember (2)" to_port="store"/>
                  <portSpacing port="source_condition" spacing="0"/>
                  <portSpacing port="source_input 1" spacing="0"/>
                  <portSpacing port="sink_input 1" spacing="0"/>
                </process>
                <process expanded="true">
                  <!-- Later columns: recall the accumulated dataset, left-join the current column onto it by id, and store the result again. -->
                  <operator activated="true" class="recall" compatibility="9.7.000" expanded="true" height="68" name="Recall (5)" width="90" x="45" y="34">
                    <parameter key="name" value="dataset"/>
                    <parameter key="io_object" value="ExampleSet"/>
                    <parameter key="remove_from_store" value="true"/>
                  </operator>
                  <operator activated="true" class="concurrency:join" compatibility="9.7.000" expanded="true" height="82" name="Join (96)" width="90" x="179" y="85">
                    <parameter key="remove_double_attributes" value="true"/>
                    <parameter key="join_type" value="left"/>
                    <parameter key="use_id_attribute_as_key" value="true"/>
                    <list key="key_attributes"/>
                    <parameter key="keep_both_join_attributes" value="false"/>
                  </operator>
                  <operator activated="true" class="remember" compatibility="9.7.000" expanded="true" height="68" name="Remember (6)" width="90" x="313" y="85">
                    <parameter key="name" value="dataset"/>
                    <parameter key="io_object" value="ExampleSet"/>
                    <parameter key="store_which" value="1"/>
                    <parameter key="remove_from_process" value="true"/>
                  </operator>
                  <connect from_port="condition" to_op="Join (96)" to_port="right"/>
                  <connect from_op="Recall (5)" from_port="result" to_op="Join (96)" to_port="left"/>
                  <connect from_op="Join (96)" from_port="join" to_op="Remember (6)" to_port="store"/>
                  <portSpacing port="source_condition" spacing="0"/>
                  <portSpacing port="source_input 1" spacing="0"/>
                  <portSpacing port="sink_input 1" spacing="0"/>
                </process>
              </operator>
              <connect from_port="single" to_op="Branch (7)" to_port="condition"/>
              <portSpacing port="source_single" spacing="0"/>
              <portSpacing port="sink_output 1" spacing="0"/>
            </process>
          </operator>
          <!-- Fetches the accumulated dataset. NOTE(review): there is no data connection from Loop Collection (3) to this operator; it presumably relies on being executed after the loop, so keep the operator order. -->
          <operator activated="true" class="recall" compatibility="9.7.000" expanded="true" height="68" name="Recall (6)" width="90" x="179" y="289">
            <parameter key="name" value="dataset"/>
            <parameter key="io_object" value="ExampleSet"/>
            <parameter key="remove_from_store" value="true"/>
          </operator>
          <!-- Generates nominal ids for the rows. NOTE(review): presumably the rows are still in rank order here, so that id_1, id_2 and id_3 (selected below) stand for the three smallest values; verify after the joins. -->
          <operator activated="true" class="generate_id" compatibility="9.7.000" expanded="true" height="82" name="Generate ID (118)" width="90" x="313" y="289">
            <parameter key="create_nominal_ids" value="true"/>
            <parameter key="offset" value="0"/>
          </operator>
          <operator activated="true" class="transpose" compatibility="9.7.000" expanded="true" height="82" name="Transpose (83)" width="90" x="447" y="289"/>
          <!-- Keeps only the attributes id_1, id_2 and id_3. -->
          <operator activated="true" class="select_attributes" compatibility="9.7.000" expanded="true" height="82" name="Select Attributes (154)" width="90" x="581" y="289">
            <parameter key="attribute_filter_type" value="subset"/>
            <parameter key="attribute" value=""/>
            <parameter key="attributes" value="id_1|id_2|id_3"/>
            <parameter key="use_except_expression" value="false"/>
            <parameter key="value_type" value="attribute_value"/>
            <parameter key="use_value_type_exception" value="false"/>
            <parameter key="except_value_type" value="time"/>
            <parameter key="block_type" value="attribute_block"/>
            <parameter key="use_block_type_exception" value="false"/>
            <parameter key="except_block_type" value="value_matrix_row_start"/>
            <parameter key="invert_selection" value="false"/>
            <parameter key="include_special_attributes" value="false"/>
          </operator>
          <!-- Renames id_1, id_2, id_3 to distance1, distance2, distance3. -->
          <operator activated="true" class="rename" compatibility="9.7.000" expanded="true" height="82" name="Rename (4)" width="90" x="715" y="289">
            <parameter key="old_name" value="id_1"/>
            <parameter key="new_name" value="distance1"/>
            <list key="rename_additional_attributes">
              <parameter key="id_2" value="distance2"/>
              <parameter key="id_3" value="distance3"/>
            </list>
          </operator>
          <!-- Inner join by id of the "original" output of Transpose (82) (left) with the three renamed columns (right). A breakpoint is set after this operator. -->
          <operator activated="true" breakpoints="after" class="concurrency:join" compatibility="9.7.000" expanded="true" height="82" name="Join (42)" width="90" x="849" y="289">
            <parameter key="remove_double_attributes" value="true"/>
            <parameter key="join_type" value="inner"/>
            <parameter key="use_id_attribute_as_key" value="true"/>
            <list key="key_attributes"/>
            <parameter key="keep_both_join_attributes" value="false"/>
          </operator>
          <connect from_op="Generate ID (117)" from_port="example set output" to_op="Multiply (20)" to_port="input"/>
          <connect from_op="Multiply (20)" from_port="output 1" to_op="Transpose (82)" to_port="example set input"/>
          <connect from_op="Transpose (82)" from_port="example set output" to_op="Loop Attributes (5)" to_port="input 1"/>
          <connect from_op="Transpose (82)" from_port="original" to_op="Join (42)" to_port="left"/>
          <connect from_op="Loop Attributes (5)" from_port="output 1" to_op="Loop Collection (3)" to_port="collection"/>
          <connect from_op="Recall (6)" from_port="result" to_op="Generate ID (118)" to_port="example set input"/>
          <connect from_op="Generate ID (118)" from_port="example set output" to_op="Transpose (83)" to_port="example set input"/>
          <connect from_op="Transpose (83)" from_port="example set output" to_op="Select Attributes (154)" to_port="example set input"/>
          <connect from_op="Select Attributes (154)" from_port="example set output" to_op="Rename (4)" to_port="example set input"/>
          <connect from_op="Rename (4)" from_port="example set output" to_op="Join (42)" to_port="right"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
        </process>
      </operator>
    </process>
    


Sign In or Register to comment.