The RapidMiner community is on read-only mode until further notice. Technical support via cases will continue to work as is. For any urgent licensing related requests from Students/Faculty members, please use the Altair academic forum here.
Generate Attribute to find 2nd and 3rd minimums
I have a large data set: 17 attributes, ~1.6 million rows. I am looking to generate 3 attributes: the minimum, 2nd minimum, and 3rd minimum of each row. The first minimum is simple using the Generate Aggregation operator, but after that it gets messy. I can think of a few ways to do this: setting a macro to the minimum and writing two huge sets of if statements in Generate Attributes, where each one checks every attribute to see whether it matches the minimum value, returning missing if it does and the value otherwise, etc.; or a loop with transpose and sorting. But these seem a bit inelegant. Are there any simpler methods I am missing?
Tagged:
0
Best Answer
-
hbajpai Member Posts: 102 Unicorn
Hey @ZKuiper,
I can think of a leaner way to do this utilizing one sorting that can help you extract all the mins together as macros. Check out the XML.
<?xml version="1.0" encoding="UTF-8"?>
<process version="9.7.001">
  <!--
    Purpose: add three attributes (min_1, min_2, min_3) holding the smallest,
    second smallest and third smallest value of every row.

    Approach: for each row i, isolate the row, transpose it so its values form
    a single column (att_1), sort that column in increasing order, read the
    first three values into macros, and write the macros back onto the row.
    The per-row results are appended into one example set at the end.
  -->
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="9.7.001" expanded="true" name="Process">
    <parameter key="logverbosity" value="init"/>
    <parameter key="random_seed" value="2001"/>
    <parameter key="send_mail" value="never"/>
    <parameter key="notification_email" value=""/>
    <parameter key="process_duration_for_mail" value="30"/>
    <parameter key="encoding" value="SYSTEM"/>
    <process expanded="true">
      <!-- Demo input; replace the repository entry with your own data. -->
      <operator activated="true" class="retrieve" compatibility="9.7.001" expanded="true" height="68" name="Retrieve Sonar" width="90" x="45" y="34">
        <parameter key="repository_entry" value="//Samples/data/Sonar"/>
      </operator>
      <!-- Create the three result attributes up front as missing numerics; they are overwritten inside the loop. -->
      <operator activated="true" class="generate_attributes" compatibility="9.7.001" expanded="true" height="82" name="Generate Attributes (2)" width="90" x="179" y="136">
        <list key="function_descriptions">
          <parameter key="min_1" value="MISSING_NUMERIC"/>
          <parameter key="min_2" value="MISSING_NUMERIC"/>
          <parameter key="min_3" value="MISSING_NUMERIC"/>
        </list>
        <parameter key="keep_all" value="true"/>
      </operator>
      <!-- Store the number of examples in the macro total_rows; it drives the loop count below. -->
      <operator activated="true" class="extract_macro" compatibility="9.7.001" expanded="true" height="68" name="Extract Macro" width="90" x="313" y="136">
        <parameter key="macro" value="total_rows"/>
        <parameter key="macro_type" value="number_of_examples"/>
        <parameter key="statistics" value="average"/>
        <parameter key="attribute_name" value=""/>
        <list key="additional_macros"/>
      </operator>
      <!-- One iteration per row; the macro i is used as the row index by Filter Example Range. -->
      <operator activated="true" class="concurrency:loop" compatibility="9.7.001" expanded="true" height="82" name="Loop" width="90" x="447" y="136">
        <parameter key="number_of_iterations" value="%{total_rows}"/>
        <parameter key="iteration_macro" value="i"/>
        <parameter key="reuse_results" value="false"/>
        <parameter key="enable_parallel_execution" value="true"/>
        <process expanded="true">
          <!-- Keep only row i. -->
          <operator activated="true" class="filter_example_range" compatibility="9.7.001" expanded="true" height="82" name="Filter Example Range" width="90" x="112" y="85">
            <parameter key="first_example" value="%{i}"/>
            <parameter key="last_example" value="%{i}"/>
            <parameter key="invert_filter" value="false"/>
          </operator>
          <!-- Copy 1 is transposed and sorted to find the minimums; copy 2 receives the results. -->
          <operator activated="true" class="multiply" compatibility="9.7.001" expanded="true" height="103" name="Multiply" width="90" x="246" y="85"/>
          <operator activated="true" class="transpose" compatibility="9.7.001" expanded="true" height="82" name="Transpose" width="90" x="380" y="340"/>
          <operator activated="true" class="sort" compatibility="9.7.001" expanded="true" height="82" name="Sort" width="90" x="514" y="340">
            <parameter key="attribute_name" value="att_1"/>
            <parameter key="sorting_direction" value="increasing"/>
          </operator>
          <!--
            The next three operators read examples 1, 2 and 3 of the sorted column
            into the macros min_1, min_2 and min_3.
            The numeric pattern keeps up to ten fraction digits. The earlier pattern
            "#.##" rounded every extracted value to two decimals, which distorts
            minimums of small-valued data such as Sonar.
            NOTE(review): assumes the pattern follows java.text.DecimalFormat; confirm
            against the Operator Toolbox documentation.
          -->
          <operator activated="true" class="operator_toolbox:extract_macro_enhanced" compatibility="2.6.000" expanded="true" height="68" name="Extract Macro (Format)" width="90" x="648" y="340">
            <parameter key="macro" value="min_1"/>
            <parameter key="macro_type" value="data_value"/>
            <parameter key="statistics" value="average"/>
            <parameter key="attribute_name" value="att_1"/>
            <parameter key="example_index" value="1"/>
            <list key="additional_macros"/>
            <parameter key="format_of_numericals" value="0.##########"/>
            <parameter key="date_format" value=""/>
            <parameter key="time_zone" value="SYSTEM"/>
            <parameter key="locale" value="English (United States)"/>
          </operator>
          <operator activated="true" class="operator_toolbox:extract_macro_enhanced" compatibility="2.6.000" expanded="true" height="68" name="Extract Macro (Format) (2)" width="90" x="782" y="340">
            <parameter key="macro" value="min_2"/>
            <parameter key="macro_type" value="data_value"/>
            <parameter key="statistics" value="average"/>
            <parameter key="attribute_name" value="att_1"/>
            <parameter key="example_index" value="2"/>
            <list key="additional_macros"/>
            <parameter key="format_of_numericals" value="0.##########"/>
            <parameter key="date_format" value=""/>
            <parameter key="time_zone" value="SYSTEM"/>
            <parameter key="locale" value="English (United States)"/>
          </operator>
          <operator activated="true" class="operator_toolbox:extract_macro_enhanced" compatibility="2.6.000" expanded="true" height="68" name="Extract Macro (Format) (3)" width="90" x="916" y="340">
            <parameter key="macro" value="min_3"/>
            <parameter key="macro_type" value="data_value"/>
            <parameter key="statistics" value="average"/>
            <parameter key="attribute_name" value="att_1"/>
            <parameter key="example_index" value="3"/>
            <list key="additional_macros"/>
            <parameter key="format_of_numericals" value="0.##########"/>
            <parameter key="date_format" value=""/>
            <parameter key="time_zone" value="SYSTEM"/>
            <parameter key="locale" value="English (United States)"/>
          </operator>
          <!-- Write the three macros onto the untouched copy of row i. -->
          <operator activated="true" class="generate_attributes" compatibility="9.7.001" expanded="true" height="82" name="Generate Attributes (3)" width="90" x="447" y="85">
            <list key="function_descriptions">
              <parameter key="min_1" value="eval(%{min_1})"/>
              <parameter key="min_2" value="eval(%{min_2})"/>
              <parameter key="min_3" value="eval(%{min_3})"/>
            </list>
            <parameter key="keep_all" value="true"/>
          </operator>
          <!-- Operator names in from_op / to_op must match the name attributes above exactly, so each stays on one line. -->
          <connect from_port="input 1" to_op="Filter Example Range" to_port="example set input"/>
          <connect from_op="Filter Example Range" from_port="example set output" to_op="Multiply" to_port="input"/>
          <connect from_op="Multiply" from_port="output 1" to_op="Transpose" to_port="example set input"/>
          <connect from_op="Multiply" from_port="output 2" to_op="Generate Attributes (3)" to_port="example set input"/>
          <connect from_op="Transpose" from_port="example set output" to_op="Sort" to_port="example set input"/>
          <connect from_op="Sort" from_port="example set output" to_op="Extract Macro (Format)" to_port="example set"/>
          <connect from_op="Extract Macro (Format)" from_port="example set" to_op="Extract Macro (Format) (2)" to_port="example set"/>
          <connect from_op="Extract Macro (Format) (2)" from_port="example set" to_op="Extract Macro (Format) (3)" to_port="example set"/>
          <connect from_op="Generate Attributes (3)" from_port="example set output" to_port="output 1"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="source_input 2" spacing="0"/>
          <portSpacing port="sink_output 1" spacing="0"/>
          <portSpacing port="sink_output 2" spacing="0"/>
        </process>
      </operator>
      <!-- Merge the single-row results of all iterations back into one example set. -->
      <operator activated="true" class="append" compatibility="9.7.001" expanded="true" height="82" name="Append" width="90" x="581" y="136">
        <parameter key="datamanagement" value="double_array"/>
        <parameter key="data_management" value="auto"/>
        <parameter key="merge_type" value="all"/>
      </operator>
      <connect from_op="Retrieve Sonar" from_port="output" to_op="Generate Attributes (2)" to_port="example set input"/>
      <connect from_op="Generate Attributes (2)" from_port="example set output" to_op="Extract Macro" to_port="example set"/>
      <connect from_op="Extract Macro" from_port="example set" to_op="Loop" to_port="input 1"/>
      <connect from_op="Loop" from_port="output 1" to_op="Append" to_port="example set 1"/>
      <connect from_op="Append" from_port="merged set" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>
Best,
Harshit1
Answers