The RapidMiner community is in read-only mode until further notice. Technical support via cases will continue to work as is. For any urgent licensing-related requests from Students/Faculty members, please use the Altair academic forum here.
Why is the minimum of integers a float-formatted String?
When solving a task from the tutorial, I encountered the following behaviour: extracting a macro via Extract Macro --> statistics --> min produces a string in float format from a column of integer values. Why is the result not an integer, so that it has to be converted via Generate Macro?
The upper part of the example (in the illustration) may be ignored. That was only part of the tutorial.
The upper part of the example (in the illustration) may be ignored. That was only part of the tutorial.
<?xml version="1.0" encoding="UTF-8"?>
<!--
  RapidMiner process that limits every Passenger Class of the Titanic sample
  data to the size of the smallest class.

  Lower chain: count the rows per Passenger Class, extract the minimum count
  into the macro "min class size", and convert it to a rounded number in the
  macro "min nb".
  Upper chain: copy that value into the macro "max size", loop over the
  Passenger Class values, cap each class at %{max size} rows, and append the
  per-class results.
-->
<process version="9.10.001">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="9.4.000" expanded="true" name="Process" origin="GENERATED_TUTORIAL">
    <parameter key="logverbosity" value="init"/>
    <parameter key="random_seed" value="2001"/>
    <parameter key="send_mail" value="never"/>
    <parameter key="notification_email" value=""/>
    <parameter key="process_duration_for_mail" value="30"/>
    <parameter key="encoding" value="SYSTEM"/>
    <process expanded="true">
      <operator activated="true" class="retrieve" compatibility="9.10.001" expanded="true" height="68" name="Retrieve Titanic" width="90" x="45" y="34">
        <parameter key="repository_entry" value="//Samples/data/Titanic"/>
      </operator>
      <!-- Keep only the Passenger Class attribute; the "original" port still carries the full data set. -->
      <operator activated="true" class="select_attributes" compatibility="9.10.001" expanded="true" height="82" name="Select Attributes" width="90" x="45" y="238">
        <parameter key="attribute_filter_type" value="subset"/>
        <parameter key="attribute" value=""/>
        <parameter key="attributes" value="Passenger Class"/>
        <parameter key="use_except_expression" value="false"/>
        <parameter key="value_type" value="attribute_value"/>
        <parameter key="use_value_type_exception" value="false"/>
        <parameter key="except_value_type" value="time"/>
        <parameter key="block_type" value="attribute_block"/>
        <parameter key="use_block_type_exception" value="false"/>
        <parameter key="except_block_type" value="value_matrix_row_start"/>
        <parameter key="invert_selection" value="false"/>
        <parameter key="include_special_attributes" value="false"/>
      </operator>
      <!-- Count the examples per Passenger Class; the result attribute is count(Passenger Class). -->
      <operator activated="true" class="aggregate" compatibility="9.10.001" expanded="true" height="82" name="Aggregate" width="90" x="179" y="238">
        <parameter key="use_default_aggregation" value="false"/>
        <parameter key="attribute_filter_type" value="all"/>
        <parameter key="attribute" value=""/>
        <parameter key="attributes" value=""/>
        <parameter key="use_except_expression" value="false"/>
        <parameter key="value_type" value="attribute_value"/>
        <parameter key="use_value_type_exception" value="false"/>
        <parameter key="except_value_type" value="time"/>
        <parameter key="block_type" value="attribute_block"/>
        <parameter key="use_block_type_exception" value="false"/>
        <parameter key="except_block_type" value="value_matrix_row_start"/>
        <parameter key="invert_selection" value="false"/>
        <parameter key="include_special_attributes" value="false"/>
        <parameter key="default_aggregation_function" value="average"/>
        <list key="aggregation_attributes">
          <parameter key="Passenger Class" value="count"/>
        </list>
        <parameter key="group_by_attributes" value="Passenger Class"/>
        <parameter key="count_all_combinations" value="false"/>
        <parameter key="only_distinct" value="false"/>
        <parameter key="ignore_missings" value="true"/>
      </operator>
      <!-- Store the minimum of count(Passenger Class) in the macro "min class size". -->
      <operator activated="true" class="extract_macro" compatibility="9.10.001" expanded="true" height="68" name="Extract {min class size}" width="90" x="313" y="238">
        <parameter key="macro" value="min class size"/>
        <parameter key="macro_type" value="statistics"/>
        <parameter key="statistics" value="min"/>
        <parameter key="attribute_name" value="count(Passenger Class)"/>
        <list key="additional_macros"/>
      </operator>
      <!--
        Parse the extracted macro text as a number and round it, storing the
        result in the macro "min nb". The operator name previously carried a
        stray closing brace ("Generate {min nb}}"); it is now consistent with
        the sibling operator names, and the two connections that reference it
        further down use the corrected name.
      -->
      <operator activated="true" class="generate_macro" compatibility="9.10.001" expanded="true" height="82" name="Generate {min nb}" width="90" x="447" y="238">
        <list key="function_descriptions">
          <parameter key="min nb" value="round(parse(%{min class size}))"/>
        </list>
      </operator>
      <!-- Copy %{min nb} into the macro "max size", which the loop below reads. -->
      <operator activated="true" class="set_macro" compatibility="9.10.001" expanded="true" height="103" name="Set {max size}" width="90" x="179" y="34">
        <parameter key="macro" value="max size"/>
        <parameter key="value" value="%{min nb}"/>
      </operator>
      <!-- One iteration per distinct Passenger Class value, exposed as %{loop_value}. -->
      <operator activated="true" class="concurrency:loop_values" compatibility="9.10.001" expanded="true" height="82" name="Loop Values" width="90" x="313" y="34">
        <parameter key="attribute" value="Passenger Class"/>
        <parameter key="iteration_macro" value="loop_value"/>
        <parameter key="reuse_results" value="false"/>
        <parameter key="enable_parallel_execution" value="true"/>
        <process expanded="true">
          <!-- Keep only the examples of the class handled in this iteration. -->
          <operator activated="true" class="filter_examples" compatibility="9.10.001" expanded="true" height="103" name="Filter Examples" width="90" x="45" y="34">
            <parameter key="parameter_expression" value=""/>
            <parameter key="condition_class" value="custom_filters"/>
            <parameter key="invert_filter" value="false"/>
            <list key="filters_list">
              <parameter key="filters_entry_key" value="Passenger Class.equals.%{loop_value}"/>
            </list>
            <parameter key="filters_logic_and" value="true"/>
            <parameter key="filters_check_metadata" value="true"/>
          </operator>
          <!--
            Branch on the max_examples condition with the limit %{max size}.
            The first inner process forwards the data unchanged; the second one
            draws an absolute sample of %{max size} examples.
            NOTE(review): presumably the first inner process runs when the
            class has at most %{max size} examples and the second otherwise;
            confirm against the Branch operator documentation.
          -->
          <operator activated="true" class="branch" compatibility="9.10.001" expanded="true" height="82" name="Branch" width="90" x="179" y="34">
            <parameter key="condition_type" value="max_examples"/>
            <parameter key="condition_value" value="%{max size}"/>
            <parameter key="expression" value=""/>
            <parameter key="io_object" value="ANOVAMatrix"/>
            <parameter key="return_inner_output" value="true"/>
            <process expanded="true">
              <connect from_port="condition" to_port="input 1"/>
              <portSpacing port="source_condition" spacing="0"/>
              <portSpacing port="source_input 1" spacing="0"/>
              <portSpacing port="sink_input 1" spacing="0"/>
              <portSpacing port="sink_input 2" spacing="0"/>
            </process>
            <process expanded="true">
              <operator activated="true" class="sample" compatibility="9.10.001" expanded="true" height="82" name="Sample" width="90" x="112" y="34">
                <parameter key="sample" value="absolute"/>
                <parameter key="balance_data" value="false"/>
                <parameter key="sample_size" value="%{max size}"/>
                <parameter key="sample_ratio" value="0.1"/>
                <parameter key="sample_probability" value="0.1"/>
                <list key="sample_size_per_class"/>
                <list key="sample_ratio_per_class"/>
                <list key="sample_probability_per_class"/>
                <parameter key="use_local_random_seed" value="false"/>
                <parameter key="local_random_seed" value="1992"/>
              </operator>
              <connect from_port="condition" to_op="Sample" to_port="example set input"/>
              <connect from_op="Sample" from_port="example set output" to_port="input 1"/>
              <portSpacing port="source_condition" spacing="0"/>
              <portSpacing port="source_input 1" spacing="0"/>
              <portSpacing port="sink_input 1" spacing="0"/>
              <portSpacing port="sink_input 2" spacing="0"/>
            </process>
          </operator>
          <connect from_port="input 1" to_op="Filter Examples" to_port="example set input"/>
          <connect from_op="Filter Examples" from_port="example set output" to_op="Branch" to_port="condition"/>
          <connect from_op="Branch" from_port="input 1" to_port="output 1"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="source_input 2" spacing="0"/>
          <portSpacing port="sink_output 1" spacing="0"/>
          <portSpacing port="sink_output 2" spacing="0"/>
        </process>
      </operator>
      <!-- Merge the per-class example sets produced by the loop into one result. -->
      <operator activated="true" class="append" compatibility="9.10.001" expanded="true" height="82" name="Append" width="90" x="447" y="34">
        <parameter key="datamanagement" value="double_array"/>
        <parameter key="data_management" value="auto"/>
        <parameter key="merge_type" value="all"/>
      </operator>
      <connect from_op="Retrieve Titanic" from_port="output" to_op="Select Attributes" to_port="example set input"/>
      <connect from_op="Select Attributes" from_port="example set output" to_op="Aggregate" to_port="example set input"/>
      <connect from_op="Select Attributes" from_port="original" to_op="Set {max size}" to_port="through 1"/>
      <connect from_op="Aggregate" from_port="example set output" to_op="Extract {min class size}" to_port="example set"/>
      <connect from_op="Extract {min class size}" from_port="example set" to_op="Generate {min nb}" to_port="through 1"/>
      <!-- The through 2 link feeds the macro chain into Set {max size}; presumably it only enforces execution order (confirm). -->
      <connect from_op="Generate {min nb}" from_port="through 1" to_op="Set {max size}" to_port="through 2"/>
      <connect from_op="Set {max size}" from_port="through 1" to_op="Loop Values" to_port="input 1"/>
      <connect from_op="Loop Values" from_port="output 1" to_op="Append" to_port="example set 1"/>
      <connect from_op="Append" from_port="merged set" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <background height="232" location="//Samples/Tutorials/Data Handling/05/tutorial5" width="1502" x="26" y="47"/>
    </process>
  </operator>
</process>
0
Best Answer
-
MartinLiebig Administrator, Moderator, Employee, RapidMiner Certified Analyst, RapidMiner Certified Expert, University Professor Posts: 3,525 RM Data Scientist
Hi,
Macros are always strings. Extract Macro uses a default format for the conversion to a string, which includes the ".0". I think the tutorial just works around this.
You can also use Extract Macro (Format) from the Operator Toolbox extension, which lets you define the integer format explicitly.
Cheers,
Martin
- Sr. Director Data Solutions, Altair RapidMiner -
Dortmund, Germany1