🎉 🎉 RAPIDMINER 9.10 IS OUT!!! 🎉🎉

Download the latest version helping analytics teams accelerate time-to-value for streaming and IIOT use cases.

CLICK HERE TO DOWNLOAD

How do I aggregate half-year data when I have three years of data? Find the attached process for reference.

sgnarkhede2016sgnarkhede2016 Member Posts: 133 Contributor II
edited April 2020 in Help
Hello,
I have done all the aggregations (monthly/quarterly/yearly), but for the half-year aggregation I am getting an issue: during aggregation it puts every customer record from all years into the first half (1), and likewise into the second half (2), instead of keeping the years separate.

It should be like this
Year    
2016  1
          2

2017   1
           2

Currently, I am getting the 2016 and 2017 sums combined in half 1 as well as in half 2.

Please find attached process

<?xml version="1.0" encoding="UTF-8"?><process version="9.5.001">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="9.5.001" expanded="true" name="Process">
    <parameter key="logverbosity" value="init"/>
    <parameter key="random_seed" value="2001"/>
    <parameter key="send_mail" value="never"/>
    <parameter key="notification_email" value=""/>
    <parameter key="process_duration_for_mail" value="30"/>
    <parameter key="encoding" value="SYSTEM"/>
    <process expanded="true">
      <operator activated="true" class="retrieve" compatibility="9.5.001" expanded="true" height="68" name="Retrieve dummy Data" width="90" x="45" y="238">
        <parameter key="repository_entry" value="../Data/dummy Data"/>
      </operator>
      <operator activated="true" class="subprocess" compatibility="9.5.001" expanded="true" height="82" name="Subprocess (2)" width="90" x="179" y="238">
        <process expanded="true">
          <operator activated="true" class="date_to_numerical" compatibility="9.0.001" expanded="true" height="82" name="Date to Numerical" width="90" x="45" y="34">
            <parameter key="attribute_name" value="InputDateTime"/>
            <parameter key="time_unit" value="year"/>
            <parameter key="millisecond_relative_to" value="second"/>
            <parameter key="second_relative_to" value="minute"/>
            <parameter key="minute_relative_to" value="hour"/>
            <parameter key="hour_relative_to" value="day"/>
            <parameter key="day_relative_to" value="month"/>
            <parameter key="week_relative_to" value="year"/>
            <parameter key="month_relative_to" value="year"/>
            <parameter key="quarter_relative_to" value="year"/>
            <parameter key="half_year_relative_to" value="year"/>
            <parameter key="year_relative_to" value="era"/>
            <parameter key="keep_old_attribute" value="true"/>
            <description align="center" color="transparent" colored="false" width="126">Extract Year</description>
          </operator>
          <operator activated="true" class="date_to_numerical" compatibility="9.0.001" expanded="true" height="82" name="Date to Numerical (2)" width="90" x="179" y="34">
            <parameter key="attribute_name" value="InputDateTime"/>
            <parameter key="time_unit" value="quarter"/>
            <parameter key="millisecond_relative_to" value="second"/>
            <parameter key="second_relative_to" value="minute"/>
            <parameter key="minute_relative_to" value="hour"/>
            <parameter key="hour_relative_to" value="day"/>
            <parameter key="day_relative_to" value="month"/>
            <parameter key="week_relative_to" value="year"/>
            <parameter key="month_relative_to" value="year"/>
            <parameter key="quarter_relative_to" value="year"/>
            <parameter key="half_year_relative_to" value="year"/>
            <parameter key="year_relative_to" value="epoch"/>
            <parameter key="keep_old_attribute" value="true"/>
            <description align="center" color="transparent" colored="false" width="126">Exract Quarter of Year</description>
          </operator>
          <operator activated="true" class="date_to_numerical" compatibility="9.0.001" expanded="true" height="82" name="Date to Numerical (3)" width="90" x="313" y="34">
            <parameter key="attribute_name" value="InputDateTime"/>
            <parameter key="time_unit" value="month"/>
            <parameter key="millisecond_relative_to" value="second"/>
            <parameter key="second_relative_to" value="minute"/>
            <parameter key="minute_relative_to" value="hour"/>
            <parameter key="hour_relative_to" value="day"/>
            <parameter key="day_relative_to" value="month"/>
            <parameter key="week_relative_to" value="year"/>
            <parameter key="month_relative_to" value="year"/>
            <parameter key="quarter_relative_to" value="year"/>
            <parameter key="half_year_relative_to" value="year"/>
            <parameter key="year_relative_to" value="epoch"/>
            <parameter key="keep_old_attribute" value="true"/>
            <description align="center" color="transparent" colored="false" width="126">Extract Month of Year</description>
          </operator>
          <operator activated="true" class="date_to_numerical" compatibility="9.0.001" expanded="true" height="82" name="Date to Numerical (4)" width="90" x="447" y="34">
            <parameter key="attribute_name" value="InputDateTime"/>
            <parameter key="time_unit" value="week"/>
            <parameter key="millisecond_relative_to" value="second"/>
            <parameter key="second_relative_to" value="minute"/>
            <parameter key="minute_relative_to" value="hour"/>
            <parameter key="hour_relative_to" value="day"/>
            <parameter key="day_relative_to" value="month"/>
            <parameter key="week_relative_to" value="month"/>
            <parameter key="month_relative_to" value="year"/>
            <parameter key="quarter_relative_to" value="year"/>
            <parameter key="half_year_relative_to" value="year"/>
            <parameter key="year_relative_to" value="epoch"/>
            <parameter key="keep_old_attribute" value="true"/>
            <description align="center" color="transparent" colored="false" width="126">Extract Week of Month</description>
          </operator>
          <operator activated="true" class="date_to_numerical" compatibility="9.0.001" expanded="true" height="82" name="Date to Numerical (6)" width="90" x="581" y="34">
            <parameter key="attribute_name" value="InputDateTime"/>
            <parameter key="time_unit" value="half year"/>
            <parameter key="millisecond_relative_to" value="second"/>
            <parameter key="second_relative_to" value="minute"/>
            <parameter key="minute_relative_to" value="hour"/>
            <parameter key="hour_relative_to" value="day"/>
            <parameter key="day_relative_to" value="month"/>
            <parameter key="week_relative_to" value="month"/>
            <parameter key="month_relative_to" value="year"/>
            <parameter key="quarter_relative_to" value="year"/>
            <parameter key="half_year_relative_to" value="year"/>
            <parameter key="year_relative_to" value="epoch"/>
            <parameter key="keep_old_attribute" value="true"/>
            <description align="center" color="transparent" colored="false" width="126">Extract Half of Year</description>
          </operator>
          <operator activated="true" class="date_to_numerical" compatibility="9.0.001" expanded="true" height="82" name="Date to Numerical (15)" width="90" x="715" y="34">
            <parameter key="attribute_name" value="InputDateTime"/>
            <parameter key="time_unit" value="day"/>
            <parameter key="millisecond_relative_to" value="second"/>
            <parameter key="second_relative_to" value="minute"/>
            <parameter key="minute_relative_to" value="hour"/>
            <parameter key="hour_relative_to" value="day"/>
            <parameter key="day_relative_to" value="week"/>
            <parameter key="week_relative_to" value="month"/>
            <parameter key="month_relative_to" value="year"/>
            <parameter key="quarter_relative_to" value="year"/>
            <parameter key="half_year_relative_to" value="year"/>
            <parameter key="year_relative_to" value="epoch"/>
            <parameter key="keep_old_attribute" value="true"/>
            <description align="center" color="transparent" colored="false" width="126">Extract Day of Month</description>
          </operator>
          <operator activated="true" class="rename" compatibility="9.5.001" expanded="true" height="82" name="Rename Extracted Features" width="90" x="849" y="34">
            <parameter key="old_name" value="InputDateTime_year"/>
            <parameter key="new_name" value="YEAR"/>
            <list key="rename_additional_attributes">
              <parameter key="InputDateTime_quarter" value="QUARTER"/>
              <parameter key="InputDateTime_month" value="MONTH"/>
              <parameter key="InputDateTime_week" value="WEEK"/>
              <parameter key="InputDateTime_half year" value="HALF YEAR"/>
              <parameter key="InputDateTime_day" value="DAY"/>
            </list>
          </operator>
          <connect from_port="in 1" to_op="Date to Numerical" to_port="example set input"/>
          <connect from_op="Date to Numerical" from_port="example set output" to_op="Date to Numerical (2)" to_port="example set input"/>
          <connect from_op="Date to Numerical (2)" from_port="example set output" to_op="Date to Numerical (3)" to_port="example set input"/>
          <connect from_op="Date to Numerical (3)" from_port="example set output" to_op="Date to Numerical (4)" to_port="example set input"/>
          <connect from_op="Date to Numerical (4)" from_port="example set output" to_op="Date to Numerical (6)" to_port="example set input"/>
          <connect from_op="Date to Numerical (6)" from_port="example set output" to_op="Date to Numerical (15)" to_port="example set input"/>
          <connect from_op="Date to Numerical (15)" from_port="example set output" to_op="Rename Extracted Features" to_port="example set input"/>
          <connect from_op="Rename Extracted Features" from_port="example set output" to_port="out 1"/>
          <portSpacing port="source_in 1" spacing="0"/>
          <portSpacing port="source_in 2" spacing="0"/>
          <portSpacing port="sink_out 1" spacing="0"/>
          <portSpacing port="sink_out 2" spacing="0"/>
        </process>
        <description align="center" color="transparent" colored="false" width="126">Data Preparation</description>
      </operator>
      <operator activated="true" class="rename" compatibility="9.5.001" expanded="true" height="82" name="Rename" width="90" x="313" y="238">
        <parameter key="old_name" value="Amount"/>
        <parameter key="new_name" value="AMOUNT"/>
        <list key="rename_additional_attributes"/>
      </operator>
      <operator activated="true" class="retrieve" compatibility="9.5.001" expanded="true" height="68" name="Retrieve Config_ProfileOnBins" width="90" x="45" y="34">
        <parameter key="repository_entry" value="../Data/Config_ProfileOnBins"/>
      </operator>
      <operator activated="true" class="filter_examples" compatibility="9.5.001" expanded="true" height="103" name="Filter Examples (22)" width="90" x="179" y="34">
        <parameter key="parameter_expression" value=""/>
        <parameter key="condition_class" value="custom_filters"/>
        <parameter key="invert_filter" value="false"/>
        <list key="filters_list">
          <parameter key="filters_entry_key" value="ENABLE.equals.Yes"/>
        </list>
        <parameter key="filters_logic_and" value="true"/>
        <parameter key="filters_check_metadata" value="true"/>
      </operator>
      <operator activated="true" class="extract_macro" compatibility="9.5.001" expanded="true" height="68" name="Extract Macro" width="90" x="313" y="34">
        <parameter key="macro" value="no_of_examples"/>
        <parameter key="macro_type" value="number_of_examples"/>
        <parameter key="statistics" value="average"/>
        <parameter key="attribute_name" value=""/>
        <list key="additional_macros"/>
      </operator>
      <operator activated="false" class="utility:create_exampleset" compatibility="9.5.001" expanded="true" height="68" name="Create ExampleSet" width="90" x="45" y="595">
        <parameter key="generator_type" value="numeric series"/>
        <parameter key="number_of_examples" value="1000"/>
        <parameter key="use_stepsize" value="true"/>
        <list key="function_descriptions"/>
        <parameter key="add_id_attribute" value="false"/>
        <list key="numeric_series_configuration">
          <parameter key="ProfileID" value="linear.1\.0.1\.0"/>
        </list>
        <list key="date_series_configuration"/>
        <list key="date_series_configuration (interval)"/>
        <parameter key="date_format" value="yyyy-MM-dd HH:mm:ss"/>
        <parameter key="time_zone" value="SYSTEM"/>
        <parameter key="column_separator" value=","/>
        <parameter key="parse_all_as_nominal" value="false"/>
        <parameter key="decimal_point_character" value="."/>
        <parameter key="trim_attribute_names" value="true"/>
      </operator>
      <operator activated="false" class="generate_attributes" compatibility="9.5.001" expanded="true" height="82" name="Generate Attributes (5)" width="90" x="246" y="595">
        <list key="function_descriptions">
          <parameter key="NEW_ID" value="concat(&quot;CBP&quot;,suffix(concat(&quot;0000&quot;,str(ProfileID)),5))"/>
        </list>
        <parameter key="keep_all" value="true"/>
      </operator>
      <operator activated="true" class="concurrency:loop" compatibility="9.5.001" expanded="true" height="103" name="Loop" width="90" x="447" y="187">
        <parameter key="number_of_iterations" value="%{no_of_examples}"/>
        <parameter key="iteration_macro" value="iteration"/>
        <parameter key="reuse_results" value="false"/>
        <parameter key="enable_parallel_execution" value="true"/>
        <process expanded="true">
          <operator activated="true" class="filter_example_range" compatibility="9.5.001" expanded="true" height="82" name="Filter Example Range" width="90" x="45" y="34">
            <parameter key="first_example" value="%{iteration}"/>
            <parameter key="last_example" value="%{iteration}"/>
            <parameter key="invert_filter" value="false"/>
          </operator>
          <operator activated="true" class="extract_macro" compatibility="9.5.001" expanded="true" height="68" name="Extract Macro (43)" width="90" x="179" y="34">
            <parameter key="macro" value="key_attribute_type1"/>
            <parameter key="macro_type" value="data_value"/>
            <parameter key="statistics" value="average"/>
            <parameter key="attribute_name" value="BIN"/>
            <parameter key="example_index" value="1"/>
            <list key="additional_macros"/>
          </operator>
          <operator activated="true" class="aggregate" compatibility="8.2.000" expanded="true" height="82" name="Aggregate" width="90" x="112" y="136">
            <parameter key="use_default_aggregation" value="false"/>
            <parameter key="attribute_filter_type" value="all"/>
            <parameter key="attribute" value=""/>
            <parameter key="attributes" value=""/>
            <parameter key="use_except_expression" value="false"/>
            <parameter key="value_type" value="attribute_value"/>
            <parameter key="use_value_type_exception" value="false"/>
            <parameter key="except_value_type" value="time"/>
            <parameter key="block_type" value="attribute_block"/>
            <parameter key="use_block_type_exception" value="false"/>
            <parameter key="except_block_type" value="value_matrix_row_start"/>
            <parameter key="invert_selection" value="false"/>
            <parameter key="include_special_attributes" value="false"/>
            <parameter key="default_aggregation_function" value="average"/>
            <list key="aggregation_attributes">
              <parameter key="AMOUNT" value="sum"/>
            </list>
            <parameter key="group_by_attributes" value="|%{key_attribute_type1}|Customer"/>
            <parameter key="count_all_combinations" value="false"/>
            <parameter key="only_distinct" value="false"/>
            <parameter key="ignore_missings" value="true"/>
          </operator>
          <operator activated="true" class="aggregate" compatibility="8.2.000" expanded="true" height="82" name="Aggregate (117)" width="90" x="246" y="289">
            <parameter key="use_default_aggregation" value="false"/>
            <parameter key="attribute_filter_type" value="all"/>
            <parameter key="attribute" value=""/>
            <parameter key="attributes" value=""/>
            <parameter key="use_except_expression" value="false"/>
            <parameter key="value_type" value="attribute_value"/>
            <parameter key="use_value_type_exception" value="false"/>
            <parameter key="except_value_type" value="time"/>
            <parameter key="block_type" value="attribute_block"/>
            <parameter key="use_block_type_exception" value="false"/>
            <parameter key="except_block_type" value="value_matrix_row_start"/>
            <parameter key="invert_selection" value="false"/>
            <parameter key="include_special_attributes" value="false"/>
            <parameter key="default_aggregation_function" value="average"/>
            <list key="aggregation_attributes">
              <parameter key="Customer" value="count"/>
              <parameter key="AMOUNT" value="sum"/>
            </list>
            <parameter key="group_by_attributes" value="%{key_attribute_type1}|Customer|InputDateTime"/>
            <parameter key="count_all_combinations" value="false"/>
            <parameter key="only_distinct" value="false"/>
            <parameter key="ignore_missings" value="true"/>
          </operator>
          <operator activated="true" class="rename" compatibility="9.5.001" expanded="true" height="82" name="Rename (121)" width="90" x="380" y="289">
            <parameter key="old_name" value="count(Customer)"/>
            <parameter key="new_name" value="FREQUENCY"/>
            <list key="rename_additional_attributes">
              <parameter key="sum(AMOUNT)" value="TOTAL_AMOUNT_PER_BIN"/>
            </list>
          </operator>
          <operator activated="true" class="rename" compatibility="9.5.001" expanded="true" height="82" name="Rename (122)" width="90" x="246" y="136">
            <parameter key="old_name" value="sum(AMOUNT)"/>
            <parameter key="new_name" value="TOTAL AMOUNT"/>
            <list key="rename_additional_attributes"/>
          </operator>
          <operator activated="true" class="concurrency:join" compatibility="9.5.001" expanded="true" height="82" name="Join" width="90" x="447" y="136">
            <parameter key="remove_double_attributes" value="true"/>
            <parameter key="join_type" value="inner"/>
            <parameter key="use_id_attribute_as_key" value="false"/>
            <list key="key_attributes">
              <parameter key="Customer" value="Customer"/>
            </list>
            <parameter key="keep_both_join_attributes" value="false"/>
          </operator>
          <operator activated="true" class="generate_attributes" compatibility="9.5.001" expanded="true" height="82" name="Generate Attributes" width="90" x="581" y="136">
            <list key="function_descriptions">
              <parameter key="BIN_TYPE" value="%{key_attribute_type1}"/>
            </list>
            <parameter key="keep_all" value="true"/>
          </operator>
          <connect from_port="input 1" to_op="Filter Example Range" to_port="example set input"/>
          <connect from_port="input 2" to_op="Aggregate" to_port="example set input"/>
          <connect from_op="Filter Example Range" from_port="example set output" to_op="Extract Macro (43)" to_port="example set"/>
          <connect from_op="Aggregate" from_port="example set output" to_op="Rename (122)" to_port="example set input"/>
          <connect from_op="Aggregate" from_port="original" to_op="Aggregate (117)" to_port="example set input"/>
          <connect from_op="Aggregate (117)" from_port="example set output" to_op="Rename (121)" to_port="example set input"/>
          <connect from_op="Rename (121)" from_port="example set output" to_op="Join" to_port="right"/>
          <connect from_op="Rename (122)" from_port="example set output" to_op="Join" to_port="left"/>
          <connect from_op="Join" from_port="join" to_op="Generate Attributes" to_port="example set input"/>
          <connect from_op="Generate Attributes" from_port="example set output" to_port="output 1"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="source_input 2" spacing="147"/>
          <portSpacing port="source_input 3" spacing="0"/>
          <portSpacing port="sink_output 1" spacing="0"/>
          <portSpacing port="sink_output 2" spacing="0"/>
        </process>
      </operator>
      <operator activated="false" class="append" compatibility="9.5.001" expanded="true" height="68" name="Append" width="90" x="581" y="238">
        <parameter key="datamanagement" value="double_array"/>
        <parameter key="data_management" value="auto"/>
        <parameter key="merge_type" value="all"/>
      </operator>
      <connect from_op="Retrieve dummy Data" from_port="output" to_op="Subprocess (2)" to_port="in 1"/>
      <connect from_op="Subprocess (2)" from_port="out 1" to_op="Rename" to_port="example set input"/>
      <connect from_op="Rename" from_port="example set output" to_op="Loop" to_port="input 2"/>
      <connect from_op="Retrieve Config_ProfileOnBins" from_port="output" to_op="Filter Examples (22)" to_port="example set input"/>
      <connect from_op="Filter Examples (22)" from_port="example set output" to_op="Extract Macro" to_port="example set"/>
      <connect from_op="Extract Macro" from_port="example set" to_op="Loop" to_port="input 1"/>
      <connect from_op="Create ExampleSet" from_port="output" to_op="Generate Attributes (5)" to_port="example set input"/>
      <connect from_op="Loop" from_port="output 1" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <description align="center" color="yellow" colored="false" height="105" resized="false" width="180" x="136" y="227">Type your comment</description>
    </process>
  </operator>
</process>



Tagged:

Answers

  • hbajpaihbajpai Member Posts: 102   Unicorn
    edited April 2020
    sgnarkhede2016, can you check the other thread with your question? I have tried to aggregate at both levels in the XML code shared there.
    Best,
    Harshit
Sign In or Register to comment.