🎉 🎉 RAPIDMINER 9.10 IS OUT!!! 🎉🎉

Download the latest version helping analytics teams accelerate time-to-value for streaming and IIOT use cases.

CLICK HERE TO DOWNLOAD

How to perform aggregation on year and half year data of customer

sgnarkhede2016 Member Posts: 133 Contributor II
edited April 2020 in Help
Hello,

From the attached file I want to generate the table below. For generating the data I have a configuration file with two columns: "BIN", which contains month, quarter, half-year and year, and "ENABLE", which contains Yes or No. I used the Filter Examples operator to calculate values only for the bins marked Yes and to store them in the respective columns.

Customer Daily-AvgAmt

Daily-STDAmt

Daily-AvgVolume

Daily-STDVolume

For the calculation, consider all customers and generate the values on a daily/monthly/yearly basis.


The columns above are for the daily values; the same columns are added for Monthly/Yearly.

I want to calculate the "number of transactions", "avg amount", "std deviation" and "Daily Limit" of the customer according to the value selected in the config (Year/Half year).

<?xml version="1.0" encoding="UTF-8"?><process version="9.5.001">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="9.5.001" expanded="true" name="Process">
    <parameter key="logverbosity" value="init"/>
    <parameter key="random_seed" value="2001"/>
    <parameter key="send_mail" value="never"/>
    <parameter key="notification_email" value=""/>
    <parameter key="process_duration_for_mail" value="30"/>
    <parameter key="encoding" value="SYSTEM"/>
    <process expanded="true">
      <operator activated="true" class="retrieve" compatibility="9.5.001" expanded="true" height="68" name="Retrieve Config_ProfileOnBins" width="90" x="45" y="34">
        <parameter key="repository_entry" value="../Data/Config_ProfileOnBins"/>
      </operator>
      <operator activated="true" class="filter_examples" compatibility="9.5.001" expanded="true" height="103" name="Filter Examples (22)" width="90" x="179" y="34">
        <parameter key="parameter_expression" value=""/>
        <parameter key="condition_class" value="custom_filters"/>
        <parameter key="invert_filter" value="false"/>
        <list key="filters_list">
          <parameter key="filters_entry_key" value="ENABLE.equals.Yes"/>
        </list>
        <parameter key="filters_logic_and" value="true"/>
        <parameter key="filters_check_metadata" value="true"/>
      </operator>
      <operator activated="true" class="extract_macro" compatibility="9.5.001" expanded="true" height="68" name="Extract Macro" width="90" x="313" y="34">
        <parameter key="macro" value="number_of_bins"/>
        <parameter key="macro_type" value="number_of_examples"/>
        <parameter key="statistics" value="average"/>
        <parameter key="attribute_name" value=""/>
        <list key="additional_macros"/>
      </operator>
      <operator activated="false" class="utility:create_exampleset" compatibility="9.5.001" expanded="true" height="68" name="Create ExampleSet" width="90" x="45" y="595">
        <parameter key="generator_type" value="numeric series"/>
        <parameter key="number_of_examples" value="1000"/>
        <parameter key="use_stepsize" value="true"/>
        <list key="function_descriptions"/>
        <parameter key="add_id_attribute" value="false"/>
        <list key="numeric_series_configuration">
          <parameter key="ProfileID" value="linear.1\.0.1\.0"/>
        </list>
        <list key="date_series_configuration"/>
        <list key="date_series_configuration (interval)"/>
        <parameter key="date_format" value="yyyy-MM-dd HH:mm:ss"/>
        <parameter key="time_zone" value="SYSTEM"/>
        <parameter key="column_separator" value=","/>
        <parameter key="parse_all_as_nominal" value="false"/>
        <parameter key="decimal_point_character" value="."/>
        <parameter key="trim_attribute_names" value="true"/>
      </operator>
      <operator activated="false" class="generate_attributes" compatibility="9.5.001" expanded="true" height="82" name="Generate Attributes (5)" width="90" x="246" y="595">
        <list key="function_descriptions">
          <parameter key="NEW_ID" value="concat(&quot;CBP&quot;,suffix(concat(&quot;0000&quot;,str(ProfileID)),5))"/>
        </list>
        <parameter key="keep_all" value="true"/>
      </operator>
      <operator activated="true" class="retrieve" compatibility="9.5.001" expanded="true" height="68" name="Retrieve dummy Data" width="90" x="45" y="238">
        <parameter key="repository_entry" value="../Data/dummy Data"/>
      </operator>
      <operator activated="true" breakpoints="after" class="subprocess" compatibility="9.5.001" expanded="true" height="82" name="Subprocess (2)" width="90" x="179" y="238">
        <process expanded="true">
          <operator activated="true" class="date_to_numerical" compatibility="9.0.001" expanded="true" height="82" name="Date to Numerical" width="90" x="45" y="34">
            <parameter key="attribute_name" value="InputDateTime"/>
            <parameter key="time_unit" value="year"/>
            <parameter key="millisecond_relative_to" value="second"/>
            <parameter key="second_relative_to" value="minute"/>
            <parameter key="minute_relative_to" value="hour"/>
            <parameter key="hour_relative_to" value="day"/>
            <parameter key="day_relative_to" value="month"/>
            <parameter key="week_relative_to" value="year"/>
            <parameter key="month_relative_to" value="year"/>
            <parameter key="quarter_relative_to" value="year"/>
            <parameter key="half_year_relative_to" value="year"/>
            <parameter key="year_relative_to" value="era"/>
            <parameter key="keep_old_attribute" value="true"/>
            <description align="center" color="transparent" colored="false" width="126">Extract Year</description>
          </operator>
          <operator activated="true" class="date_to_numerical" compatibility="9.0.001" expanded="true" height="82" name="Date to Numerical (2)" width="90" x="179" y="34">
            <parameter key="attribute_name" value="InputDateTime"/>
            <parameter key="time_unit" value="quarter"/>
            <parameter key="millisecond_relative_to" value="second"/>
            <parameter key="second_relative_to" value="minute"/>
            <parameter key="minute_relative_to" value="hour"/>
            <parameter key="hour_relative_to" value="day"/>
            <parameter key="day_relative_to" value="month"/>
            <parameter key="week_relative_to" value="year"/>
            <parameter key="month_relative_to" value="year"/>
            <parameter key="quarter_relative_to" value="year"/>
            <parameter key="half_year_relative_to" value="year"/>
            <parameter key="year_relative_to" value="epoch"/>
            <parameter key="keep_old_attribute" value="true"/>
            <description align="center" color="transparent" colored="false" width="126">Exract Quarter of Year</description>
          </operator>
          <operator activated="true" class="date_to_numerical" compatibility="9.0.001" expanded="true" height="82" name="Date to Numerical (3)" width="90" x="313" y="34">
            <parameter key="attribute_name" value="InputDateTime"/>
            <parameter key="time_unit" value="month"/>
            <parameter key="millisecond_relative_to" value="second"/>
            <parameter key="second_relative_to" value="minute"/>
            <parameter key="minute_relative_to" value="hour"/>
            <parameter key="hour_relative_to" value="day"/>
            <parameter key="day_relative_to" value="month"/>
            <parameter key="week_relative_to" value="year"/>
            <parameter key="month_relative_to" value="year"/>
            <parameter key="quarter_relative_to" value="year"/>
            <parameter key="half_year_relative_to" value="year"/>
            <parameter key="year_relative_to" value="epoch"/>
            <parameter key="keep_old_attribute" value="true"/>
            <description align="center" color="transparent" colored="false" width="126">Extract Month of Year</description>
          </operator>
          <operator activated="true" class="date_to_numerical" compatibility="9.0.001" expanded="true" height="82" name="Date to Numerical (4)" width="90" x="447" y="34">
            <parameter key="attribute_name" value="InputDateTime"/>
            <parameter key="time_unit" value="week"/>
            <parameter key="millisecond_relative_to" value="second"/>
            <parameter key="second_relative_to" value="minute"/>
            <parameter key="minute_relative_to" value="hour"/>
            <parameter key="hour_relative_to" value="day"/>
            <parameter key="day_relative_to" value="month"/>
            <parameter key="week_relative_to" value="month"/>
            <parameter key="month_relative_to" value="year"/>
            <parameter key="quarter_relative_to" value="year"/>
            <parameter key="half_year_relative_to" value="year"/>
            <parameter key="year_relative_to" value="epoch"/>
            <parameter key="keep_old_attribute" value="true"/>
            <description align="center" color="transparent" colored="false" width="126">Extract Week of Month</description>
          </operator>
          <operator activated="true" class="date_to_numerical" compatibility="9.0.001" expanded="true" height="82" name="Date to Numerical (6)" width="90" x="581" y="34">
            <parameter key="attribute_name" value="InputDateTime"/>
            <parameter key="time_unit" value="half year"/>
            <parameter key="millisecond_relative_to" value="second"/>
            <parameter key="second_relative_to" value="minute"/>
            <parameter key="minute_relative_to" value="hour"/>
            <parameter key="hour_relative_to" value="day"/>
            <parameter key="day_relative_to" value="month"/>
            <parameter key="week_relative_to" value="month"/>
            <parameter key="month_relative_to" value="year"/>
            <parameter key="quarter_relative_to" value="year"/>
            <parameter key="half_year_relative_to" value="year"/>
            <parameter key="year_relative_to" value="epoch"/>
            <parameter key="keep_old_attribute" value="true"/>
            <description align="center" color="transparent" colored="false" width="126">Extract Half of Year</description>
          </operator>
          <operator activated="true" class="date_to_numerical" compatibility="9.0.001" expanded="true" height="82" name="Date to Numerical (15)" width="90" x="715" y="34">
            <parameter key="attribute_name" value="InputDateTime"/>
            <parameter key="time_unit" value="day"/>
            <parameter key="millisecond_relative_to" value="second"/>
            <parameter key="second_relative_to" value="minute"/>
            <parameter key="minute_relative_to" value="hour"/>
            <parameter key="hour_relative_to" value="day"/>
            <parameter key="day_relative_to" value="week"/>
            <parameter key="week_relative_to" value="month"/>
            <parameter key="month_relative_to" value="year"/>
            <parameter key="quarter_relative_to" value="year"/>
            <parameter key="half_year_relative_to" value="year"/>
            <parameter key="year_relative_to" value="epoch"/>
            <parameter key="keep_old_attribute" value="true"/>
            <description align="center" color="transparent" colored="false" width="126">Extract Day of Month</description>
          </operator>
          <operator activated="true" class="rename" compatibility="9.5.001" expanded="true" height="82" name="Rename Extracted Features" width="90" x="849" y="34">
            <parameter key="old_name" value="InputDateTime_year"/>
            <parameter key="new_name" value="YEAR"/>
            <list key="rename_additional_attributes">
              <parameter key="InputDateTime_quarter" value="QUARTER"/>
              <parameter key="InputDateTime_month" value="MONTH"/>
              <parameter key="InputDateTime_week" value="WEEK"/>
              <parameter key="InputDateTime_half year" value="HALF YEAR"/>
              <parameter key="InputDateTime_day" value="DAY"/>
            </list>
          </operator>
          <connect from_port="in 1" to_op="Date to Numerical" to_port="example set input"/>
          <connect from_op="Date to Numerical" from_port="example set output" to_op="Date to Numerical (2)" to_port="example set input"/>
          <connect from_op="Date to Numerical (2)" from_port="example set output" to_op="Date to Numerical (3)" to_port="example set input"/>
          <connect from_op="Date to Numerical (3)" from_port="example set output" to_op="Date to Numerical (4)" to_port="example set input"/>
          <connect from_op="Date to Numerical (4)" from_port="example set output" to_op="Date to Numerical (6)" to_port="example set input"/>
          <connect from_op="Date to Numerical (6)" from_port="example set output" to_op="Date to Numerical (15)" to_port="example set input"/>
          <connect from_op="Date to Numerical (15)" from_port="example set output" to_op="Rename Extracted Features" to_port="example set input"/>
          <connect from_op="Rename Extracted Features" from_port="example set output" to_port="out 1"/>
          <portSpacing port="source_in 1" spacing="0"/>
          <portSpacing port="source_in 2" spacing="0"/>
          <portSpacing port="sink_out 1" spacing="0"/>
          <portSpacing port="sink_out 2" spacing="0"/>
        </process>
        <description align="center" color="transparent" colored="false" width="126">Data Preparation</description>
      </operator>
      <operator activated="true" class="rename" compatibility="9.5.001" expanded="true" height="82" name="Rename" width="90" x="313" y="238">
        <parameter key="old_name" value="Amount"/>
        <parameter key="new_name" value="AMOUNT"/>
        <list key="rename_additional_attributes"/>
      </operator>
      <operator activated="true" class="concurrency:loop_values" compatibility="9.5.001" expanded="true" height="103" name="Loop Values" width="90" x="514" y="187">
        <parameter key="attribute" value="%{number_of_bins}"/>
        <parameter key="iteration_macro" value="loop_bin"/>
        <parameter key="reuse_results" value="false"/>
        <parameter key="enable_parallel_execution" value="true"/>
        <process expanded="true">
          <operator activated="true" class="aggregate" compatibility="8.2.000" expanded="true" height="82" name="Aggregate (3)" width="90" x="246" y="238">
            <parameter key="use_default_aggregation" value="false"/>
            <parameter key="attribute_filter_type" value="all"/>
            <parameter key="attribute" value=""/>
            <parameter key="attributes" value=""/>
            <parameter key="use_except_expression" value="false"/>
            <parameter key="value_type" value="attribute_value"/>
            <parameter key="use_value_type_exception" value="false"/>
            <parameter key="except_value_type" value="time"/>
            <parameter key="block_type" value="attribute_block"/>
            <parameter key="use_block_type_exception" value="false"/>
            <parameter key="except_block_type" value="value_matrix_row_start"/>
            <parameter key="invert_selection" value="false"/>
            <parameter key="include_special_attributes" value="false"/>
            <parameter key="default_aggregation_function" value="average"/>
            <list key="aggregation_attributes">
              <parameter key="AMOUNT" value="average"/>
              <parameter key="AMOUNT" value="minimum"/>
              <parameter key="AMOUNT" value="maximum"/>
              <parameter key="AMOUNT" value="standard_deviation"/>
              <parameter key="AMOUNT" value="median"/>
              <parameter key="AMOUNT" value="sum"/>
            </list>
            <parameter key="group_by_attributes" value="Customer|%{loop_bin}"/>
            <parameter key="count_all_combinations" value="false"/>
            <parameter key="only_distinct" value="false"/>
            <parameter key="ignore_missings" value="true"/>
          </operator>
          <operator activated="true" breakpoints="after" class="aggregate" compatibility="8.2.000" expanded="true" height="82" name="Aggregate (256)" width="90" x="380" y="238">
            <parameter key="use_default_aggregation" value="false"/>
            <parameter key="attribute_filter_type" value="all"/>
            <parameter key="attribute" value=""/>
            <parameter key="attributes" value=""/>
            <parameter key="use_except_expression" value="false"/>
            <parameter key="value_type" value="attribute_value"/>
            <parameter key="use_value_type_exception" value="false"/>
            <parameter key="except_value_type" value="time"/>
            <parameter key="block_type" value="attribute_block"/>
            <parameter key="use_block_type_exception" value="false"/>
            <parameter key="except_block_type" value="value_matrix_row_start"/>
            <parameter key="invert_selection" value="false"/>
            <parameter key="include_special_attributes" value="false"/>
            <parameter key="default_aggregation_function" value="average"/>
            <list key="aggregation_attributes">
              <parameter key="FREQUENCY" value="average"/>
              <parameter key="FREQUENCY" value="minimum"/>
              <parameter key="FREQUENCY" value="maximum"/>
              <parameter key="FREQUENCY" value="standard_deviation"/>
              <parameter key="FREQUENCY" value="median"/>
              <parameter key="FREQUENCY" value="sum"/>
              <parameter key="TOTAL_AMOUNT_PER_BIN" value="average"/>
              <parameter key="TOTAL_AMOUNT_PER_BIN" value="minimum"/>
              <parameter key="TOTAL_AMOUNT_PER_BIN" value="maximum"/>
              <parameter key="TOTAL_AMOUNT_PER_BIN" value="standard_deviation"/>
            </list>
            <parameter key="group_by_attributes" value="%{loop_bin}|Customer"/>
            <parameter key="count_all_combinations" value="false"/>
            <parameter key="only_distinct" value="false"/>
            <parameter key="ignore_missings" value="true"/>
          </operator>
          <operator activated="true" class="filter_example_range" compatibility="9.5.001" expanded="true" height="82" name="Filter Example Range (2)" width="90" x="112" y="34">
            <parameter key="first_example" value="%{iteration}"/>
            <parameter key="last_example" value="%{iteration}"/>
            <parameter key="invert_filter" value="false"/>
          </operator>
          <operator activated="true" class="extract_macro" compatibility="9.5.001" expanded="true" height="68" name="Extract Macro (42)" width="90" x="246" y="34">
            <parameter key="macro" value="key_attribute_type1"/>
            <parameter key="macro_type" value="data_value"/>
            <parameter key="statistics" value="average"/>
            <parameter key="attribute_name" value="BIN"/>
            <parameter key="example_index" value="1"/>
            <list key="additional_macros"/>
          </operator>
          <connect from_port="input 1" to_op="Filter Example Range (2)" to_port="example set input"/>
          <connect from_port="input 2" to_op="Aggregate (3)" to_port="example set input"/>
          <connect from_op="Aggregate (3)" from_port="example set output" to_op="Aggregate (256)" to_port="example set input"/>
          <connect from_op="Filter Example Range (2)" from_port="example set output" to_op="Extract Macro (42)" to_port="example set"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="source_input 2" spacing="0"/>
          <portSpacing port="source_input 3" spacing="0"/>
          <portSpacing port="sink_output 1" spacing="0"/>
        </process>
      </operator>
      <connect from_op="Retrieve Config_ProfileOnBins" from_port="output" to_op="Filter Examples (22)" to_port="example set input"/>
      <connect from_op="Filter Examples (22)" from_port="example set output" to_op="Extract Macro" to_port="example set"/>
      <connect from_op="Extract Macro" from_port="example set" to_op="Loop Values" to_port="input 1"/>
      <connect from_op="Create ExampleSet" from_port="output" to_op="Generate Attributes (5)" to_port="example set input"/>
      <connect from_op="Retrieve dummy Data" from_port="output" to_op="Subprocess (2)" to_port="in 1"/>
      <connect from_op="Subprocess (2)" from_port="out 1" to_op="Rename" to_port="example set input"/>
      <connect from_op="Rename" from_port="example set output" to_op="Loop Values" to_port="input 2"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
    </process>
  </operator>
</process>

Best Answer

Sign In or Register to comment.