RapidMiner 9.7 is Now Available

Lots of amazing new improvements including true version control! Learn more about what's new here.

CLICK HERE TO DOWNLOAD

How to perform aggregation on year and half year data of customer

sgnarkhede2016sgnarkhede2016 Member Posts: 83 Contributor II
edited April 1 in Help
Hello ,

form the attached file I want to generate the below table but from generation of data I have a Configuration file that contains one column "BIN" contains month, quarter,half-year and year, another column "ENABLE" that contain Yes or No. I used the filter operator to calculate values only on Yes attribute and store in the respective columns.

Customer Daily-AvgAmt

Daily-STDAmt

Daily-AvgVolume

Daily-STDVolume

For the calculation, consider all the customer to generate value on month/daily/yearly


above for daily same column added for Monthly/Yearly

I want to calculate the "number of transaction", "avg amount"," std devation", "Daily Limit " of the customer  as per selected value in the config(Year/Half year)

<?xml version="1.0" encoding="UTF-8"?><process version="9.5.001">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="9.5.001" expanded="true" name="Process">
    <parameter key="logverbosity" value="init"/>
    <parameter key="random_seed" value="2001"/>
    <parameter key="send_mail" value="never"/>
    <parameter key="notification_email" value=""/>
    <parameter key="process_duration_for_mail" value="30"/>
    <parameter key="encoding" value="SYSTEM"/>
    <process expanded="true">
      <operator activated="true" class="retrieve" compatibility="9.5.001" expanded="true" height="68" name="Retrieve Config_ProfileOnBins" width="90" x="45" y="34">
        <parameter key="repository_entry" value="../Data/Config_ProfileOnBins"/>
      </operator>
      <operator activated="true" class="filter_examples" compatibility="9.5.001" expanded="true" height="103" name="Filter Examples (22)" width="90" x="179" y="34">
        <parameter key="parameter_expression" value=""/>
        <parameter key="condition_class" value="custom_filters"/>
        <parameter key="invert_filter" value="false"/>
        <list key="filters_list">
          <parameter key="filters_entry_key" value="ENABLE.equals.Yes"/>
        </list>
        <parameter key="filters_logic_and" value="true"/>
        <parameter key="filters_check_metadata" value="true"/>
      </operator>
      <operator activated="true" class="extract_macro" compatibility="9.5.001" expanded="true" height="68" name="Extract Macro" width="90" x="313" y="34">
        <parameter key="macro" value="number_of_bins"/>
        <parameter key="macro_type" value="number_of_examples"/>
        <parameter key="statistics" value="average"/>
        <parameter key="attribute_name" value=""/>
        <list key="additional_macros"/>
      </operator>
      <operator activated="false" class="utility:create_exampleset" compatibility="9.5.001" expanded="true" height="68" name="Create ExampleSet" width="90" x="45" y="595">
        <parameter key="generator_type" value="numeric series"/>
        <parameter key="number_of_examples" value="1000"/>
        <parameter key="use_stepsize" value="true"/>
        <list key="function_descriptions"/>
        <parameter key="add_id_attribute" value="false"/>
        <list key="numeric_series_configuration">
          <parameter key="ProfileID" value="linear.1\.0.1\.0"/>
        </list>
        <list key="date_series_configuration"/>
        <list key="date_series_configuration (interval)"/>
        <parameter key="date_format" value="yyyy-MM-dd HH:mm:ss"/>
        <parameter key="time_zone" value="SYSTEM"/>
        <parameter key="column_separator" value=","/>
        <parameter key="parse_all_as_nominal" value="false"/>
        <parameter key="decimal_point_character" value="."/>
        <parameter key="trim_attribute_names" value="true"/>
      </operator>
      <operator activated="false" class="generate_attributes" compatibility="9.5.001" expanded="true" height="82" name="Generate Attributes (5)" width="90" x="246" y="595">
        <list key="function_descriptions">
          <parameter key="NEW_ID" value="concat(&quot;CBP&quot;,suffix(concat(&quot;0000&quot;,str(ProfileID)),5))"/>
        </list>
        <parameter key="keep_all" value="true"/>
      </operator>
      <operator activated="true" class="retrieve" compatibility="9.5.001" expanded="true" height="68" name="Retrieve dummy Data" width="90" x="45" y="238">
        <parameter key="repository_entry" value="../Data/dummy Data"/>
      </operator>
      <operator activated="true" breakpoints="after" class="subprocess" compatibility="9.5.001" expanded="true" height="82" name="Subprocess (2)" width="90" x="179" y="238">
        <process expanded="true">
          <operator activated="true" class="date_to_numerical" compatibility="9.0.001" expanded="true" height="82" name="Date to Numerical" width="90" x="45" y="34">
            <parameter key="attribute_name" value="InputDateTime"/>
            <parameter key="time_unit" value="year"/>
            <parameter key="millisecond_relative_to" value="second"/>
            <parameter key="second_relative_to" value="minute"/>
            <parameter key="minute_relative_to" value="hour"/>
            <parameter key="hour_relative_to" value="day"/>
            <parameter key="day_relative_to" value="month"/>
            <parameter key="week_relative_to" value="year"/>
            <parameter key="month_relative_to" value="year"/>
            <parameter key="quarter_relative_to" value="year"/>
            <parameter key="half_year_relative_to" value="year"/>
            <parameter key="year_relative_to" value="era"/>
            <parameter key="keep_old_attribute" value="true"/>
            <description align="center" color="transparent" colored="false" width="126">Extract Year</description>
          </operator>
          <operator activated="true" class="date_to_numerical" compatibility="9.0.001" expanded="true" height="82" name="Date to Numerical (2)" width="90" x="179" y="34">
            <parameter key="attribute_name" value="InputDateTime"/>
            <parameter key="time_unit" value="quarter"/>
            <parameter key="millisecond_relative_to" value="second"/>
            <parameter key="second_relative_to" value="minute"/>
            <parameter key="minute_relative_to" value="hour"/>
            <parameter key="hour_relative_to" value="day"/>
            <parameter key="day_relative_to" value="month"/>
            <parameter key="week_relative_to" value="year"/>
            <parameter key="month_relative_to" value="year"/>
            <parameter key="quarter_relative_to" value="year"/>
            <parameter key="half_year_relative_to" value="year"/>
            <parameter key="year_relative_to" value="epoch"/>
            <parameter key="keep_old_attribute" value="true"/>
            <description align="center" color="transparent" colored="false" width="126">Exract Quarter of Year</description>
          </operator>
          <operator activated="true" class="date_to_numerical" compatibility="9.0.001" expanded="true" height="82" name="Date to Numerical (3)" width="90" x="313" y="34">
            <parameter key="attribute_name" value="InputDateTime"/>
            <parameter key="time_unit" value="month"/>
            <parameter key="millisecond_relative_to" value="second"/>
            <parameter key="second_relative_to" value="minute"/>
            <parameter key="minute_relative_to" value="hour"/>
            <parameter key="hour_relative_to" value="day"/>
            <parameter key="day_relative_to" value="month"/>
            <parameter key="week_relative_to" value="year"/>
            <parameter key="month_relative_to" value="year"/>
            <parameter key="quarter_relative_to" value="year"/>
            <parameter key="half_year_relative_to" value="year"/>
            <parameter key="year_relative_to" value="epoch"/>
            <parameter key="keep_old_attribute" value="true"/>
            <description align="center" color="transparent" colored="false" width="126">Extract Month of Year</description>
          </operator>
          <operator activated="true" class="date_to_numerical" compatibility="9.0.001" expanded="true" height="82" name="Date to Numerical (4)" width="90" x="447" y="34">
            <parameter key="attribute_name" value="InputDateTime"/>
            <parameter key="time_unit" value="week"/>
            <parameter key="millisecond_relative_to" value="second"/>
            <parameter key="second_relative_to" value="minute"/>
            <parameter key="minute_relative_to" value="hour"/>
            <parameter key="hour_relative_to" value="day"/>
            <parameter key="day_relative_to" value="month"/>
            <parameter key="week_relative_to" value="month"/>
            <parameter key="month_relative_to" value="year"/>
            <parameter key="quarter_relative_to" value="year"/>
            <parameter key="half_year_relative_to" value="year"/>
            <parameter key="year_relative_to" value="epoch"/>
            <parameter key="keep_old_attribute" value="true"/>
            <description align="center" color="transparent" colored="false" width="126">Extract Week of Month</description>
          </operator>
          <operator activated="true" class="date_to_numerical" compatibility="9.0.001" expanded="true" height="82" name="Date to Numerical (6)" width="90" x="581" y="34">
            <parameter key="attribute_name" value="InputDateTime"/>
            <parameter key="time_unit" value="half year"/>
            <parameter key="millisecond_relative_to" value="second"/>
            <parameter key="second_relative_to" value="minute"/>
            <parameter key="minute_relative_to" value="hour"/>
            <parameter key="hour_relative_to" value="day"/>
            <parameter key="day_relative_to" value="month"/>
            <parameter key="week_relative_to" value="month"/>
            <parameter key="month_relative_to" value="year"/>
            <parameter key="quarter_relative_to" value="year"/>
            <parameter key="half_year_relative_to" value="year"/>
            <parameter key="year_relative_to" value="epoch"/>
            <parameter key="keep_old_attribute" value="true"/>
            <description align="center" color="transparent" colored="false" width="126">Extract Half of Year</description>
          </operator>
          <operator activated="true" class="date_to_numerical" compatibility="9.0.001" expanded="true" height="82" name="Date to Numerical (15)" width="90" x="715" y="34">
            <parameter key="attribute_name" value="InputDateTime"/>
            <parameter key="time_unit" value="day"/>
            <parameter key="millisecond_relative_to" value="second"/>
            <parameter key="second_relative_to" value="minute"/>
            <parameter key="minute_relative_to" value="hour"/>
            <parameter key="hour_relative_to" value="day"/>
            <parameter key="day_relative_to" value="week"/>
            <parameter key="week_relative_to" value="month"/>
            <parameter key="month_relative_to" value="year"/>
            <parameter key="quarter_relative_to" value="year"/>
            <parameter key="half_year_relative_to" value="year"/>
            <parameter key="year_relative_to" value="epoch"/>
            <parameter key="keep_old_attribute" value="true"/>
            <description align="center" color="transparent" colored="false" width="126">Extract Day of Month</description>
          </operator>
          <operator activated="true" class="rename" compatibility="9.5.001" expanded="true" height="82" name="Rename Extracted Features" width="90" x="849" y="34">
            <parameter key="old_name" value="InputDateTime_year"/>
            <parameter key="new_name" value="YEAR"/>
            <list key="rename_additional_attributes">
              <parameter key="InputDateTime_quarter" value="QUARTER"/>
              <parameter key="InputDateTime_month" value="MONTH"/>
              <parameter key="InputDateTime_week" value="WEEK"/>
              <parameter key="InputDateTime_half year" value="HALF YEAR"/>
              <parameter key="InputDateTime_day" value="DAY"/>
            </list>
          </operator>
          <connect from_port="in 1" to_op="Date to Numerical" to_port="example set input"/>
          <connect from_op="Date to Numerical" from_port="example set output" to_op="Date to Numerical (2)" to_port="example set input"/>
          <connect from_op="Date to Numerical (2)" from_port="example set output" to_op="Date to Numerical (3)" to_port="example set input"/>
          <connect from_op="Date to Numerical (3)" from_port="example set output" to_op="Date to Numerical (4)" to_port="example set input"/>
          <connect from_op="Date to Numerical (4)" from_port="example set output" to_op="Date to Numerical (6)" to_port="example set input"/>
          <connect from_op="Date to Numerical (6)" from_port="example set output" to_op="Date to Numerical (15)" to_port="example set input"/>
          <connect from_op="Date to Numerical (15)" from_port="example set output" to_op="Rename Extracted Features" to_port="example set input"/>
          <connect from_op="Rename Extracted Features" from_port="example set output" to_port="out 1"/>
          <portSpacing port="source_in 1" spacing="0"/>
          <portSpacing port="source_in 2" spacing="0"/>
          <portSpacing port="sink_out 1" spacing="0"/>
          <portSpacing port="sink_out 2" spacing="0"/>
        </process>
        <description align="center" color="transparent" colored="false" width="126">Data Preparation</description>
      </operator>
      <operator activated="true" class="rename" compatibility="9.5.001" expanded="true" height="82" name="Rename" width="90" x="313" y="238">
        <parameter key="old_name" value="Amount"/>
        <parameter key="new_name" value="AMOUNT"/>
        <list key="rename_additional_attributes"/>
      </operator>
      <operator activated="true" class="concurrency:loop_values" compatibility="9.5.001" expanded="true" height="103" name="Loop Values" width="90" x="514" y="187">
        <parameter key="attribute" value="%{number_of_bins}"/>
        <parameter key="iteration_macro" value="loop_bin"/>
        <parameter key="reuse_results" value="false"/>
        <parameter key="enable_parallel_execution" value="true"/>
        <process expanded="true">
          <operator activated="true" class="aggregate" compatibility="8.2.000" expanded="true" height="82" name="Aggregate (3)" width="90" x="246" y="238">
            <parameter key="use_default_aggregation" value="false"/>
            <parameter key="attribute_filter_type" value="all"/>
            <parameter key="attribute" value=""/>
            <parameter key="attributes" value=""/>
            <parameter key="use_except_expression" value="false"/>
            <parameter key="value_type" value="attribute_value"/>
            <parameter key="use_value_type_exception" value="false"/>
            <parameter key="except_value_type" value="time"/>
            <parameter key="block_type" value="attribute_block"/>
            <parameter key="use_block_type_exception" value="false"/>
            <parameter key="except_block_type" value="value_matrix_row_start"/>
            <parameter key="invert_selection" value="false"/>
            <parameter key="include_special_attributes" value="false"/>
            <parameter key="default_aggregation_function" value="average"/>
            <list key="aggregation_attributes">
              <parameter key="AMOUNT" value="average"/>
              <parameter key="AMOUNT" value="minimum"/>
              <parameter key="AMOUNT" value="maximum"/>
              <parameter key="AMOUNT" value="standard_deviation"/>
              <parameter key="AMOUNT" value="median"/>
              <parameter key="AMOUNT" value="sum"/>
            </list>
            <parameter key="group_by_attributes" value="Customer|%{loop_bin}"/>
            <parameter key="count_all_combinations" value="false"/>
            <parameter key="only_distinct" value="false"/>
            <parameter key="ignore_missings" value="true"/>
          </operator>
          <operator activated="true" breakpoints="after" class="aggregate" compatibility="8.2.000" expanded="true" height="82" name="Aggregate (256)" width="90" x="380" y="238">
            <parameter key="use_default_aggregation" value="false"/>
            <parameter key="attribute_filter_type" value="all"/>
            <parameter key="attribute" value=""/>
            <parameter key="attributes" value=""/>
            <parameter key="use_except_expression" value="false"/>
            <parameter key="value_type" value="attribute_value"/>
            <parameter key="use_value_type_exception" value="false"/>
            <parameter key="except_value_type" value="time"/>
            <parameter key="block_type" value="attribute_block"/>
            <parameter key="use_block_type_exception" value="false"/>
            <parameter key="except_block_type" value="value_matrix_row_start"/>
            <parameter key="invert_selection" value="false"/>
            <parameter key="include_special_attributes" value="false"/>
            <parameter key="default_aggregation_function" value="average"/>
            <list key="aggregation_attributes">
              <parameter key="FREQUENCY" value="average"/>
              <parameter key="FREQUENCY" value="minimum"/>
              <parameter key="FREQUENCY" value="maximum"/>
              <parameter key="FREQUENCY" value="standard_deviation"/>
              <parameter key="FREQUENCY" value="median"/>
              <parameter key="FREQUENCY" value="sum"/>
              <parameter key="TOTAL_AMOUNT_PER_BIN" value="average"/>
              <parameter key="TOTAL_AMOUNT_PER_BIN" value="minimum"/>
              <parameter key="TOTAL_AMOUNT_PER_BIN" value="maximum"/>
              <parameter key="TOTAL_AMOUNT_PER_BIN" value="standard_deviation"/>
            </list>
            <parameter key="group_by_attributes" value="%{loop_bin}|Customer"/>
            <parameter key="count_all_combinations" value="false"/>
            <parameter key="only_distinct" value="false"/>
            <parameter key="ignore_missings" value="true"/>
          </operator>
          <operator activated="true" class="filter_example_range" compatibility="9.5.001" expanded="true" height="82" name="Filter Example Range (2)" width="90" x="112" y="34">
            <parameter key="first_example" value="%{iteration}"/>
            <parameter key="last_example" value="%{iteration}"/>
            <parameter key="invert_filter" value="false"/>
          </operator>
          <operator activated="true" class="extract_macro" compatibility="9.5.001" expanded="true" height="68" name="Extract Macro (42)" width="90" x="246" y="34">
            <parameter key="macro" value="key_attribute_type1"/>
            <parameter key="macro_type" value="data_value"/>
            <parameter key="statistics" value="average"/>
            <parameter key="attribute_name" value="BIN"/>
            <parameter key="example_index" value="1"/>
            <list key="additional_macros"/>
          </operator>
          <connect from_port="input 1" to_op="Filter Example Range (2)" to_port="example set input"/>
          <connect from_port="input 2" to_op="Aggregate (3)" to_port="example set input"/>
          <connect from_op="Aggregate (3)" from_port="example set output" to_op="Aggregate (256)" to_port="example set input"/>
          <connect from_op="Filter Example Range (2)" from_port="example set output" to_op="Extract Macro (42)" to_port="example set"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="source_input 2" spacing="0"/>
          <portSpacing port="source_input 3" spacing="0"/>
          <portSpacing port="sink_output 1" spacing="0"/>
        </process>
      </operator>
      <connect from_op="Retrieve Config_ProfileOnBins" from_port="output" to_op="Filter Examples (22)" to_port="example set input"/>
      <connect from_op="Filter Examples (22)" from_port="example set output" to_op="Extract Macro" to_port="example set"/>
      <connect from_op="Extract Macro" from_port="example set" to_op="Loop Values" to_port="input 1"/>
      <connect from_op="Create ExampleSet" from_port="output" to_op="Generate Attributes (5)" to_port="example set input"/>
      <connect from_op="Retrieve dummy Data" from_port="output" to_op="Subprocess (2)" to_port="in 1"/>
      <connect from_op="Subprocess (2)" from_port="out 1" to_op="Rename" to_port="example set input"/>
      <connect from_op="Rename" from_port="example set output" to_op="Loop Values" to_port="input 2"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
    </process>
  </operator>
</process>

Best Answer

Sign In or Register to comment.