Options

Please help me interpret my results (totally newbie to this)

ConstantineConstantine Member Posts: 2 Newbie
Hey everyone,

I would like to ask for your help in understanding the results I have.
I am in the process of writing my thesis, and for this I'm doing an RFM analysis on a big retailer with the help of RapidMiner.

However, I frankly do not understand 90% of what is going on there, as my background is in marketing. My professor was very helpful: he taught me some things and how the tool works, and he made the XML process for me. Unfortunately, he has developed a nasty health issue and I don't want to bother him.

I have everything ready, and I'm fairly sure I can run it, albeit with a bit of trouble on my potato of a PC. The problem is that I do not know what to make of the results, and I would greatly appreciate it if somebody could spend 1-2 hours with me to help explain what I'm looking at. Please excuse me, but I cannot provide the dataset as it's confidential. I will paste the process below.

Thank you in advance for all the help!
Kosta

<?xml version="1.0" encoding="UTF-8"?><process version="9.10.008">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="9.10.008" expanded="true" name="Process">
    <parameter key="logverbosity" value="init"/>
    <parameter key="random_seed" value="2001"/>
    <parameter key="send_mail" value="never"/>
    <parameter key="notification_email" value=""/>
    <parameter key="process_duration_for_mail" value="30"/>
    <parameter key="encoding" value="SYSTEM"/>
    <process expanded="true">
      <!-- Loads the example set stored at repository entry //krigas/sales; its output is connected to the "Multiply" operator. -->
      <operator activated="true" class="retrieve" compatibility="9.10.008" expanded="true" height="68" name="Retrieve sales" width="90" x="45" y="136">
        <parameter key="repository_entry" value="//krigas/sales"/>
      </operator>
      <!-- Fans the sales data out on four output ports. Per the connect entries of this process they feed
           "Aggregate for M", "Aggregate for F", "Aggregate for R" and "Numerical to Polynominal (2)". -->
      <operator activated="true" class="multiply" compatibility="9.10.008" expanded="true" height="145" name="Multiply" width="90" x="179" y="85"/>
      <!-- Converts the single attribute item_id of the sales data to polynominal; the result is the left input of "Join (4)".
           NOTE(review): presumably done so item_id has the same type as on the products side of that join; confirm. -->
      <operator activated="true" class="numerical_to_polynominal" compatibility="9.10.008" expanded="true" height="82" name="Numerical to Polynominal (2)" width="90" x="447" y="391">
        <parameter key="attribute_filter_type" value="single"/>
        <parameter key="attribute" value="item_id"/>
        <parameter key="attributes" value=""/>
        <parameter key="use_except_expression" value="false"/>
        <parameter key="value_type" value="numeric"/>
        <parameter key="use_value_type_exception" value="false"/>
        <parameter key="except_value_type" value="real"/>
        <parameter key="block_type" value="value_series"/>
        <parameter key="use_block_type_exception" value="false"/>
        <parameter key="except_block_type" value="value_series_end"/>
        <parameter key="invert_selection" value="false"/>
        <parameter key="include_special_attributes" value="false"/>
      </operator>
      <!-- Frequency input: groups the sales data by hh_id and applies "count" to date_trns.
           The resulting attribute count(date_trns) is renamed to F by the "Rename" operator.
           NOTE(review): with only_distinct=false this presumably counts every non-missing row per household,
           not distinct dates; confirm that this is the intended definition of frequency. -->
      <operator activated="true" class="aggregate" compatibility="9.10.008" expanded="true" height="82" name="Aggregate for F" width="90" x="380" y="136">
        <parameter key="use_default_aggregation" value="false"/>
        <parameter key="attribute_filter_type" value="all"/>
        <parameter key="attribute" value=""/>
        <parameter key="attributes" value=""/>
        <parameter key="use_except_expression" value="false"/>
        <parameter key="value_type" value="attribute_value"/>
        <parameter key="use_value_type_exception" value="false"/>
        <parameter key="except_value_type" value="time"/>
        <parameter key="block_type" value="attribute_block"/>
        <parameter key="use_block_type_exception" value="false"/>
        <parameter key="except_block_type" value="value_matrix_row_start"/>
        <parameter key="invert_selection" value="false"/>
        <parameter key="include_special_attributes" value="false"/>
        <parameter key="default_aggregation_function" value="average"/>
        <list key="aggregation_attributes">
          <parameter key="date_trns" value="count"/>
        </list>
        <parameter key="group_by_attributes" value="hh_id"/>
        <parameter key="count_all_combinations" value="false"/>
        <parameter key="only_distinct" value="false"/>
        <parameter key="ignore_missings" value="true"/>
      </operator>
      <!-- Sorts the output of "Aggregate for F" by hh_id ascending; the result is the right input of "Join". -->
      <operator activated="true" class="blending:sort" compatibility="9.10.008" expanded="true" height="82" name="Sort (2)" width="90" x="581" y="136">
        <list key="sort_by">
          <parameter key="hh_id" value="ascending"/>
        </list>
      </operator>
      <!-- Recency input: groups the sales data by hh_id and applies "maximum" to date_trns.
           The resulting attribute maximum(date_trns) is read by the "Generate Attributes" operator. -->
      <operator activated="true" class="aggregate" compatibility="9.10.008" expanded="true" height="82" name="Aggregate for R" width="90" x="380" y="238">
        <parameter key="use_default_aggregation" value="false"/>
        <parameter key="attribute_filter_type" value="all"/>
        <parameter key="attribute" value=""/>
        <parameter key="attributes" value=""/>
        <parameter key="use_except_expression" value="false"/>
        <parameter key="value_type" value="attribute_value"/>
        <parameter key="use_value_type_exception" value="false"/>
        <parameter key="except_value_type" value="time"/>
        <parameter key="block_type" value="attribute_block"/>
        <parameter key="use_block_type_exception" value="false"/>
        <parameter key="except_block_type" value="value_matrix_row_start"/>
        <parameter key="invert_selection" value="false"/>
        <parameter key="include_special_attributes" value="false"/>
        <parameter key="default_aggregation_function" value="average"/>
        <list key="aggregation_attributes">
          <parameter key="date_trns" value="maximum"/>
        </list>
        <parameter key="group_by_attributes" value="hh_id"/>
        <parameter key="count_all_combinations" value="false"/>
        <parameter key="only_distinct" value="false"/>
        <parameter key="ignore_missings" value="true"/>
      </operator>
      <operator activated="true" class="generate_attributes" compatibility="9.10.008" expanded="true" height="82" name="Generate Attributes" width="90" x="581" y="238">
        <list key="function_descriptions">
          <parameter key="R_in_months" value="date_diff([maximum(date_trns)],date_parse_custom(&quot;03.03.2014&quot;,&quot;dd.MM.yyyy&quot;))/6.048e+8"/>
        </list>
        <parameter key="keep_all" value="true"/>
      </operator>
      <!-- Keeps only the subset hh_id and R_in_months from the recency branch. -->
      <operator activated="true" class="select_attributes" compatibility="9.10.008" expanded="true" height="82" name="Select Attributes" width="90" x="715" y="238">
        <parameter key="attribute_filter_type" value="subset"/>
        <parameter key="attribute" value=""/>
        <parameter key="attributes" value="hh_id|R_in_months"/>
        <parameter key="use_except_expression" value="false"/>
        <parameter key="value_type" value="attribute_value"/>
        <parameter key="use_value_type_exception" value="false"/>
        <parameter key="except_value_type" value="time"/>
        <parameter key="block_type" value="attribute_block"/>
        <parameter key="use_block_type_exception" value="false"/>
        <parameter key="except_block_type" value="value_matrix_row_start"/>
        <parameter key="invert_selection" value="false"/>
        <parameter key="include_special_attributes" value="false"/>
      </operator>
      <!-- Sorts the recency branch by hh_id ascending; the result is the right input of "Join (2)". -->
      <operator activated="true" class="blending:sort" compatibility="9.10.008" expanded="true" height="82" name="Sort (3)" width="90" x="849" y="238">
        <list key="sort_by">
          <parameter key="hh_id" value="ascending"/>
        </list>
      </operator>
      <!-- Monetary input: groups the sales data by hh_id and applies "sum" to price.
           The resulting attribute sum(price) is renamed to M by the "Rename" operator. -->
      <operator activated="true" class="aggregate" compatibility="9.10.008" expanded="true" height="82" name="Aggregate for M" width="90" x="380" y="34">
        <parameter key="use_default_aggregation" value="false"/>
        <parameter key="attribute_filter_type" value="all"/>
        <parameter key="attribute" value=""/>
        <parameter key="attributes" value=""/>
        <parameter key="use_except_expression" value="false"/>
        <parameter key="value_type" value="attribute_value"/>
        <parameter key="use_value_type_exception" value="false"/>
        <parameter key="except_value_type" value="time"/>
        <parameter key="block_type" value="attribute_block"/>
        <parameter key="use_block_type_exception" value="false"/>
        <parameter key="except_block_type" value="value_matrix_row_start"/>
        <parameter key="invert_selection" value="false"/>
        <parameter key="include_special_attributes" value="false"/>
        <parameter key="default_aggregation_function" value="average"/>
        <list key="aggregation_attributes">
          <parameter key="price" value="sum"/>
        </list>
        <parameter key="group_by_attributes" value="hh_id"/>
        <parameter key="count_all_combinations" value="false"/>
        <parameter key="only_distinct" value="false"/>
        <parameter key="ignore_missings" value="true"/>
      </operator>
      <!-- Sorts the output of "Aggregate for M" by hh_id ascending; the result is the left input of "Join". -->
      <operator activated="true" class="blending:sort" compatibility="9.10.008" expanded="true" height="82" name="Sort" width="90" x="581" y="34">
        <list key="sort_by">
          <parameter key="hh_id" value="ascending"/>
        </list>
      </operator>
      <!-- Inner join on hh_id of the M branch (left, from "Sort") with the F branch (right, from "Sort (2)"). -->
      <operator activated="true" class="concurrency:join" compatibility="9.10.008" expanded="true" height="82" name="Join" width="90" x="782" y="136">
        <parameter key="remove_double_attributes" value="true"/>
        <parameter key="join_type" value="inner"/>
        <parameter key="use_id_attribute_as_key" value="false"/>
        <list key="key_attributes">
          <parameter key="hh_id" value="hh_id"/>
        </list>
        <parameter key="keep_both_join_attributes" value="false"/>
      </operator>
      <!-- Inner join on hh_id of the M+F table (left, from "Join") with the R branch (right, from "Sort (3)"). -->
      <operator activated="true" class="concurrency:join" compatibility="9.10.008" expanded="true" height="82" name="Join (2)" width="90" x="983" y="187">
        <parameter key="remove_double_attributes" value="true"/>
        <parameter key="join_type" value="inner"/>
        <parameter key="use_id_attribute_as_key" value="false"/>
        <list key="key_attributes">
          <parameter key="hh_id" value="hh_id"/>
        </list>
        <parameter key="keep_both_join_attributes" value="false"/>
      </operator>
      <!-- Loads the example set stored at repository entry //krigas/products; its output is connected to "Numerical to Polynominal". -->
      <operator activated="true" class="retrieve" compatibility="9.10.008" expanded="true" height="68" name="Retrieve products" width="90" x="45" y="493">
        <parameter key="repository_entry" value="//krigas/products"/>
      </operator>
      <!-- Converts the product attributes item_class_cd, item_id and item_sc_cd to polynominal.
           NOTE(review): item_grp_cd is also used in the concat of "Generate Attributes (2)" but is not converted here;
           presumably it is already nominal in the products data; confirm. -->
      <operator activated="true" class="numerical_to_polynominal" compatibility="9.10.008" expanded="true" height="82" name="Numerical to Polynominal" width="90" x="179" y="493">
        <parameter key="attribute_filter_type" value="subset"/>
        <parameter key="attribute" value=""/>
        <parameter key="attributes" value="item_class_cd|item_id|item_sc_cd"/>
        <parameter key="use_except_expression" value="false"/>
        <parameter key="value_type" value="numeric"/>
        <parameter key="use_value_type_exception" value="false"/>
        <parameter key="except_value_type" value="real"/>
        <parameter key="block_type" value="value_series"/>
        <parameter key="use_block_type_exception" value="false"/>
        <parameter key="except_block_type" value="value_series_end"/>
        <parameter key="invert_selection" value="false"/>
        <parameter key="include_special_attributes" value="false"/>
      </operator>
      <!-- Builds a combined category key by concatenating item_grp_cd, item_class_cd and item_sc_cd with "_" separators.
           The attribute name "product_categpory" is misspelled, but the same spelling is used by
           "Select Attributes (2)", "Aggregate" and "Rename", so any rename must be applied in all four places. -->
      <operator activated="true" class="generate_attributes" compatibility="9.10.008" expanded="true" height="82" name="Generate Attributes (2)" width="90" x="380" y="493">
        <list key="function_descriptions">
          <parameter key="product_categpory" value="concat(item_grp_cd,&quot;_&quot;,item_class_cd,&quot;_&quot;,item_sc_cd)"/>
        </list>
        <parameter key="keep_all" value="true"/>
      </operator>
      <!-- Inner join on item_id of the sales rows (left, from "Numerical to Polynominal (2)") with the
           product table (right, from "Generate Attributes (2)"). Being an inner join, sales rows whose
           item_id has no match in the product table do not appear in the output. -->
      <operator activated="true" class="concurrency:join" compatibility="9.10.008" expanded="true" height="82" name="Join (4)" width="90" x="715" y="493">
        <parameter key="remove_double_attributes" value="true"/>
        <parameter key="join_type" value="inner"/>
        <parameter key="use_id_attribute_as_key" value="false"/>
        <list key="key_attributes">
          <parameter key="item_id" value="item_id"/>
        </list>
        <parameter key="keep_both_join_attributes" value="false"/>
      </operator>
      <!-- Keeps only hh_id, product_categpory and date_trns from the joined sales/product rows.
           NOTE(review): date_trns is kept here but the following "Aggregate" only uses hh_id and product_categpory. -->
      <operator activated="true" class="select_attributes" compatibility="9.10.008" expanded="true" height="82" name="Select Attributes (2)" width="90" x="849" y="493">
        <parameter key="attribute_filter_type" value="subset"/>
        <parameter key="attribute" value=""/>
        <parameter key="attributes" value="hh_id|product_categpory|date_trns"/>
        <parameter key="use_except_expression" value="false"/>
        <parameter key="value_type" value="attribute_value"/>
        <parameter key="use_value_type_exception" value="false"/>
        <parameter key="except_value_type" value="time"/>
        <parameter key="block_type" value="attribute_block"/>
        <parameter key="use_block_type_exception" value="false"/>
        <parameter key="except_block_type" value="value_matrix_row_start"/>
        <parameter key="invert_selection" value="false"/>
        <parameter key="include_special_attributes" value="false"/>
      </operator>
      <!-- Groups the joined sales/product rows by hh_id and applies "count" to product_categpory.
           The resulting attribute count(product_categpory) is renamed to V by the "Rename" operator.
           NOTE(review): with only_distinct=false this presumably counts every matching row per household,
           which would make V nearly the same quantity as F. If V is meant to be the number of different
           product categories a household bought, only_distinct probably needs to be true; confirm the
           intended meaning of V and the exact semantics of only_distinct in the operator documentation. -->
      <operator activated="true" class="aggregate" compatibility="9.10.008" expanded="true" height="82" name="Aggregate" width="90" x="983" y="493">
        <parameter key="use_default_aggregation" value="false"/>
        <parameter key="attribute_filter_type" value="all"/>
        <parameter key="attribute" value=""/>
        <parameter key="attributes" value=""/>
        <parameter key="use_except_expression" value="false"/>
        <parameter key="value_type" value="attribute_value"/>
        <parameter key="use_value_type_exception" value="false"/>
        <parameter key="except_value_type" value="time"/>
        <parameter key="block_type" value="attribute_block"/>
        <parameter key="use_block_type_exception" value="false"/>
        <parameter key="except_block_type" value="value_matrix_row_start"/>
        <parameter key="invert_selection" value="false"/>
        <parameter key="include_special_attributes" value="false"/>
        <parameter key="default_aggregation_function" value="average"/>
        <list key="aggregation_attributes">
          <parameter key="product_categpory" value="count"/>
        </list>
        <parameter key="group_by_attributes" value="hh_id"/>
        <parameter key="count_all_combinations" value="false"/>
        <parameter key="only_distinct" value="false"/>
        <parameter key="ignore_missings" value="true"/>
      </operator>
      <!-- Inner join on hh_id of the M+F+R table (left, from "Join (2)") with the per-household
           category count (right, from "Aggregate"). -->
      <operator activated="true" class="concurrency:join" compatibility="9.10.008" expanded="true" height="82" name="Join (3)" width="90" x="1117" y="238">
        <parameter key="remove_double_attributes" value="true"/>
        <parameter key="join_type" value="inner"/>
        <parameter key="use_id_attribute_as_key" value="false"/>
        <list key="key_attributes">
          <parameter key="hh_id" value="hh_id"/>
        </list>
        <parameter key="keep_both_join_attributes" value="false"/>
      </operator>
      <!-- Gives the four computed attributes their short names:
           count(date_trns) to F, sum(price) to M, R_in_months to R, count(product_categpory) to V. -->
      <operator activated="true" class="blending:rename" compatibility="9.10.008" expanded="true" height="82" name="Rename" width="90" x="1251" y="238">
        <list key="rename attributes">
          <parameter key="count(date_trns)" value="F"/>
          <parameter key="sum(price)" value="M"/>
          <parameter key="R_in_months" value="R"/>
          <parameter key="count(product_categpory)" value="V"/>
        </list>
        <parameter key="from_attribute" value=""/>
        <parameter key="to_attribute" value=""/>
      </operator>
      <!-- Puts the attributes in the user-specified order hh_id, R, F, M. handle_unmatched=append means
           attributes not listed (here V) are presumably placed after them; confirm in the operator documentation. -->
      <operator activated="true" class="order_attributes" compatibility="9.10.008" expanded="true" height="82" name="Reorder Attributes" width="90" x="1385" y="238">
        <parameter key="sort_mode" value="user specified"/>
        <parameter key="attribute_ordering" value="hh_id|R|F|M"/>
        <parameter key="use_regular_expressions" value="false"/>
        <parameter key="handle_unmatched" value="append"/>
        <parameter key="sort_direction" value="ascending"/>
      </operator>
      <!-- Keeps only the subset F, M, R and V for normalization and clustering.
           hh_id is not in the subset, so it is removed at this point.
           NOTE(review): without hh_id the clustered rows cannot be matched back to households by key;
           if that is needed, consider keeping hh_id with an id role instead; confirm with the process author. -->
      <operator activated="true" class="select_attributes" compatibility="9.10.008" expanded="true" height="82" name="Select Attributes (3)" width="90" x="1519" y="238">
        <parameter key="attribute_filter_type" value="subset"/>
        <parameter key="attribute" value=""/>
        <parameter key="attributes" value="F|M|R|V"/>
        <parameter key="use_except_expression" value="false"/>
        <parameter key="value_type" value="attribute_value"/>
        <parameter key="use_value_type_exception" value="false"/>
        <parameter key="except_value_type" value="time"/>
        <parameter key="block_type" value="attribute_block"/>
        <parameter key="use_block_type_exception" value="false"/>
        <parameter key="except_block_type" value="value_matrix_row_start"/>
        <parameter key="invert_selection" value="false"/>
        <parameter key="include_special_attributes" value="false"/>
      </operator>
      <!-- Normalizes all attributes (attribute_filter_type=all) with the Z-transformation method, so the
           values handed to "Clustering" are standardized scores rather than the original units.
           NOTE(review): the min/max/allow_negative_values parameters presumably belong to other
           normalization methods and have no effect here; confirm in the operator documentation. -->
      <operator activated="true" class="normalize" compatibility="9.10.008" expanded="true" height="103" name="Normalize" width="90" x="1653" y="238">
        <parameter key="return_preprocessing_model" value="false"/>
        <parameter key="create_view" value="false"/>
        <parameter key="attribute_filter_type" value="all"/>
        <parameter key="attribute" value=""/>
        <parameter key="attributes" value=""/>
        <parameter key="use_except_expression" value="false"/>
        <parameter key="value_type" value="numeric"/>
        <parameter key="use_value_type_exception" value="false"/>
        <parameter key="except_value_type" value="real"/>
        <parameter key="block_type" value="value_series"/>
        <parameter key="use_block_type_exception" value="false"/>
        <parameter key="except_block_type" value="value_series_end"/>
        <parameter key="invert_selection" value="false"/>
        <parameter key="include_special_attributes" value="false"/>
        <parameter key="method" value="Z-transformation"/>
        <parameter key="min" value="0.0"/>
        <parameter key="max" value="1.0"/>
        <parameter key="allow_negative_values" value="false"/>
      </operator>
      <!-- k-means clustering of the normalized F, M, R, V attributes into k=5 clusters, with max_runs=10 and
           at most 100 optimization steps, using BregmanDivergences / SquaredEuclideanDistance as the measure.
           add_cluster_attribute=true adds the cluster assignment to the example set.
           NOTE(review): breakpoints="before" presumably pauses execution right before this operator so the
           input can be inspected; remove it for unattended runs.
           NOTE(review): use_local_random_seed=false, so local_random_seed=1992 is presumably ignored and the
           process-level random_seed applies; the kernel_* parameters presumably only matter for kernel-based
           measures; confirm both in the operator documentation. -->
      <operator activated="true" breakpoints="before" class="concurrency:k_means" compatibility="9.10.008" expanded="true" height="82" name="Clustering" width="90" x="1787" y="238">
        <parameter key="add_cluster_attribute" value="true"/>
        <parameter key="add_as_label" value="false"/>
        <parameter key="remove_unlabeled" value="false"/>
        <parameter key="k" value="5"/>
        <parameter key="max_runs" value="10"/>
        <parameter key="determine_good_start_values" value="true"/>
        <parameter key="measure_types" value="BregmanDivergences"/>
        <parameter key="mixed_measure" value="MixedEuclideanDistance"/>
        <parameter key="nominal_measure" value="NominalDistance"/>
        <parameter key="numerical_measure" value="EuclideanDistance"/>
        <parameter key="divergence" value="SquaredEuclideanDistance"/>
        <parameter key="kernel_type" value="radial"/>
        <parameter key="kernel_gamma" value="1.0"/>
        <parameter key="kernel_sigma1" value="1.0"/>
        <parameter key="kernel_sigma2" value="0.0"/>
        <parameter key="kernel_sigma3" value="2.0"/>
        <parameter key="kernel_degree" value="3.0"/>
        <parameter key="kernel_shift" value="1.0"/>
        <parameter key="kernel_a" value="1.0"/>
        <parameter key="kernel_b" value="0.0"/>
        <parameter key="max_optimization_steps" value="100"/>
        <parameter key="use_local_random_seed" value="false"/>
        <parameter key="local_random_seed" value="1992"/>
      </operator>
      <!-- Wiring of the process. Three branches of the sales data (M, F, R) are aggregated per hh_id and
           joined; a fourth branch joins sales with products to derive V; the combined table is renamed,
           reduced to F/M/R/V, normalized and clustered.
           Only the "cluster model" port of "Clustering" is delivered as a result (result 1).
           NOTE(review): no example-set output of "Clustering" is connected, so the per-row cluster
           assignments are not part of the process results; connect that port to a further result port
           if they are needed for interpretation. -->
      <connect from_op="Retrieve sales" from_port="output" to_op="Multiply" to_port="input"/>
      <connect from_op="Multiply" from_port="output 1" to_op="Aggregate for M" to_port="example set input"/>
      <connect from_op="Multiply" from_port="output 2" to_op="Aggregate for F" to_port="example set input"/>
      <connect from_op="Multiply" from_port="output 3" to_op="Aggregate for R" to_port="example set input"/>
      <connect from_op="Multiply" from_port="output 4" to_op="Numerical to Polynominal (2)" to_port="example set input"/>
      <connect from_op="Numerical to Polynominal (2)" from_port="example set output" to_op="Join (4)" to_port="left"/>
      <connect from_op="Aggregate for F" from_port="example set output" to_op="Sort (2)" to_port="example set input"/>
      <connect from_op="Sort (2)" from_port="example set output" to_op="Join" to_port="right"/>
      <connect from_op="Aggregate for R" from_port="example set output" to_op="Generate Attributes" to_port="example set input"/>
      <connect from_op="Generate Attributes" from_port="example set output" to_op="Select Attributes" to_port="example set input"/>
      <connect from_op="Select Attributes" from_port="example set output" to_op="Sort (3)" to_port="example set input"/>
      <connect from_op="Sort (3)" from_port="example set output" to_op="Join (2)" to_port="right"/>
      <connect from_op="Aggregate for M" from_port="example set output" to_op="Sort" to_port="example set input"/>
      <connect from_op="Sort" from_port="example set output" to_op="Join" to_port="left"/>
      <connect from_op="Join" from_port="join" to_op="Join (2)" to_port="left"/>
      <connect from_op="Join (2)" from_port="join" to_op="Join (3)" to_port="left"/>
      <connect from_op="Retrieve products" from_port="output" to_op="Numerical to Polynominal" to_port="example set input"/>
      <connect from_op="Numerical to Polynominal" from_port="example set output" to_op="Generate Attributes (2)" to_port="example set input"/>
      <connect from_op="Generate Attributes (2)" from_port="example set output" to_op="Join (4)" to_port="right"/>
      <connect from_op="Join (4)" from_port="join" to_op="Select Attributes (2)" to_port="example set input"/>
      <connect from_op="Select Attributes (2)" from_port="example set output" to_op="Aggregate" to_port="example set input"/>
      <connect from_op="Aggregate" from_port="example set output" to_op="Join (3)" to_port="right"/>
      <connect from_op="Join (3)" from_port="join" to_op="Rename" to_port="example set input"/>
      <connect from_op="Rename" from_port="example set output" to_op="Reorder Attributes" to_port="example set input"/>
      <connect from_op="Reorder Attributes" from_port="example set output" to_op="Select Attributes (3)" to_port="example set input"/>
      <connect from_op="Select Attributes (3)" from_port="example set output" to_op="Normalize" to_port="example set input"/>
      <connect from_op="Normalize" from_port="example set output" to_op="Clustering" to_port="example set"/>
      <connect from_op="Clustering" from_port="cluster model" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>

Answers

  • Options
    MarcoBarradasMarcoBarradas Administrator, Employee, RapidMiner Certified Analyst, Member Posts: 272 Unicorn
    Hi @Constantine

    Take a look at these videos

    Clustering intro | RapidMiner

    Clustering demo


    In them you can learn a little more about clusters, how to interpret them, and how to translate what you are seeing into words.


    Let us know if these help.

  • Options
    ConstantineConstantine Member Posts: 2 Newbie
    Hey @MarcoBarradas

    Thank you so much! I'll have a look today. 

    Can I get back to you if I have any questions?

    Thank you kindly.
    Kosta
  • Options
    MarcoBarradasMarcoBarradas Administrator, Employee, RapidMiner Certified Analyst, Member Posts: 272 Unicorn
    Yes @Constantine, feel free to contact me if you have any questions. I'll try to help you.
Sign In or Register to comment.