Options

matching datasets according to best ranked item

vabmvabm Member Posts: 4 Contributor I
edited November 2018 in Help
Hi, I have two data sets that I need to match according to some specific value.
The first one (table 1) contains the IDs of a set of products, and the performance of these product according to the age of a user, something like:

item_id, user_age, item_performance
01, 45, 2
02, 23, 4
03, 23, 3
04, 45, 5
etc
Then, the other one (table 2) has the age of a set of users, like:

user_id, age
001, 45
002, 23
003, 29
(etc)
What I need to do is to find the item that has the best performance for each age group, and then match them as a single dataset. Samething like:

user_id, item_id
001, 04
002, 02
003, 06
(etc)
I was trying to use aggregate to find the best score per age, but I am getting the best scores for a single age group... any ideas on how to find the best scores per age group, and then how to aggregate them to table 2? Thanks !!

Answers

  • Options
    MartinLiebigMartinLiebig Administrator, Moderator, Employee, RapidMiner Certified Analyst, RapidMiner Certified Expert, University Professor Posts: 3,510 RM Data Scientist
    I think this is hard to do at all, because there is no way to join your first and your second table if ages are not unique.

    ~Martin
    - Sr. Director Data Solutions, Altair RapidMiner -
    Dortmund, Germany
  • Options
    awchisholmawchisholm RapidMiner Certified Expert, Member Posts: 458 Unicorn
    Here's something that seems to do the trick...
    <?xml version="1.0" encoding="UTF-8" standalone="no"?>
    <process version="7.0.000">
      <context>
        <input/>
        <output/>
        <macros/>
      </context>
      <operator activated="true" class="process" compatibility="7.0.000" expanded="true" name="Process">
        <process expanded="true">
          <operator activated="true" class="subprocess" compatibility="7.0.000" expanded="true" height="82" name="user data" width="90" x="112" y="34">
            <process expanded="true">
              <operator activated="true" class="generate_data_user_specification" compatibility="7.0.000" expanded="true" height="68" name="Generate Data by User Specification (5)" width="90" x="179" y="187">
                <list key="attribute_values">
                  <parameter key="userid" value="&quot;001&quot;"/>
                  <parameter key="age" value="&quot;45&quot;"/>
                </list>
                <list key="set_additional_roles"/>
              </operator>
              <operator activated="true" class="generate_data_user_specification" compatibility="7.0.000" expanded="true" height="68" name="Generate Data by User Specification (6)" width="90" x="179" y="289">
                <list key="attribute_values">
                  <parameter key="userid" value="&quot;002&quot;"/>
                  <parameter key="age" value="&quot;23&quot;"/>
                </list>
                <list key="set_additional_roles"/>
              </operator>
              <operator activated="true" class="generate_data_user_specification" compatibility="7.0.000" expanded="true" height="68" name="Generate Data by User Specification (7)" width="90" x="179" y="391">
                <list key="attribute_values">
                  <parameter key="userid" value="&quot;003&quot;"/>
                  <parameter key="age" value="&quot;29&quot;"/>
                </list>
                <list key="set_additional_roles"/>
              </operator>
              <operator activated="true" class="append" compatibility="7.0.000" expanded="true" height="124" name="Append (2)" width="90" x="313" y="187"/>
              <connect from_op="Generate Data by User Specification (5)" from_port="output" to_op="Append (2)" to_port="example set 1"/>
              <connect from_op="Generate Data by User Specification (6)" from_port="output" to_op="Append (2)" to_port="example set 2"/>
              <connect from_op="Generate Data by User Specification (7)" from_port="output" to_op="Append (2)" to_port="example set 3"/>
              <connect from_op="Append (2)" from_port="merged set" to_port="out 1"/>
              <portSpacing port="source_in 1" spacing="0"/>
              <portSpacing port="sink_out 1" spacing="0"/>
              <portSpacing port="sink_out 2" spacing="0"/>
            </process>
          </operator>
          <operator activated="true" class="subprocess" compatibility="7.0.000" expanded="true" height="82" name="item data" width="90" x="112" y="289">
            <process expanded="true">
              <operator activated="true" class="generate_data_user_specification" compatibility="7.0.000" expanded="true" height="68" name="Generate Data by User Specification (2)" width="90" x="179" y="187">
                <list key="attribute_values">
                  <parameter key="itemid" value="&quot;01&quot;"/>
                  <parameter key="age" value="&quot;45&quot;"/>
                  <parameter key="perf" value="2"/>
                </list>
                <list key="set_additional_roles"/>
              </operator>
              <operator activated="true" class="generate_data_user_specification" compatibility="7.0.000" expanded="true" height="68" name="Generate Data by User Specification (3)" width="90" x="179" y="289">
                <list key="attribute_values">
                  <parameter key="itemid" value="&quot;02&quot;"/>
                  <parameter key="age" value="&quot;23&quot;"/>
                  <parameter key="perf" value="4"/>
                </list>
                <list key="set_additional_roles"/>
              </operator>
              <operator activated="true" class="generate_data_user_specification" compatibility="7.0.000" expanded="true" height="68" name="Generate Data by User Specification (4)" width="90" x="179" y="391">
                <list key="attribute_values">
                  <parameter key="itemid" value="&quot;03&quot;"/>
                  <parameter key="age" value="&quot;45&quot;"/>
                  <parameter key="perf" value="3"/>
                </list>
                <list key="set_additional_roles"/>
              </operator>
              <operator activated="true" class="generate_data_user_specification" compatibility="7.0.000" expanded="true" height="68" name="Generate Data by User Specification (8)" width="90" x="179" y="493">
                <list key="attribute_values">
                  <parameter key="itemid" value="&quot;04&quot;"/>
                  <parameter key="age" value="&quot;45&quot;"/>
                  <parameter key="perf" value="4"/>
                </list>
                <list key="set_additional_roles"/>
              </operator>
              <operator activated="true" class="generate_data_user_specification" compatibility="7.0.000" expanded="true" height="68" name="Generate Data by User Specification (9)" width="90" x="179" y="595">
                <list key="attribute_values">
                  <parameter key="itemid" value="&quot;05&quot;"/>
                  <parameter key="age" value="&quot;45&quot;"/>
                  <parameter key="perf" value="1"/>
                </list>
                <list key="set_additional_roles"/>
              </operator>
              <operator activated="true" class="generate_data_user_specification" compatibility="7.0.000" expanded="true" height="68" name="Generate Data by User Specification (10)" width="90" x="179" y="697">
                <list key="attribute_values">
                  <parameter key="itemid" value="&quot;06&quot;"/>
                  <parameter key="age" value="&quot;29&quot;"/>
                  <parameter key="perf" value="1"/>
                </list>
                <list key="set_additional_roles"/>
              </operator>
              <operator activated="true" class="append" compatibility="7.0.000" expanded="true" height="187" name="Append" width="90" x="447" y="187"/>
              <connect from_op="Generate Data by User Specification (2)" from_port="output" to_op="Append" to_port="example set 1"/>
              <connect from_op="Generate Data by User Specification (3)" from_port="output" to_op="Append" to_port="example set 2"/>
              <connect from_op="Generate Data by User Specification (4)" from_port="output" to_op="Append" to_port="example set 3"/>
              <connect from_op="Generate Data by User Specification (8)" from_port="output" to_op="Append" to_port="example set 4"/>
              <connect from_op="Generate Data by User Specification (9)" from_port="output" to_op="Append" to_port="example set 5"/>
              <connect from_op="Generate Data by User Specification (10)" from_port="output" to_op="Append" to_port="example set 6"/>
              <connect from_op="Append" from_port="merged set" to_port="out 1"/>
              <portSpacing port="source_in 1" spacing="0"/>
              <portSpacing port="sink_out 1" spacing="0"/>
              <portSpacing port="sink_out 2" spacing="0"/>
            </process>
          </operator>
          <operator activated="true" class="multiply" compatibility="7.0.000" expanded="true" height="103" name="Multiply" width="90" x="246" y="289"/>
          <operator activated="true" class="join" compatibility="7.0.000" expanded="true" height="82" name="Join" width="90" x="447" y="34">
            <parameter key="join_type" value="left"/>
            <parameter key="use_id_attribute_as_key" value="false"/>
            <list key="key_attributes">
              <parameter key="age" value="age"/>
            </list>
          </operator>
          <operator activated="true" class="aggregate" compatibility="7.0.000" expanded="true" height="82" name="Aggregate" width="90" x="581" y="34">
            <list key="aggregation_attributes">
              <parameter key="perf" value="maximum"/>
            </list>
            <parameter key="group_by_attributes" value="userid|age"/>
          </operator>
          <operator activated="true" class="rename" compatibility="7.0.000" expanded="true" height="82" name="Rename" width="90" x="715" y="34">
            <parameter key="old_name" value="maximum(perf)"/>
            <parameter key="new_name" value="perf"/>
            <list key="rename_additional_attributes"/>
          </operator>
          <operator activated="true" class="join" compatibility="7.0.000" expanded="true" height="82" name="Join (2)" width="90" x="916" y="289">
            <parameter key="use_id_attribute_as_key" value="false"/>
            <list key="key_attributes">
              <parameter key="age" value="age"/>
              <parameter key="perf" value="perf"/>
            </list>
          </operator>
          <operator activated="true" class="select_attributes" compatibility="7.0.000" expanded="true" height="82" name="Select Attributes" width="90" x="1050" y="289">
            <parameter key="attribute_filter_type" value="subset"/>
            <parameter key="attributes" value="itemid|userid"/>
          </operator>
          <connect from_op="user data" from_port="out 1" to_op="Join" to_port="left"/>
          <connect from_op="item data" from_port="out 1" to_op="Multiply" to_port="input"/>
          <connect from_op="Multiply" from_port="output 1" to_op="Join" to_port="right"/>
          <connect from_op="Multiply" from_port="output 2" to_op="Join (2)" to_port="right"/>
          <connect from_op="Join" from_port="join" to_op="Aggregate" to_port="example set input"/>
          <connect from_op="Aggregate" from_port="example set output" to_op="Rename" to_port="example set input"/>
          <connect from_op="Rename" from_port="example set output" to_op="Join (2)" to_port="left"/>
          <connect from_op="Join (2)" from_port="join" to_op="Select Attributes" to_port="example set input"/>
          <connect from_op="Select Attributes" from_port="example set output" to_port="result 1"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
        </process>
      </operator>
    </process>
    regards

    Andrew
Sign In or Register to comment.