Options

[SOLVED] Total occurrences of an attribute

TommyR22TommyR22 Member Posts: 2 Contributor I
edited November 2018 in Help
Hello,
I am working on a data mining project with RapidMiner. I have a .tsv file with the attributes userid, artist, and track.
Now I want RapidMiner to count the total occurrences of each artist in the ExampleSet, so that I can find the most famous artist in the dataset.
How can I do that?

thank you :)

Answers

  • Options
    awchisholmawchisholm RapidMiner Certified Expert, Member Posts: 458 Unicorn
    Hello

    The Aggregate operator is the one to use

    Here's an example
    <?xml version="1.0" encoding="UTF-8" standalone="no"?>
    <!--
      Example RapidMiner process: count how often each artist occurs.

      Structure:
        1. Subprocess "make fake data" builds a small sample set with the
           attributes userid, artist and track.
        2. Operator "Aggregate" applies the "count" aggregation to "artist",
           grouped by "artist", and its output is wired to the process result.

      To use this with real data, replace the "make fake data" subprocess with
      an operator that loads your own file and connect it to the Aggregate
      operator's "example set input" port.
    -->
    <process version="5.3.013">
      <context>
        <input/>
        <output/>
        <macros/>
      </context>
      <operator activated="true" class="process" compatibility="5.3.013" expanded="true" name="Process">
        <process expanded="true">
          <!-- Sample data: three generators whose outputs are appended into one example set. -->
          <operator activated="true" class="subprocess" compatibility="5.3.013" expanded="true" height="76" name="make fake data" width="90" x="112" y="120">
            <process expanded="true">
              <!-- Sample specification 1: userid 1, artist "fred", track "track01". -->
              <operator activated="true" class="generate_data_user_specification" compatibility="5.3.013" expanded="true" height="60" name="Generate Data by User Specification" width="90" x="112" y="30">
                <list key="attribute_values">
                  <parameter key="userid" value="1"/>
                  <parameter key="artist" value="&quot;fred&quot;"/>
                  <parameter key="track" value="&quot;track01&quot;"/>
                </list>
                <list key="set_additional_roles"/>
              </operator>
              <!-- Sample specification 2: same user and artist as above, different track ("track02"). -->
              <operator activated="true" class="generate_data_user_specification" compatibility="5.3.013" expanded="true" height="60" name="Generate Data by User Specification (2)" width="90" x="112" y="120">
                <list key="attribute_values">
                  <parameter key="userid" value="1"/>
                  <parameter key="artist" value="&quot;fred&quot;"/>
                  <parameter key="track" value="&quot;track02&quot;"/>
                </list>
                <list key="set_additional_roles"/>
              </operator>
              <!-- Sample specification 3: a second artist ("bill") with track "track01". -->
              <operator activated="true" class="generate_data_user_specification" compatibility="5.3.013" expanded="true" height="60" name="Generate Data by User Specification (3)" width="90" x="112" y="210">
                <list key="attribute_values">
                  <parameter key="userid" value="1"/>
                  <parameter key="artist" value="&quot;bill&quot;"/>
                  <parameter key="track" value="&quot;track01&quot;"/>
                </list>
                <list key="set_additional_roles"/>
              </operator>
              <!-- Append receives the three generator outputs on its "example set 1..3" ports. -->
              <operator activated="true" class="append" compatibility="5.3.013" expanded="true" height="112" name="Append" width="90" x="313" y="120"/>
              <connect from_op="Generate Data by User Specification" from_port="output" to_op="Append" to_port="example set 1"/>
              <connect from_op="Generate Data by User Specification (2)" from_port="output" to_op="Append" to_port="example set 2"/>
              <connect from_op="Generate Data by User Specification (3)" from_port="output" to_op="Append" to_port="example set 3"/>
              <!-- The merged set leaves the subprocess through its "out 1" port. -->
              <connect from_op="Append" from_port="merged set" to_port="out 1"/>
              <portSpacing port="source_in 1" spacing="0"/>
              <portSpacing port="sink_out 1" spacing="0"/>
              <portSpacing port="sink_out 2" spacing="0"/>
            </process>
          </operator>
          <!--
            The actual answer to the question: aggregate "artist" with the
            "count" function, grouped by "artist".
            NOTE(review): with the sample data above this should presumably
            yield a count of 2 for "fred" and 1 for "bill", assuming each
            generator emits a single example; confirm by running the process.
          -->
          <operator activated="true" class="aggregate" compatibility="5.3.013" expanded="true" height="76" name="Aggregate" width="90" x="313" y="120">
            <list key="aggregation_attributes">
              <parameter key="artist" value="count"/>
            </list>
            <parameter key="group_by_attributes" value="|artist"/>
          </operator>
          <!-- Wiring: sample data into Aggregate, aggregated set to the process result port "result 1". -->
          <connect from_op="make fake data" from_port="out 1" to_op="Aggregate" to_port="example set input"/>
          <connect from_op="Aggregate" from_port="example set output" to_port="result 1"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
        </process>
      </operator>
    </process>
    regards

    Andrew
  • Options
    TommyR22TommyR22 Member Posts: 2 Contributor I
    thank you so much!
    :P
Sign In or Register to comment.