🎉 🎉 RAPIDMINER 9.10 IS OUT!!! 🎉🎉

Download the latest version helping analytics teams accelerate time-to-value for streaming and IIOT use cases.


"[SOLVED] Generate Attribute - Duplicate"

rowan_g Member Posts: 47 Contributor II
edited June 2019 in Help

I was wondering whether it is possible to inspect a particular attribute and detect duplicate values in it using the Generate Attributes operator,
i.e., if Att1 has duplicates, then Att2 = x.




  • Skirzynski Member Posts: 164  Maven

    It is not possible in the "Generate Attributes" operator directly, but it can be done by placing a few operators in front of it. See the process below.


    <?xml version="1.0" encoding="UTF-8" standalone="no"?>
    <!--
      Example process: flag values of att1 that occur more than once.
      Steps: generate sample data, count the occurrences of each att1 value,
      join the counts with the data coming from Aggregate's "original" port,
      then derive att2 from the count.
    -->
    <process version="5.3.006">
      <operator activated="true" class="process" compatibility="5.3.006" expanded="true" name="Process">
        <process expanded="true">
          <!-- Sample input: 10 examples with a single nominal attribute. -->
          <operator activated="true" class="generate_nominal_data" compatibility="5.3.006" expanded="true" height="60" name="Generate Nominal Data" width="90" x="45" y="30">
            <parameter key="number_examples" value="10"/>
            <parameter key="number_of_attributes" value="1"/>
          </operator>
          <!-- Count att1, grouped by att1; the result column is renamed in the next step. -->
          <operator activated="true" class="aggregate" compatibility="5.3.006" expanded="true" height="76" name="Aggregate" width="90" x="179" y="120">
            <list key="aggregation_attributes">
              <parameter key="att1" value="count"/>
            </list>
            <parameter key="group_by_attributes" value="|att1"/>
          </operator>
          <!-- Rename "count(att1)" to "count" so the expression below can refer to it by a plain name. -->
          <operator activated="true" class="rename" compatibility="5.3.006" expanded="true" height="76" name="Rename" width="90" x="313" y="30">
            <parameter key="old_name" value="count(att1)"/>
            <parameter key="new_name" value="count"/>
            <list key="rename_additional_attributes"/>
          </operator>
          <!-- Join on att1 (not on an id attribute): left = renamed counts, right = Aggregate's "original" port. -->
          <!-- NOTE(review): the "original" port presumably passes the unaggregated input through; confirm in the operator docs. -->
          <operator activated="true" class="join" compatibility="5.3.006" expanded="true" height="76" name="Join" width="90" x="447" y="120">
            <parameter key="use_id_attribute_as_key" value="false"/>
            <list key="key_attributes">
              <parameter key="att1" value="att1"/>
            </list>
          </operator>
          <!-- att2 is "duplicates" when count is greater than 1, otherwise "no duplicates". -->
          <operator activated="true" class="generate_attributes" compatibility="5.3.006" expanded="true" height="76" name="Generate Attributes" width="90" x="581" y="120">
            <list key="function_descriptions">
              <parameter key="att2" value="if(count&gt;1, &quot;duplicates&quot;, &quot;no duplicates&quot;)"/>
            </list>
          </operator>
          <connect from_op="Generate Nominal Data" from_port="output" to_op="Aggregate" to_port="example set input"/>
          <connect from_op="Aggregate" from_port="example set output" to_op="Rename" to_port="example set input"/>
          <connect from_op="Aggregate" from_port="original" to_op="Join" to_port="right"/>
          <connect from_op="Rename" from_port="example set output" to_op="Join" to_port="left"/>
          <connect from_op="Join" from_port="join" to_op="Generate Attributes" to_port="example set input"/>
          <connect from_op="Generate Attributes" from_port="example set output" to_port="result 1"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
        </process>
      </operator>
    </process>
  • rowan_g Member Posts: 47 Contributor II
    Thanks for that!
Sign In or Register to comment.