Answer to LinkedIn question - Data to Similarity

JEdwardJEdward RapidMiner Certified Analyst, RapidMiner Certified Expert, Member Posts: 578 Unicorn
edited November 2018 in Help
Hi Raj,

In answer to your question:
"I am using RapidMinor for Document Similarity .after getting output similarity ,I found that it has replaced ID given by me by its own ids . How to avoid this."
https://www.linkedin.com/groups/How-avoid-replace-Id-rapidminor-4025645.S.5981787877623816192

I am assuming that you are using the Data to Similarity operator and want to get your own IDs back.
Here's an example using the Iris dataset that you can look at. One way to do it is to give your original ID a new role (a pseudo-role) and a new name. Then, once the similarity result has been converted back into an example set (with Similarity to Data), join it with your original IDs twice (once on FIRST_ID and then again on SECOND_ID), keeping only your old ID after each join.
Paste the XML code below into the XML view in RapidMiner and then follow it through to get a better idea of how the pseudo-role works. 
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  RapidMiner process: pairwise similarity on the Iris sample data with the
  original IDs restored in the result.

  Outline (see the <connect> elements at the end for the exact wiring):
    1. Load Iris, move the original ID out of the way (new role and new name).
    2. Branch A: Data to Similarity, then Similarity to Data, which yields the
       FIRST_ID / SECOND_ID columns used as join keys below.
    3. Branch B: keep only id_old, add a fresh "id" to form a lookup table.
    4. Join the lookup table onto FIRST_ID, then onto SECOND_ID, each time
       dropping the key column and renaming id_old to take its place.
-->
<process version="6.3.000">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="6.0.002" expanded="true" name="Process">
    <process expanded="true">
      <!-- Step 1: load the Iris sample set from the repository. -->
      <operator activated="true" class="retrieve" compatibility="6.3.000" expanded="true" height="60" name="Retrieve Iris" width="90" x="45" y="435">
        <parameter key="repository_entry" value="//Samples/data/Iris"/>
      </operator>
      <!-- Give the attribute "id" the custom role "id_old" (a pseudo-role). -->
      <operator activated="true" class="set_role" compatibility="6.3.000" expanded="true" height="76" name="Set Role" width="90" x="45" y="255">
        <description>Setting the role of the original ID to id_old to make it 'disappear' to calculations. </description>
        <parameter key="attribute_name" value="id"/>
        <parameter key="target_role" value="id_old"/>
        <list key="set_additional_roles"/>
      </operator>
      <!-- Rename the attribute itself from "id" to "id_old".
           NOTE(review): the description presumably refers to the "Generate ID"
           operator further down, whose output is joined on an attribute named
           "id" (confirm). -->
      <operator activated="true" class="rename" compatibility="6.3.000" expanded="true" height="76" name="Rename" width="90" x="45" y="120">
        <description>renaming the original ID because the later generates attributes operator also uses the same name. </description>
        <parameter key="old_name" value="id"/>
        <parameter key="new_name" value="id_old"/>
        <list key="rename_additional_attributes"/>
      </operator>
      <!-- Split the data: output 1 feeds "Data to Similarity" (branch A),
           output 2 feeds "Select Attributes" (branch B, the ID lookup table). -->
      <operator activated="true" class="multiply" compatibility="6.3.000" expanded="true" height="94" name="Multiply" width="90" x="179" y="75"/>
      <!-- Branch B: keep only id_old. include_special_attributes is enabled
           because id_old no longer has a regular role after "Set Role". -->
      <operator activated="true" class="select_attributes" compatibility="6.3.000" expanded="true" height="76" name="Select Attributes" width="90" x="246" y="255">
        <description>In this example I only care about IDs. You can add the other fields in if you like. </description>
        <parameter key="attribute_filter_type" value="subset"/>
        <parameter key="attributes" value="id_old"/>
        <parameter key="include_special_attributes" value="true"/>
      </operator>
      <!-- Branch B: add a new ID attribute; both joins below use "id" as the
           right-hand key. Assumes these generated IDs line up with the ones
           reported as FIRST_ID / SECOND_ID by branch A. TODO confirm. -->
      <operator activated="true" class="generate_id" compatibility="6.3.000" expanded="true" height="76" name="Generate ID" width="90" x="246" y="480"/>
      <!-- Branch A: compute the pairwise similarity of all examples.
           NOTE(review): no measure_types parameter is stored here, so whether
           numerical_measure / divergence take effect depends on the operator's
           default measure type (confirm in the operator's parameter panel). -->
      <operator activated="true" class="data_to_similarity" compatibility="6.3.000" expanded="true" height="76" name="Data to Similarity" width="90" x="380" y="30">
        <parameter key="numerical_measure" value="KernelEuclideanDistance"/>
        <parameter key="divergence" value="LogisticLoss"/>
      </operator>
      <!-- Branch A: turn the similarity object back into an example set; its
           output goes into the left port of "Join". -->
      <operator activated="true" class="similarity_to_data" compatibility="6.3.000" expanded="true" height="76" name="Similarity to Data" width="90" x="514" y="30"/>
      <!-- Duplicate the ID lookup table: one copy per join. -->
      <operator activated="true" class="multiply" compatibility="6.3.000" expanded="true" height="94" name="Multiply (2)" width="90" x="380" y="435"/>
      <!-- First join: similarity table (left) with lookup table (right),
           matching FIRST_ID against the generated "id". -->
      <operator activated="true" class="join" compatibility="6.3.000" expanded="true" height="76" name="Join" width="90" x="514" y="300">
        <parameter key="use_id_attribute_as_key" value="false"/>
        <list key="key_attributes">
          <parameter key="FIRST_ID" value="id"/>
        </list>
      </operator>
      <!-- Remove the generated FIRST_ID (subset selection with
           invert_selection keeps everything except the listed attribute). -->
      <operator activated="true" class="select_attributes" compatibility="6.3.000" expanded="true" height="76" name="DROP 1st ID" width="90" x="648" y="300">
        <description>In this example I only care about IDs. </description>
        <parameter key="attribute_filter_type" value="subset"/>
        <parameter key="attributes" value="FIRST_ID"/>
        <parameter key="invert_selection" value="true"/>
        <parameter key="include_special_attributes" value="true"/>
      </operator>
      <!-- The original ID brought in by the join takes over the name FIRST_ID. -->
      <operator activated="true" class="rename" compatibility="6.3.000" expanded="true" height="76" name="RENAME 1st ID" width="90" x="782" y="390">
        <parameter key="old_name" value="id_old"/>
        <parameter key="new_name" value="FIRST_ID"/>
        <list key="rename_additional_attributes"/>
      </operator>
      <!-- Reset the role of FIRST_ID. No target_role is stored, so the
           operator's default role applies (presumably "regular"; confirm).
           This also frees the id_old role before the second join adds another
           id_old attribute. -->
      <operator activated="true" class="set_role" compatibility="6.3.000" expanded="true" height="76" name="Set Role (2)" width="90" x="715" y="525">
        <parameter key="attribute_name" value="FIRST_ID"/>
        <list key="set_additional_roles"/>
      </operator>
      <!-- Second join: same lookup table, this time matched on SECOND_ID. -->
      <operator activated="true" class="join" compatibility="6.3.000" expanded="true" height="76" name="Join (2)" width="90" x="648" y="660">
        <parameter key="use_id_attribute_as_key" value="false"/>
        <list key="key_attributes">
          <parameter key="SECOND_ID" value="id"/>
        </list>
      </operator>
      <!-- Remove the generated SECOND_ID, mirroring "DROP 1st ID". -->
      <operator activated="true" class="select_attributes" compatibility="6.3.000" expanded="true" height="76" name="DROP 2nd ID" width="90" x="782" y="705">
        <description>In this example I only care about IDs. </description>
        <parameter key="attribute_filter_type" value="subset"/>
        <parameter key="attributes" value="SECOND_ID"/>
        <parameter key="invert_selection" value="true"/>
        <parameter key="include_special_attributes" value="true"/>
      </operator>
      <!-- The original ID brought in by the second join becomes SECOND_ID. -->
      <operator activated="true" class="rename" compatibility="6.3.000" expanded="true" height="76" name="RENAME 2nd ID" width="90" x="916" y="705">
        <parameter key="old_name" value="id_old"/>
        <parameter key="new_name" value="SECOND_ID"/>
        <list key="rename_additional_attributes"/>
      </operator>
      <!-- Reset the role of SECOND_ID (default target role, as in
           "Set Role (2)"); the result is delivered to result port 1. -->
      <operator activated="true" class="set_role" compatibility="6.3.000" expanded="true" height="76" name="OriginalIDs are back! Yay!" width="90" x="916" y="525">
        <parameter key="attribute_name" value="SECOND_ID"/>
        <list key="set_additional_roles"/>
      </operator>
      <!-- Wiring. from_op / to_op must match the operator "name" attributes
           above exactly. -->
      <connect from_op="Retrieve Iris" from_port="output" to_op="Set Role" to_port="example set input"/>
      <connect from_op="Set Role" from_port="example set output" to_op="Rename" to_port="example set input"/>
      <connect from_op="Rename" from_port="example set output" to_op="Multiply" to_port="input"/>
      <connect from_op="Multiply" from_port="output 1" to_op="Data to Similarity" to_port="example set"/>
      <connect from_op="Multiply" from_port="output 2" to_op="Select Attributes" to_port="example set input"/>
      <connect from_op="Select Attributes" from_port="example set output" to_op="Generate ID" to_port="example set input"/>
      <connect from_op="Generate ID" from_port="example set output" to_op="Multiply (2)" to_port="input"/>
      <connect from_op="Data to Similarity" from_port="similarity" to_op="Similarity to Data" to_port="similarity"/>
      <connect from_op="Data to Similarity" from_port="example set" to_op="Similarity to Data" to_port="exampleSet"/>
      <connect from_op="Similarity to Data" from_port="exampleSet" to_op="Join" to_port="left"/>
      <connect from_op="Multiply (2)" from_port="output 1" to_op="Join" to_port="right"/>
      <connect from_op="Multiply (2)" from_port="output 2" to_op="Join (2)" to_port="right"/>
      <connect from_op="Join" from_port="join" to_op="DROP 1st ID" to_port="example set input"/>
      <connect from_op="DROP 1st ID" from_port="example set output" to_op="RENAME 1st ID" to_port="example set input"/>
      <connect from_op="RENAME 1st ID" from_port="example set output" to_op="Set Role (2)" to_port="example set input"/>
      <connect from_op="Set Role (2)" from_port="example set output" to_op="Join (2)" to_port="left"/>
      <connect from_op="Join (2)" from_port="join" to_op="DROP 2nd ID" to_port="example set input"/>
      <connect from_op="DROP 2nd ID" from_port="example set output" to_op="RENAME 2nd ID" to_port="example set input"/>
      <connect from_op="RENAME 2nd ID" from_port="example set output" to_op="OriginalIDs are back! Yay!" to_port="example set input"/>
      <connect from_op="OriginalIDs are back! Yay!" from_port="example set output" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>
Sign In or Register to comment.