Due to recent updates, all users are required to create an Altair One account to log in to the RapidMiner community. Click the Register button to create your account using the same email address that you previously used to log in to the RapidMiner community. This will ensure that any previously created content is synced to your Altair One account. Once you log in, you will be asked to provide a username that identifies you to other Community users. Email us at Community with questions.
Answer to LinkedIn question - Data to Similarity
JEdward
RapidMiner Certified Analyst, RapidMiner Certified Expert, Member Posts: 578 Unicorn
Hi Raj,
In answer to your question:
"I am using RapidMinor for Document Similarity .after getting output similarity ,I found that it has replaced ID given by me by its own ids . How to avoid this."
https://www.linkedin.com/groups/How-avoid-replace-Id-rapidminor-4025645.S.5981787877623816192
I am assuming that you are using the Data to Similarity operator and want to get your own IDs back.
Here's an example using the Iris dataset that you can look at. One way to do it is to give your original ID a new role (a pseudo-role) and a new name. Then, once the similarity has been converted back into an example set, join the datasets together twice (once on FIRST_ID and then again on SECOND_ID), keeping only your old ID after each join.
Paste the XML code below into the XML view in RapidMiner and then follow it through to get a better idea of how the pseudo-role works.
In answer to your question:
"I am using RapidMinor for Document Similarity .after getting output similarity ,I found that it has replaced ID given by me by its own ids . How to avoid this."
https://www.linkedin.com/groups/How-avoid-replace-Id-rapidminor-4025645.S.5981787877623816192
I am assuming that you are using the Data to Similarity operator and want to get your own IDs back.
Here's an example using the Iris dataset that you can look at. One way to do it is to give your original ID a new role (a pseudo-role) and a new name. Then, once the similarity has been converted back into an example set, join the datasets together twice (once on FIRST_ID and then again on SECOND_ID), keeping only your old ID after each join.
Paste the XML code below into the XML view in RapidMiner and then follow it through to get a better idea of how the pseudo-role works.
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  RapidMiner process: keep the user's own IDs through a
  Data to Similarity / Similarity to Data round trip.

  Outline of what the operators below are wired to do:
    1. Load Iris, give the "id" attribute the custom role "id_old" and rename it to "id_old".
    2. Branch A: Data to Similarity, then Similarity to Data (output carries FIRST_ID / SECOND_ID,
       as referenced by the joins).
    3. Branch B: keep only "id_old", run Generate ID, and use the result as a lookup table.
    4. Join the lookup on FIRST_ID, drop FIRST_ID, rename "id_old" to FIRST_ID.
    5. Repeat step 4 for SECOND_ID and deliver the result on port "result 1".
-->
<process version="6.3.000">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator class="process" name="Process" activated="true" compatibility="6.0.002"
            expanded="true">
    <process expanded="true">

      <!-- Stage 1: load the sample data and park the original ID under a pseudo-role. -->
      <operator class="retrieve" name="Retrieve Iris" activated="true" compatibility="6.3.000"
                expanded="true" height="60" width="90" x="45" y="435">
        <parameter key="repository_entry" value="//Samples/data/Iris"/>
      </operator>
      <operator class="set_role" name="Set Role" activated="true" compatibility="6.3.000"
                expanded="true" height="76" width="90" x="45" y="255">
        <description>Setting the role of the original ID to id_old to make it 'disappear' to calculations. </description>
        <parameter key="attribute_name" value="id"/>
        <parameter key="target_role" value="id_old"/>
        <list key="set_additional_roles"/>
      </operator>
      <operator class="rename" name="Rename" activated="true" compatibility="6.3.000"
                expanded="true" height="76" width="90" x="45" y="120">
        <description>renaming the original ID because the later generates attributes operator also uses the same name. </description>
        <parameter key="old_name" value="id"/>
        <parameter key="new_name" value="id_old"/>
        <list key="rename_additional_attributes"/>
      </operator>

      <!-- Fan-out: output 1 feeds the similarity branch, output 2 feeds the ID lookup branch. -->
      <operator class="multiply" name="Multiply" activated="true" compatibility="6.3.000"
                expanded="true" height="94" width="90" x="179" y="75"/>

      <!-- Stage 3 (lookup branch): keep only id_old, then run Generate ID with default settings. -->
      <!-- NOTE(review): the joins below use "id" as the right-hand key, so Generate ID is
           presumably what supplies that attribute; confirm against the operator docs. -->
      <operator class="select_attributes" name="Select Attributes" activated="true"
                compatibility="6.3.000" expanded="true" height="76" width="90" x="246" y="255">
        <description>In this example I only care about IDs. You can add the other fields in if you like. </description>
        <parameter key="attribute_filter_type" value="subset"/>
        <parameter key="attributes" value="id_old"/>
        <parameter key="include_special_attributes" value="true"/>
      </operator>
      <operator class="generate_id" name="Generate ID" activated="true" compatibility="6.3.000"
                expanded="true" height="76" width="90" x="246" y="480"/>

      <!-- Stage 2 (similarity branch): build the similarity, then turn it back into data. -->
      <operator class="data_to_similarity" name="Data to Similarity" activated="true"
                compatibility="6.3.000" expanded="true" height="76" width="90" x="380" y="30">
        <parameter key="numerical_measure" value="KernelEuclideanDistance"/>
        <parameter key="divergence" value="LogisticLoss"/>
      </operator>
      <operator class="similarity_to_data" name="Similarity to Data" activated="true"
                compatibility="6.3.000" expanded="true" height="76" width="90" x="514" y="30"/>

      <!-- The lookup table is needed twice: once per join. -->
      <operator class="multiply" name="Multiply (2)" activated="true" compatibility="6.3.000"
                expanded="true" height="94" width="90" x="380" y="435"/>

      <!-- Stage 4: resolve FIRST_ID. Join on FIRST_ID (left) = id (right), drop the FIRST_ID
           column, then rename id_old to FIRST_ID. -->
      <operator class="join" name="Join" activated="true" compatibility="6.3.000"
                expanded="true" height="76" width="90" x="514" y="300">
        <parameter key="use_id_attribute_as_key" value="false"/>
        <list key="key_attributes">
          <parameter key="FIRST_ID" value="id"/>
        </list>
      </operator>
      <operator class="select_attributes" name="DROP 1st ID" activated="true"
                compatibility="6.3.000" expanded="true" height="76" width="90" x="648" y="300">
        <description>In this example I only care about IDs. </description>
        <parameter key="attribute_filter_type" value="subset"/>
        <parameter key="attributes" value="FIRST_ID"/>
        <parameter key="invert_selection" value="true"/>
        <parameter key="include_special_attributes" value="true"/>
      </operator>
      <operator class="rename" name="RENAME 1st ID" activated="true" compatibility="6.3.000"
                expanded="true" height="76" width="90" x="782" y="390">
        <parameter key="old_name" value="id_old"/>
        <parameter key="new_name" value="FIRST_ID"/>
        <list key="rename_additional_attributes"/>
      </operator>
      <!-- No target_role is given, so the operator's default role is applied to FIRST_ID. -->
      <operator class="set_role" name="Set Role (2)" activated="true" compatibility="6.3.000"
                expanded="true" height="76" width="90" x="715" y="525">
        <parameter key="attribute_name" value="FIRST_ID"/>
        <list key="set_additional_roles"/>
      </operator>

      <!-- Stage 5: same treatment for SECOND_ID. -->
      <operator class="join" name="Join (2)" activated="true" compatibility="6.3.000"
                expanded="true" height="76" width="90" x="648" y="660">
        <parameter key="use_id_attribute_as_key" value="false"/>
        <list key="key_attributes">
          <parameter key="SECOND_ID" value="id"/>
        </list>
      </operator>
      <operator class="select_attributes" name="DROP 2nd ID" activated="true"
                compatibility="6.3.000" expanded="true" height="76" width="90" x="782" y="705">
        <description>In this example I only care about IDs. </description>
        <parameter key="attribute_filter_type" value="subset"/>
        <parameter key="attributes" value="SECOND_ID"/>
        <parameter key="invert_selection" value="true"/>
        <parameter key="include_special_attributes" value="true"/>
      </operator>
      <operator class="rename" name="RENAME 2nd ID" activated="true" compatibility="6.3.000"
                expanded="true" height="76" width="90" x="916" y="705">
        <parameter key="old_name" value="id_old"/>
        <parameter key="new_name" value="SECOND_ID"/>
        <list key="rename_additional_attributes"/>
      </operator>
      <!-- As with FIRST_ID, no target_role is given, so the default role is applied. -->
      <operator class="set_role" name="OriginalIDs are back! Yay!" activated="true"
                compatibility="6.3.000" expanded="true" height="76" width="90" x="916" y="525">
        <parameter key="attribute_name" value="SECOND_ID"/>
        <list key="set_additional_roles"/>
      </operator>

      <!-- Wiring: preparation chain. -->
      <connect from_op="Retrieve Iris" from_port="output"
               to_op="Set Role" to_port="example set input"/>
      <connect from_op="Set Role" from_port="example set output"
               to_op="Rename" to_port="example set input"/>
      <connect from_op="Rename" from_port="example set output"
               to_op="Multiply" to_port="input"/>

      <!-- Wiring: the two branches leaving the first Multiply. -->
      <connect from_op="Multiply" from_port="output 1"
               to_op="Data to Similarity" to_port="example set"/>
      <connect from_op="Multiply" from_port="output 2"
               to_op="Select Attributes" to_port="example set input"/>
      <connect from_op="Select Attributes" from_port="example set output"
               to_op="Generate ID" to_port="example set input"/>
      <connect from_op="Generate ID" from_port="example set output"
               to_op="Multiply (2)" to_port="input"/>
      <connect from_op="Data to Similarity" from_port="similarity"
               to_op="Similarity to Data" to_port="similarity"/>
      <connect from_op="Data to Similarity" from_port="example set"
               to_op="Similarity to Data" to_port="exampleSet"/>

      <!-- Wiring: similarity rows on the left of each join, ID lookup on the right. -->
      <connect from_op="Similarity to Data" from_port="exampleSet"
               to_op="Join" to_port="left"/>
      <connect from_op="Multiply (2)" from_port="output 1"
               to_op="Join" to_port="right"/>
      <connect from_op="Multiply (2)" from_port="output 2"
               to_op="Join (2)" to_port="right"/>
      <connect from_op="Join" from_port="join"
               to_op="DROP 1st ID" to_port="example set input"/>
      <connect from_op="DROP 1st ID" from_port="example set output"
               to_op="RENAME 1st ID" to_port="example set input"/>
      <connect from_op="RENAME 1st ID" from_port="example set output"
               to_op="Set Role (2)" to_port="example set input"/>
      <connect from_op="Set Role (2)" from_port="example set output"
               to_op="Join (2)" to_port="left"/>
      <connect from_op="Join (2)" from_port="join"
               to_op="DROP 2nd ID" to_port="example set input"/>
      <connect from_op="DROP 2nd ID" from_port="example set output"
               to_op="RENAME 2nd ID" to_port="example set input"/>
      <connect from_op="RENAME 2nd ID" from_port="example set output"
               to_op="OriginalIDs are back! Yay!" to_port="example set input"/>

      <!-- Wiring: final example set is delivered as the process result. -->
      <connect from_op="OriginalIDs are back! Yay!" from_port="example set output"
               to_port="result 1"/>

      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>
0