RapidMiner

0 Likes

Re: Cross Distances operator : Weird results

Status: Declined

Hi all,

 

I performed the calculation of "distances" manually using RapidMiner (not with ......Excel this time @sgenzer...)

Stop me if I'm wrong, but I assumed that:

 - for nominal attributes : Distance_Attribute_x = IF(Attribute_x[Employee characteristics] == Attribute_x[Position],0,1).

 - for numerical attributes : Distance_Attribute_x = (Attribute_x[Employee characteristics] - Attribute_x[Position])^2.

then Distance = SQRT(sum(Distance_Attribute_x)).

 

I obtained the following results : 

 

HR_Sourcing_7.png

As a reminder, I obtained the following results with the Cross Distances operator:

 

HR_Sourcing_8.png

We see that there is only one "good result".

 

Here is the entire process:

<?xml version="1.0" encoding="UTF-8"?><process version="8.1.000">
  <!-- Forum repro: compares the output of the Cross Distances operator with a distance
       computed by hand via Generate Attributes. Three independent branches deliver to
       result ports 1, 2 and 3. -->
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="6.0.002" expanded="true" name="Process">
    <process expanded="true">
      <!-- Branch 1 (annotated "Scott's process" on its Cross Distances operator): read the
           employee sheet, keep a subset of attributes, and compare every row against the
           single row kept by Filter Example Range. Delivered to result 1. -->
      <operator activated="true" class="read_excel" compatibility="8.1.000" expanded="true" height="68" name="Employees (2)" width="90" x="45" y="34">
        <parameter key="excel_file" value="C:\Users\Lionel\Documents\Formations_DataScience\Rapidminer\Tests_Rapidminer\HR_Sourcing\Employees.xlsx"/>
        <parameter key="imported_cell_range" value="A1:F5"/>
        <parameter key="first_row_as_names" value="false"/>
        <list key="annotations">
          <parameter key="0" value="Name"/>
        </list>
        <!-- In this reader Id_employee is declared as a regular attribute; the second
             "Employees" reader further down declares it with the id role instead. -->
        <list key="data_set_meta_data_information">
          <parameter key="0" value="Id_employee.true.integer.attribute"/>
          <parameter key="1" value="name.true.polynominal.attribute"/>
          <parameter key="2" value="skills.true.polynominal.attribute"/>
          <parameter key="3" value="department.true.polynominal.attribute"/>
          <parameter key="4" value="language.true.polynominal.attribute"/>
          <parameter key="5" value="experience.true.integer.attribute"/>
        </list>
      </operator>
      <!-- NOTE(review): the filter type is "subset", so the single-attribute parameter
           "attribute" (value "name") is presumably ignored and "name" is not part of the
           subset list. TODO confirm. -->
      <operator activated="true" class="select_attributes" compatibility="8.1.000" expanded="true" height="82" name="Select Attributes (4)" width="90" x="179" y="34">
        <parameter key="attribute_filter_type" value="subset"/>
        <parameter key="attribute" value="name"/>
        <parameter key="attributes" value="Id_employee|department|experience|language|skills"/>
        <parameter key="include_special_attributes" value="true"/>
      </operator>
      <operator activated="true" class="multiply" compatibility="8.1.000" expanded="true" height="103" name="Multiply (2)" width="90" x="313" y="34"/>
      <!-- first_example and last_example are both 3, so exactly one example is kept as the
           reference set. -->
      <operator activated="true" class="filter_example_range" compatibility="8.1.000" expanded="true" height="82" name="Filter Example Range" width="90" x="447" y="85">
        <parameter key="first_example" value="3"/>
        <parameter key="last_example" value="3"/>
      </operator>
      <!-- NOTE(review): only numerical_measure is set here; measure_types is left at its
           default. The inputs contain polynominal attributes, so whether CosineSimilarity
           is the measure actually applied should be confirmed. -->
      <operator activated="true" class="cross_distances" compatibility="8.1.000" expanded="true" height="103" name="Cross Distances (2)" width="90" x="648" y="34">
        <parameter key="numerical_measure" value="CosineSimilarity"/>
        <description align="center" color="yellow" colored="true" width="126">Scott's process</description>
      </operator>
      <!-- Branches 2 and 3 share the two readers below: "Employees" (Id_employee has the id
           role) and "Position" (sheet_number 2, Id_position has the id role). -->
      <operator activated="true" class="read_excel" compatibility="8.1.000" expanded="true" height="68" name="Employees" width="90" x="45" y="391">
        <parameter key="excel_file" value="C:\Users\Lionel\Documents\Formations_DataScience\Rapidminer\Tests_Rapidminer\HR_Sourcing\Employees.xlsx"/>
        <parameter key="imported_cell_range" value="A1:F5"/>
        <parameter key="first_row_as_names" value="false"/>
        <list key="annotations">
          <parameter key="0" value="Name"/>
        </list>
        <list key="data_set_meta_data_information">
          <parameter key="0" value="Id_employee.true.integer.id"/>
          <parameter key="1" value="name.true.nominal.attribute"/>
          <parameter key="2" value="skills.true.nominal.attribute"/>
          <parameter key="3" value="department.true.nominal.attribute"/>
          <parameter key="4" value="language.true.nominal.attribute"/>
          <parameter key="5" value="experience.true.integer.attribute"/>
        </list>
      </operator>
      <operator activated="true" class="multiply" compatibility="8.1.000" expanded="true" height="103" name="Multiply (3)" width="90" x="179" y="340"/>
      <operator activated="true" class="trim" compatibility="8.1.000" expanded="true" height="82" name="Trim (2)" width="90" x="313" y="340"/>
      <operator activated="true" class="multiply" compatibility="8.1.000" expanded="true" height="103" name="Multiply" width="90" x="447" y="289"/>
      <!-- Employee features handed to Cross Distances as the reference set. -->
      <operator activated="true" class="select_attributes" compatibility="8.1.000" expanded="true" height="82" name="Select Attributes" width="90" x="581" y="442">
        <parameter key="attribute_filter_type" value="subset"/>
        <parameter key="attributes" value="Id_employee|department|experience|language|skills"/>
        <parameter key="include_special_attributes" value="true"/>
      </operator>
      <!-- Employee id and name only; joined back onto the distance table at the end of
           branch 2. -->
      <operator activated="true" class="select_attributes" compatibility="8.1.000" expanded="true" height="82" name="Select Attributes (3)" width="90" x="581" y="289">
        <parameter key="attribute_filter_type" value="subset"/>
        <parameter key="attributes" value="name|Id_employee"/>
        <parameter key="include_special_attributes" value="true"/>
      </operator>
      <operator activated="true" class="read_excel" compatibility="8.1.000" expanded="true" height="68" name="Position" width="90" x="45" y="493">
        <parameter key="excel_file" value="C:\Users\Lionel\Documents\Formations_DataScience\Rapidminer\Tests_Rapidminer\HR_Sourcing\Employees.xlsx"/>
        <parameter key="sheet_number" value="2"/>
        <parameter key="imported_cell_range" value="A1:E2"/>
        <parameter key="first_row_as_names" value="false"/>
        <list key="annotations">
          <parameter key="0" value="Name"/>
        </list>
        <list key="data_set_meta_data_information">
          <parameter key="0" value="Id_position.true.integer.id"/>
          <parameter key="1" value="skills.true.nominal.attribute"/>
          <parameter key="2" value="department.true.nominal.attribute"/>
          <parameter key="3" value="language.true.nominal.attribute"/>
          <parameter key="4" value="experience.true.integer.attribute"/>
        </list>
      </operator>
      <operator activated="true" class="multiply" compatibility="8.1.000" expanded="true" height="103" name="Multiply (4)" width="90" x="179" y="493"/>
      <!-- Branch 3 (manual computation): outer join of the employees (left) with the
           position sheet (right). No explicit key attributes are listed and duplicate
           attribute names are kept (remove_double_attributes = false); the expressions
           further down address the right-hand copies with the suffix _from_ES2. -->
      <operator activated="true" class="concurrency:join" compatibility="8.1.000" expanded="true" height="82" name="Join (2)" width="90" x="313" y="442">
        <parameter key="remove_double_attributes" value="false"/>
        <parameter key="join_type" value="outer"/>
        <list key="key_attributes"/>
      </operator>
      <!-- Applies Replace Missing Values (Series) to each _from_ES2 attribute in turn.
           Presumably this copies the position row's values into the rows left empty by the
           outer join; verify against the operator's default replacement mode. -->
      <operator activated="true" class="subprocess" compatibility="8.1.000" expanded="true" height="82" name="Missing values" width="90" x="447" y="442">
        <process expanded="true">
          <operator activated="true" class="series:replace_missing_series_values" compatibility="7.4.000" expanded="true" height="82" name="Replace Missing Values (Series)" width="90" x="45" y="34">
            <parameter key="attribute_name" value="skills_from_ES2"/>
          </operator>
          <operator activated="true" class="series:replace_missing_series_values" compatibility="7.4.000" expanded="true" height="82" name="Replace Missing Values (2)" width="90" x="179" y="34">
            <parameter key="attribute_name" value="department_from_ES2"/>
          </operator>
          <operator activated="true" class="series:replace_missing_series_values" compatibility="7.4.000" expanded="true" height="82" name="Replace Missing Values (3)" width="90" x="313" y="34">
            <parameter key="attribute_name" value="language_from_ES2"/>
          </operator>
          <operator activated="true" class="series:replace_missing_series_values" compatibility="7.4.000" expanded="true" height="82" name="Replace Missing Values (4)" width="90" x="447" y="34">
            <parameter key="attribute_name" value="experience_from_ES2"/>
          </operator>
          <connect from_port="in 1" to_op="Replace Missing Values (Series)" to_port="example set input"/>
          <connect from_op="Replace Missing Values (Series)" from_port="example set output" to_op="Replace Missing Values (2)" to_port="example set input"/>
          <connect from_op="Replace Missing Values (2)" from_port="example set output" to_op="Replace Missing Values (3)" to_port="example set input"/>
          <connect from_op="Replace Missing Values (3)" from_port="example set output" to_op="Replace Missing Values (4)" to_port="example set input"/>
          <connect from_op="Replace Missing Values (4)" from_port="example set output" to_port="out 1"/>
          <portSpacing port="source_in 1" spacing="0"/>
          <portSpacing port="source_in 2" spacing="0"/>
          <portSpacing port="sink_out 1" spacing="0"/>
          <portSpacing port="sink_out 2" spacing="0"/>
        </process>
      </operator>
      <!-- Hand-computed distance: a 0/1 mismatch term per nominal attribute, the squared
           difference for experience, then the square root of the sum of the four terms. -->
      <operator activated="true" class="subprocess" compatibility="8.1.000" expanded="true" height="82" name="Distance calculation" width="90" x="581" y="544">
        <process expanded="true">
          <operator activated="true" class="generate_attributes" compatibility="8.1.000" expanded="true" height="82" name="Generate Attributes" width="90" x="45" y="34">
            <list key="function_descriptions">
              <parameter key="Dist. skills" value="if(skills==skills_from_ES2,0,1)"/>
              <parameter key="Dist. department" value="if(department==department_from_ES2,0,1)"/>
              <parameter key="Dist. language" value="if(language==language_from_ES2,0,1)"/>
              <parameter key="Dist. experience" value="pow((experience-experience_from_ES2),2)"/>
            </list>
          </operator>
          <operator activated="true" class="generate_attributes" compatibility="8.1.000" expanded="true" height="82" name="Generate Attributes (2)" width="90" x="179" y="34">
            <list key="function_descriptions">
              <parameter key="Distance" value="sqrt([Dist. skills]+[Dist. department]+[Dist. language]+[Dist. experience])"/>
            </list>
          </operator>
          <connect from_port="in 1" to_op="Generate Attributes" to_port="example set input"/>
          <connect from_op="Generate Attributes" from_port="example set output" to_op="Generate Attributes (2)" to_port="example set input"/>
          <connect from_op="Generate Attributes (2)" from_port="example set output" to_port="out 1"/>
          <portSpacing port="source_in 1" spacing="0"/>
          <portSpacing port="source_in 2" spacing="0"/>
          <portSpacing port="sink_out 1" spacing="0"/>
          <portSpacing port="sink_out 2" spacing="0"/>
        </process>
      </operator>
      <!-- Gives the hand-computed Distance attribute the label role. -->
      <operator activated="true" class="set_role" compatibility="8.1.000" expanded="true" height="82" name="Set Role" width="90" x="715" y="544">
        <parameter key="attribute_name" value="Distance"/>
        <parameter key="target_role" value="label"/>
        <list key="set_additional_roles"/>
      </operator>
      <!-- Branch 2 (operator under test): the trimmed position row becomes the request set
           of Cross Distances; the employee features are its reference set. -->
      <operator activated="true" class="trim" compatibility="8.1.000" expanded="true" height="82" name="Trim" width="90" x="313" y="544"/>
      <operator activated="true" class="select_attributes" compatibility="8.1.000" expanded="true" height="82" name="Select Attributes (2)" width="90" x="447" y="595">
        <parameter key="attribute_filter_type" value="subset"/>
        <parameter key="attributes" value="department|experience|language|skills|Id_position"/>
        <parameter key="include_special_attributes" value="true"/>
      </operator>
      <!-- NOTE(review): only numerical_measure is set and both inputs contain nominal
           attributes; confirm which measure is really applied when measure_types is left
           at its default. -->
      <operator activated="true" class="cross_distances" compatibility="8.1.000" expanded="true" height="103" name="Cross Distances" width="90" x="715" y="391">
        <parameter key="numerical_measure" value="CosineSimilarity"/>
      </operator>
      <!-- Renames the attributes document, request and distance to Employee, position and
           similarity. NOTE(review): the manual branch computes a distance, so the name
           "similarity" may be misleading; confirm what the operator emits. -->
      <operator activated="true" class="rename" compatibility="8.1.000" expanded="true" height="82" name="Rename" width="90" x="849" y="442">
        <parameter key="old_name" value="document"/>
        <parameter key="new_name" value="Employee"/>
        <list key="rename_additional_attributes">
          <parameter key="request" value="position"/>
          <parameter key="distance" value="similarity"/>
        </list>
      </operator>
      <operator activated="true" class="set_role" compatibility="8.1.000" expanded="true" height="82" name="Set Role (3)" width="90" x="983" y="442">
        <parameter key="attribute_name" value="Employee"/>
        <parameter key="target_role" value="id"/>
        <list key="set_additional_roles"/>
      </operator>
      <!-- Joins the employee names (left) onto the distance table (right). key_attributes
           is empty, so the join presumably relies on the id roles set upstream. TODO
           confirm. -->
      <operator activated="true" class="join" compatibility="8.1.000" expanded="true" height="82" name="Join" width="90" x="1184" y="289">
        <list key="key_attributes"/>
      </operator>
      <!-- Wiring for branch 1 (result 1). -->
      <connect from_op="Employees (2)" from_port="output" to_op="Select Attributes (4)" to_port="example set input"/>
      <connect from_op="Select Attributes (4)" from_port="example set output" to_op="Multiply (2)" to_port="input"/>
      <connect from_op="Multiply (2)" from_port="output 1" to_op="Cross Distances (2)" to_port="request set"/>
      <connect from_op="Multiply (2)" from_port="output 2" to_op="Filter Example Range" to_port="example set input"/>
      <connect from_op="Filter Example Range" from_port="example set output" to_op="Cross Distances (2)" to_port="reference set"/>
      <connect from_op="Cross Distances (2)" from_port="result set" to_port="result 1"/>
      <!-- Wiring for branches 2 (result 2) and 3 (result 3), which share the two readers. -->
      <connect from_op="Employees" from_port="output" to_op="Multiply (3)" to_port="input"/>
      <connect from_op="Multiply (3)" from_port="output 1" to_op="Trim (2)" to_port="example set input"/>
      <connect from_op="Multiply (3)" from_port="output 2" to_op="Join (2)" to_port="left"/>
      <connect from_op="Trim (2)" from_port="example set output" to_op="Multiply" to_port="input"/>
      <connect from_op="Multiply" from_port="output 1" to_op="Select Attributes" to_port="example set input"/>
      <connect from_op="Multiply" from_port="output 2" to_op="Select Attributes (3)" to_port="example set input"/>
      <connect from_op="Select Attributes" from_port="example set output" to_op="Cross Distances" to_port="reference set"/>
      <connect from_op="Select Attributes (3)" from_port="example set output" to_op="Join" to_port="left"/>
      <connect from_op="Position" from_port="output" to_op="Multiply (4)" to_port="input"/>
      <connect from_op="Multiply (4)" from_port="output 1" to_op="Trim" to_port="example set input"/>
      <connect from_op="Multiply (4)" from_port="output 2" to_op="Join (2)" to_port="right"/>
      <connect from_op="Join (2)" from_port="join" to_op="Missing values" to_port="in 1"/>
      <connect from_op="Missing values" from_port="out 1" to_op="Distance calculation" to_port="in 1"/>
      <connect from_op="Distance calculation" from_port="out 1" to_op="Set Role" to_port="example set input"/>
      <connect from_op="Set Role" from_port="example set output" to_port="result 3"/>
      <connect from_op="Trim" from_port="example set output" to_op="Select Attributes (2)" to_port="example set input"/>
      <connect from_op="Select Attributes (2)" from_port="example set output" to_op="Cross Distances" to_port="request set"/>
      <connect from_op="Cross Distances" from_port="result set" to_op="Rename" to_port="example set input"/>
      <connect from_op="Rename" from_port="example set output" to_op="Set Role (3)" to_port="example set input"/>
      <connect from_op="Set Role (3)" from_port="example set output" to_op="Join" to_port="right"/>
      <connect from_op="Join" from_port="join" to_port="result 2"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
      <portSpacing port="sink_result 4" spacing="0"/>
    </process>
  </operator>
</process>

and here is the Excel file:

https://drive.google.com/open?id=1dmj8zlHhtrdzMqVHYYmZO2VuevHCMcFh

 

I hope this helps to advance the investigation of the Cross Distances operator.

 

Best regards, 

 

Lionel

 

 

 

 

 

 

2 Comments (2 New)
Comments
Community Manager

Hi @lionelderkrikor, I played with your CSV files and I believe it is a metadata problem (bug). Here's how I came to this conclusion: if you run this process, you get the expected result:

 

<?xml version="1.0" encoding="UTF-8"?><process version="8.1.000">
  <!-- Repro variant 1: Read CSV, Select Attributes, Multiply, then Cross Distances with the
       reference set reduced to example 3. Remove Unused Values is present but deactivated
       and not connected, so it does not take part in the data path. -->
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="6.0.002" expanded="true" name="Process">
    <process expanded="true">
      <!-- Deactivated scratch area (activated="false"): generates small example sets with
           nominal ids and feeds the deactivated Cross Distances operator that follows. -->
      <operator activated="false" breakpoints="after" class="subprocess" compatibility="8.1.000" expanded="true" height="103" name="Subprocess" width="90" x="45" y="850">
        <process expanded="true">
          <operator activated="true" class="generate_data_user_specification" compatibility="6.4.000" expanded="true" height="68" name="Generate Data by User Specification (3)" width="90" x="179" y="30">
            <list key="attribute_values">
              <parameter key="attribute1" value="1"/>
              <parameter key="attribute2" value="2"/>
              <parameter key="attribute3" value="3"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <operator activated="true" class="generate_data_user_specification" compatibility="6.4.000" expanded="true" height="68" name="Generate Data by User Specification" width="90" x="179" y="165">
            <list key="attribute_values">
              <parameter key="attribute1" value="1"/>
              <parameter key="attribute2" value="2"/>
              <parameter key="attribute3" value="3"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <operator activated="true" class="generate_data_user_specification" compatibility="6.4.000" expanded="true" height="68" name="Generate Data by User Specification (2)" width="90" x="179" y="300">
            <list key="attribute_values">
              <parameter key="attribute1" value="4"/>
              <parameter key="attribute2" value="5"/>
              <parameter key="attribute3" value="6"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <operator activated="true" class="append" compatibility="8.1.000" expanded="true" height="103" name="Append" width="90" x="313" y="210"/>
          <operator activated="true" class="generate_id" compatibility="8.1.000" expanded="true" height="82" name="Generate ID" width="90" x="514" y="30">
            <parameter key="create_nominal_ids" value="true"/>
          </operator>
          <operator activated="true" class="generate_id" compatibility="8.1.000" expanded="true" height="82" name="Generate ID (2)" width="90" x="514" y="210">
            <parameter key="create_nominal_ids" value="true"/>
          </operator>
          <connect from_op="Generate Data by User Specification (3)" from_port="output" to_op="Generate ID" to_port="example set input"/>
          <connect from_op="Generate Data by User Specification" from_port="output" to_op="Append" to_port="example set 1"/>
          <connect from_op="Generate Data by User Specification (2)" from_port="output" to_op="Append" to_port="example set 2"/>
          <connect from_op="Append" from_port="merged set" to_op="Generate ID (2)" to_port="example set input"/>
          <connect from_op="Generate ID" from_port="example set output" to_port="out 1"/>
          <connect from_op="Generate ID (2)" from_port="example set output" to_port="out 2"/>
          <portSpacing port="source_in 1" spacing="0"/>
          <portSpacing port="sink_out 1" spacing="0"/>
          <portSpacing port="sink_out 2" spacing="162"/>
          <portSpacing port="sink_out 3" spacing="0"/>
        </process>
      </operator>
      <operator activated="false" class="cross_distances" compatibility="8.1.000" expanded="true" height="103" name="Cross Distances" width="90" x="246" y="850">
        <parameter key="numerical_measure" value="KernelEuclideanDistance"/>
      </operator>
      <!-- Active data path starts here: employee data read from CSV, with Id_employee
           declared in the id role. -->
      <operator activated="true" class="read_csv" compatibility="8.1.000" expanded="true" height="68" name="Employees (2)" width="90" x="45" y="187">
        <parameter key="csv_file" value="/Users/genzerconsulting/Desktop/Employees_csv.csv"/>
        <parameter key="first_row_as_names" value="false"/>
        <list key="annotations">
          <parameter key="0" value="Name"/>
        </list>
        <parameter key="encoding" value="UTF-8"/>
        <list key="data_set_meta_data_information">
          <parameter key="0" value="Id_employee.true.integer.id"/>
          <parameter key="1" value="name.true.polynominal.attribute"/>
          <parameter key="2" value="skills.true.polynominal.attribute"/>
          <parameter key="3" value="department.true.polynominal.attribute"/>
          <parameter key="4" value="language.true.polynominal.attribute"/>
          <parameter key="5" value="experience.true.integer.attribute"/>
        </list>
      </operator>
      <!-- NOTE(review): the subset lists "ID", but the reader above declares the id
           attribute as Id_employee; with include_special_attributes = true, confirm whether
           the id attribute survives this filter. The "attribute" parameter (value "name")
           is presumably ignored for the "subset" filter type. -->
      <operator activated="true" class="select_attributes" compatibility="8.1.000" expanded="true" height="82" name="Select Attributes" width="90" x="179" y="187">
        <parameter key="attribute_filter_type" value="subset"/>
        <parameter key="attribute" value="name"/>
        <parameter key="attributes" value="department|experience|language|skills|ID"/>
        <parameter key="include_special_attributes" value="true"/>
      </operator>
      <!-- Deactivated and unconnected in this variant. -->
      <operator activated="false" class="remove_unused_values" compatibility="8.1.000" expanded="true" height="103" name="Remove Unused Values" width="90" x="514" y="34"/>
      <operator activated="true" class="multiply" compatibility="8.1.000" expanded="true" height="103" name="Multiply" width="90" x="313" y="187"/>
      <!-- Keeps exactly one example (first_example = last_example = 3) as the reference
           set. -->
      <operator activated="true" class="filter_example_range" compatibility="8.1.000" expanded="true" height="82" name="Filter Example Range" width="90" x="514" y="289">
        <parameter key="first_example" value="3"/>
        <parameter key="last_example" value="3"/>
      </operator>
      <!-- NOTE(review): only numerical_measure is set while the data contains polynominal
           attributes; confirm which measure is applied when measure_types is left at its
           default. -->
      <operator activated="true" class="cross_distances" compatibility="8.1.000" expanded="true" height="103" name="Cross Distances (2)" width="90" x="715" y="238">
        <parameter key="numerical_measure" value="CosineSimilarity"/>
      </operator>
      <connect from_op="Subprocess" from_port="out 1" to_op="Cross Distances" to_port="request set"/>
      <connect from_op="Subprocess" from_port="out 2" to_op="Cross Distances" to_port="reference set"/>
      <connect from_op="Employees (2)" from_port="output" to_op="Select Attributes" to_port="example set input"/>
      <connect from_op="Select Attributes" from_port="example set output" to_op="Multiply" to_port="input"/>
      <connect from_op="Multiply" from_port="output 1" to_op="Cross Distances (2)" to_port="request set"/>
      <connect from_op="Multiply" from_port="output 2" to_op="Filter Example Range" to_port="example set input"/>
      <connect from_op="Filter Example Range" from_port="example set output" to_op="Cross Distances (2)" to_port="reference set"/>
      <connect from_op="Cross Distances (2)" from_port="result set" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>

However if you remove unused values (which only affects the metadata), you get the same problem as before:

 

<?xml version="1.0" encoding="UTF-8"?><process version="8.1.000">
  <!-- Repro variant 2: same active data path as a plain Read CSV, Select Attributes,
       Multiply, Cross Distances chain, except that Remove Unused Values is active and sits
       between Filter Example Range and the reference set port only. The request set does
       not pass through it. -->
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="6.0.002" expanded="true" name="Process">
    <process expanded="true">
      <!-- Deactivated scratch area (activated="false"): generates small example sets with
           nominal ids and feeds the deactivated Cross Distances operator that follows. -->
      <operator activated="false" breakpoints="after" class="subprocess" compatibility="8.1.000" expanded="true" height="103" name="Subprocess" width="90" x="45" y="850">
        <process expanded="true">
          <operator activated="true" class="generate_data_user_specification" compatibility="6.4.000" expanded="true" height="68" name="Generate Data by User Specification (3)" width="90" x="179" y="30">
            <list key="attribute_values">
              <parameter key="attribute1" value="1"/>
              <parameter key="attribute2" value="2"/>
              <parameter key="attribute3" value="3"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <operator activated="true" class="generate_data_user_specification" compatibility="6.4.000" expanded="true" height="68" name="Generate Data by User Specification" width="90" x="179" y="165">
            <list key="attribute_values">
              <parameter key="attribute1" value="1"/>
              <parameter key="attribute2" value="2"/>
              <parameter key="attribute3" value="3"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <operator activated="true" class="generate_data_user_specification" compatibility="6.4.000" expanded="true" height="68" name="Generate Data by User Specification (2)" width="90" x="179" y="300">
            <list key="attribute_values">
              <parameter key="attribute1" value="4"/>
              <parameter key="attribute2" value="5"/>
              <parameter key="attribute3" value="6"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <operator activated="true" class="append" compatibility="8.1.000" expanded="true" height="103" name="Append" width="90" x="313" y="210"/>
          <operator activated="true" class="generate_id" compatibility="8.1.000" expanded="true" height="82" name="Generate ID" width="90" x="514" y="30">
            <parameter key="create_nominal_ids" value="true"/>
          </operator>
          <operator activated="true" class="generate_id" compatibility="8.1.000" expanded="true" height="82" name="Generate ID (2)" width="90" x="514" y="210">
            <parameter key="create_nominal_ids" value="true"/>
          </operator>
          <connect from_op="Generate Data by User Specification (3)" from_port="output" to_op="Generate ID" to_port="example set input"/>
          <connect from_op="Generate Data by User Specification" from_port="output" to_op="Append" to_port="example set 1"/>
          <connect from_op="Generate Data by User Specification (2)" from_port="output" to_op="Append" to_port="example set 2"/>
          <connect from_op="Append" from_port="merged set" to_op="Generate ID (2)" to_port="example set input"/>
          <connect from_op="Generate ID" from_port="example set output" to_port="out 1"/>
          <connect from_op="Generate ID (2)" from_port="example set output" to_port="out 2"/>
          <portSpacing port="source_in 1" spacing="0"/>
          <portSpacing port="sink_out 1" spacing="0"/>
          <portSpacing port="sink_out 2" spacing="162"/>
          <portSpacing port="sink_out 3" spacing="0"/>
        </process>
      </operator>
      <operator activated="false" class="cross_distances" compatibility="8.1.000" expanded="true" height="103" name="Cross Distances" width="90" x="246" y="850">
        <parameter key="numerical_measure" value="KernelEuclideanDistance"/>
      </operator>
      <!-- Active data path starts here: employee data read from CSV, with Id_employee
           declared in the id role. -->
      <operator activated="true" class="read_csv" compatibility="8.1.000" expanded="true" height="68" name="Employees (2)" width="90" x="45" y="187">
        <parameter key="csv_file" value="/Users/genzerconsulting/Desktop/Employees_csv.csv"/>
        <parameter key="first_row_as_names" value="false"/>
        <list key="annotations">
          <parameter key="0" value="Name"/>
        </list>
        <parameter key="encoding" value="UTF-8"/>
        <list key="data_set_meta_data_information">
          <parameter key="0" value="Id_employee.true.integer.id"/>
          <parameter key="1" value="name.true.polynominal.attribute"/>
          <parameter key="2" value="skills.true.polynominal.attribute"/>
          <parameter key="3" value="department.true.polynominal.attribute"/>
          <parameter key="4" value="language.true.polynominal.attribute"/>
          <parameter key="5" value="experience.true.integer.attribute"/>
        </list>
      </operator>
      <!-- NOTE(review): the subset lists "ID", but the reader above declares the id
           attribute as Id_employee; with include_special_attributes = true, confirm whether
           the id attribute survives this filter. The "attribute" parameter (value "name")
           is presumably ignored for the "subset" filter type. -->
      <operator activated="true" class="select_attributes" compatibility="8.1.000" expanded="true" height="82" name="Select Attributes" width="90" x="179" y="187">
        <parameter key="attribute_filter_type" value="subset"/>
        <parameter key="attribute" value="name"/>
        <parameter key="attributes" value="department|experience|language|skills|ID"/>
        <parameter key="include_special_attributes" value="true"/>
      </operator>
      <operator activated="true" class="multiply" compatibility="8.1.000" expanded="true" height="103" name="Multiply" width="90" x="313" y="187"/>
      <!-- Keeps exactly one example (first_example = last_example = 3) as the reference
           set. -->
      <operator activated="true" class="filter_example_range" compatibility="8.1.000" expanded="true" height="82" name="Filter Example Range" width="90" x="447" y="289">
        <parameter key="first_example" value="3"/>
        <parameter key="last_example" value="3"/>
      </operator>
      <!-- Active in this variant and applied to the reference set only, so the nominal
           value sets of request and reference may differ afterwards; the surrounding post
           attributes the wrong distances to this mismatch. -->
      <operator activated="true" class="remove_unused_values" compatibility="8.1.000" expanded="true" height="103" name="Remove Unused Values" width="90" x="581" y="289"/>
      <!-- NOTE(review): only numerical_measure is set while the data contains polynominal
           attributes; confirm which measure is applied when measure_types is left at its
           default. -->
      <operator activated="true" class="cross_distances" compatibility="8.1.000" expanded="true" height="103" name="Cross Distances (2)" width="90" x="782" y="136">
        <parameter key="numerical_measure" value="CosineSimilarity"/>
      </operator>
      <connect from_op="Subprocess" from_port="out 1" to_op="Cross Distances" to_port="request set"/>
      <connect from_op="Subprocess" from_port="out 2" to_op="Cross Distances" to_port="reference set"/>
      <connect from_op="Employees (2)" from_port="output" to_op="Select Attributes" to_port="example set input"/>
      <connect from_op="Select Attributes" from_port="example set output" to_op="Multiply" to_port="input"/>
      <connect from_op="Multiply" from_port="output 1" to_op="Cross Distances (2)" to_port="request set"/>
      <connect from_op="Multiply" from_port="output 2" to_op="Filter Example Range" to_port="example set input"/>
      <connect from_op="Filter Example Range" from_port="example set output" to_op="Remove Unused Values" to_port="example set input"/>
      <connect from_op="Remove Unused Values" from_port="example set output" to_op="Cross Distances (2)" to_port="reference set"/>
      <connect from_op="Cross Distances (2)" from_port="result set" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>

And if you move the "Remove Unused Values" to the top so the metadata matches again, it works again:

 

<?xml version="1.0" encoding="UTF-8"?><process version="8.1.000">
  <!-- RapidMiner process: feeds the employee data (request set) and a single
       filtered example of the same data (reference set) into "Cross Distances (2)".
       In this variant "Remove Unused Values" is wired into the request-set branch
       (Multiply output 1) instead of the reference-set branch. -->
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="6.0.002" expanded="true" name="Process">
    <process expanded="true">
      <!-- Deactivated (activated="false") test harness: builds two small generated
           example sets and exposes them on ports "out 1" and "out 2". -->
      <operator activated="false" breakpoints="after" class="subprocess" compatibility="8.1.000" expanded="true" height="103" name="Subprocess" width="90" x="45" y="850">
        <process expanded="true">
          <!-- Generated example with attribute1..3 = 1, 2, 3; goes to "Generate ID" and then "out 1". -->
          <operator activated="true" class="generate_data_user_specification" compatibility="6.4.000" expanded="true" height="68" name="Generate Data by User Specification (3)" width="90" x="179" y="30">
            <list key="attribute_values">
              <parameter key="attribute1" value="1"/>
              <parameter key="attribute2" value="2"/>
              <parameter key="attribute3" value="3"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <!-- Same values (1, 2, 3); first input of "Append". -->
          <operator activated="true" class="generate_data_user_specification" compatibility="6.4.000" expanded="true" height="68" name="Generate Data by User Specification" width="90" x="179" y="165">
            <list key="attribute_values">
              <parameter key="attribute1" value="1"/>
              <parameter key="attribute2" value="2"/>
              <parameter key="attribute3" value="3"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <!-- Different values (4, 5, 6); second input of "Append". -->
          <operator activated="true" class="generate_data_user_specification" compatibility="6.4.000" expanded="true" height="68" name="Generate Data by User Specification (2)" width="90" x="179" y="300">
            <list key="attribute_values">
              <parameter key="attribute1" value="4"/>
              <parameter key="attribute2" value="5"/>
              <parameter key="attribute3" value="6"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <!-- Merges the two generated sets above; the merged set gets IDs and leaves via "out 2". -->
          <operator activated="true" class="append" compatibility="8.1.000" expanded="true" height="103" name="Append" width="90" x="313" y="210"/>
          <!-- Both ID generators are configured with create_nominal_ids = true. -->
          <operator activated="true" class="generate_id" compatibility="8.1.000" expanded="true" height="82" name="Generate ID" width="90" x="514" y="30">
            <parameter key="create_nominal_ids" value="true"/>
          </operator>
          <operator activated="true" class="generate_id" compatibility="8.1.000" expanded="true" height="82" name="Generate ID (2)" width="90" x="514" y="210">
            <parameter key="create_nominal_ids" value="true"/>
          </operator>
          <!-- Wiring inside the subprocess. -->
          <connect from_op="Generate Data by User Specification (3)" from_port="output" to_op="Generate ID" to_port="example set input"/>
          <connect from_op="Generate Data by User Specification" from_port="output" to_op="Append" to_port="example set 1"/>
          <connect from_op="Generate Data by User Specification (2)" from_port="output" to_op="Append" to_port="example set 2"/>
          <connect from_op="Append" from_port="merged set" to_op="Generate ID (2)" to_port="example set input"/>
          <connect from_op="Generate ID" from_port="example set output" to_port="out 1"/>
          <connect from_op="Generate ID (2)" from_port="example set output" to_port="out 2"/>
          <portSpacing port="source_in 1" spacing="0"/>
          <portSpacing port="sink_out 1" spacing="0"/>
          <portSpacing port="sink_out 2" spacing="162"/>
          <portSpacing port="sink_out 3" spacing="0"/>
        </process>
      </operator>
      <!-- Deactivated Cross Distances fed by the subprocess above; uses KernelEuclideanDistance. -->
      <operator activated="false" class="cross_distances" compatibility="8.1.000" expanded="true" height="103" name="Cross Distances" width="90" x="246" y="850">
        <parameter key="numerical_measure" value="KernelEuclideanDistance"/>
      </operator>
      <!-- Active pipeline starts here: reads the employee CSV from a local, machine-specific path. -->
      <operator activated="true" class="read_csv" compatibility="8.1.000" expanded="true" height="68" name="Employees (2)" width="90" x="45" y="187">
        <parameter key="csv_file" value="/Users/genzerconsulting/Desktop/Employees_csv.csv"/>
        <parameter key="first_row_as_names" value="false"/>
        <list key="annotations">
          <parameter key="0" value="Name"/>
        </list>
        <parameter key="encoding" value="UTF-8"/>
        <!-- Column metadata; presumably "name.selected.value_type.role" (TODO confirm).
             Column 0 (Id_employee) ends in "id", the other columns in "attribute". -->
        <list key="data_set_meta_data_information">
          <parameter key="0" value="Id_employee.true.integer.id"/>
          <parameter key="1" value="name.true.polynominal.attribute"/>
          <parameter key="2" value="skills.true.polynominal.attribute"/>
          <parameter key="3" value="department.true.polynominal.attribute"/>
          <parameter key="4" value="language.true.polynominal.attribute"/>
          <parameter key="5" value="experience.true.integer.attribute"/>
        </list>
      </operator>
      <!-- Keeps a subset of attributes, with special attributes included in the filter.
           NOTE(review): the subset lists "ID" while the id column above is declared as
           "Id_employee"; confirm the id column is selected as intended. -->
      <operator activated="true" class="select_attributes" compatibility="8.1.000" expanded="true" height="82" name="Select Attributes" width="90" x="179" y="187">
        <parameter key="attribute_filter_type" value="subset"/>
        <parameter key="attribute" value="name"/>
        <parameter key="attributes" value="department|experience|language|skills|ID"/>
        <parameter key="include_special_attributes" value="true"/>
      </operator>
      <!-- Splits the selected data into two branches (output 1 and output 2). -->
      <operator activated="true" class="multiply" compatibility="8.1.000" expanded="true" height="103" name="Multiply" width="90" x="313" y="187"/>
      <!-- Reference-set branch: range restricted to first_example = last_example = 3. -->
      <operator activated="true" class="filter_example_range" compatibility="8.1.000" expanded="true" height="82" name="Filter Example Range" width="90" x="447" y="289">
        <parameter key="first_example" value="3"/>
        <parameter key="last_example" value="3"/>
      </operator>
      <!-- Request-set branch: applied to Multiply output 1 in this variant (see connections below). -->
      <operator activated="true" class="remove_unused_values" compatibility="8.1.000" expanded="true" height="103" name="Remove Unused Values" width="90" x="447" y="85"/>
      <!-- The operator under discussion; numerical_measure is set to CosineSimilarity. -->
      <operator activated="true" class="cross_distances" compatibility="8.1.000" expanded="true" height="103" name="Cross Distances (2)" width="90" x="782" y="136">
        <parameter key="numerical_measure" value="CosineSimilarity"/>
      </operator>
      <!-- Connections of the deactivated harness. -->
      <connect from_op="Subprocess" from_port="out 1" to_op="Cross Distances" to_port="request set"/>
      <connect from_op="Subprocess" from_port="out 2" to_op="Cross Distances" to_port="reference set"/>
      <!-- Connections of the active pipeline:
           request set   = Multiply output 1, then Remove Unused Values
           reference set = Multiply output 2, then Filter Example Range -->
      <connect from_op="Employees (2)" from_port="output" to_op="Select Attributes" to_port="example set input"/>
      <connect from_op="Select Attributes" from_port="example set output" to_op="Multiply" to_port="input"/>
      <connect from_op="Multiply" from_port="output 1" to_op="Remove Unused Values" to_port="example set input"/>
      <connect from_op="Multiply" from_port="output 2" to_op="Filter Example Range" to_port="example set input"/>
      <connect from_op="Filter Example Range" from_port="example set output" to_op="Cross Distances (2)" to_port="reference set"/>
      <connect from_op="Remove Unused Values" from_port="example set output" to_op="Cross Distances (2)" to_port="request set"/>
      <!-- Only the Cross Distances result set is delivered as a process result. -->
      <connect from_op="Cross Distances (2)" from_port="result set" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>

It's a weird one - never seen it before. I'll move it to Product Feedback.

 

Scott

 

Community Manager
Status: Declined

Merging with the other report on the Cross Distances operator.