HR sourcing

asn4293 · January 2018

I have two excel files containing profiles of the employees. First is employees data and second file contain position we want to fill.

Excel file one

Id name skills department language exp

Excel file two positions
skills department language exp

I want to match positions with employee profile

Like this
Id name skills department language
1 a x fin eng
exp
3 years similarity index .30

lionelderkrikor · January 2018

Hi @asn4293,

Executed with the fictive exampleset, I supplied, and the 4 attributes, you have done, I have similarities between 0 and 1

but how said in my previous post, there are errors in the results, and for the moment I don't know why.

Regards,

Lionel

lionelderkrikor · January 2018

Hi @asn4293,

If I good understood, your second Excel file is empty and you want to fill it with the information(s) of the first one :

the second Excel file is equal to the first Excel file but there are not the Id ,and the name ?

Regards,

Lionel

asn4293 · January 2018

Second file contain the job position I want to match with the first file which contain employees, end result would be similarity percentage with employee data ( .30 )

lionelderkrikor · January 2018

Hi again @asn4293,

Can you share some of your data of your 2 Excel files, please ?

NB : Anonymize the data if it's sensitive subject.

Regards,

Lionel

lionelderkrikor · January 2018

Hi again @asn4293,

A first element of response :

I used the Cross Distance operator with CosineSimilarity as numerical measure.

Here a process with the Id of the employee, the Id of the position (I created an Id for the position) and the similarity between the employee and the position :

<?xml version="1.0" encoding="UTF-8"?><process version="8.0.001">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="6.0.002" expanded="true" name="Process">
    <process expanded="true">
      <operator activated="true" class="read_excel" compatibility="8.0.001" expanded="true" height="68" name="Employees" width="90" x="45" y="187">
        <parameter key="excel_file" value="C:\Users\Lionel\Documents\Formations_DataScience\Rapidminer\Tests_Rapidminer\HR_Sourcing\Employees.xlsx"/>
        <parameter key="imported_cell_range" value="A1:F4"/>
        <parameter key="first_row_as_names" value="false"/>
        <list key="annotations">
          <parameter key="0" value="Name"/>
        </list>
        <list key="data_set_meta_data_information">
          <parameter key="0" value="Id.true.integer.attribute"/>
          <parameter key="1" value="name.true.polynominal.attribute"/>
          <parameter key="2" value="skills.true.polynominal.attribute"/>
          <parameter key="3" value="department.true.polynominal.attribute"/>
          <parameter key="4" value="language.true.polynominal.attribute"/>
          <parameter key="5" value="experience.true.integer.attribute"/>
        </list>
      </operator>
      <operator activated="true" class="set_role" compatibility="8.0.001" expanded="true" height="82" name="Set Role" width="90" x="179" y="187">
        <parameter key="attribute_name" value="Id"/>
        <parameter key="target_role" value="id"/>
        <list key="set_additional_roles"/>
      </operator>
      <operator activated="true" class="select_attributes" compatibility="8.0.001" expanded="true" height="82" name="Select Attributes" width="90" x="380" y="187">
        <parameter key="attribute_filter_type" value="subset"/>
        <parameter key="attributes" value="department|experience|language|skills"/>
      </operator>
      <operator activated="true" class="read_excel" compatibility="8.0.001" expanded="true" height="68" name="Position" width="90" x="45" y="34">
        <parameter key="excel_file" value="C:\Users\Lionel\Documents\Formations_DataScience\Rapidminer\Tests_Rapidminer\HR_Sourcing\Employees.xlsx"/>
        <parameter key="sheet_number" value="2"/>
        <parameter key="imported_cell_range" value="A1:D2"/>
        <parameter key="first_row_as_names" value="false"/>
        <list key="annotations">
          <parameter key="0" value="Name"/>
        </list>
        <list key="data_set_meta_data_information">
          <parameter key="0" value="skills.true.polynominal.attribute"/>
          <parameter key="1" value="department.true.polynominal.attribute"/>
          <parameter key="2" value="language.true.polynominal.attribute"/>
          <parameter key="3" value="experience.true.integer.attribute"/>
        </list>
      </operator>
      <operator activated="true" class="generate_id" compatibility="8.0.001" expanded="true" height="82" name="Generate ID" width="90" x="179" y="34"/>
      <operator activated="true" class="select_attributes" compatibility="8.0.001" expanded="true" height="82" name="Select Attributes (2)" width="90" x="313" y="34">
        <parameter key="attribute_filter_type" value="subset"/>
        <parameter key="attributes" value="department|experience|language|skills"/>
      </operator>
      <operator activated="true" class="cross_distances" compatibility="8.0.001" expanded="true" height="103" name="Cross Distances" width="90" x="648" y="85">
        <parameter key="measure_types" value="NumericalMeasures"/>
        <parameter key="numerical_measure" value="CosineSimilarity"/>
        <parameter key="compute_similarities" value="true"/>
      </operator>
      <operator activated="true" class="rename" compatibility="8.0.001" expanded="true" height="82" name="Rename" width="90" x="849" y="85">
        <parameter key="old_name" value="document"/>
        <parameter key="new_name" value="Employee"/>
        <list key="rename_additional_attributes">
          <parameter key="request" value="Position"/>
        </list>
      </operator>
      <connect from_op="Employees" from_port="output" to_op="Set Role" to_port="example set input"/>
      <connect from_op="Set Role" from_port="example set output" to_op="Select Attributes" to_port="example set input"/>
      <connect from_op="Select Attributes" from_port="example set output" to_op="Cross Distances" to_port="reference set"/>
      <connect from_op="Position" from_port="output" to_op="Generate ID" to_port="example set input"/>
      <connect from_op="Generate ID" from_port="example set output" to_op="Select Attributes (2)" to_port="example set input"/>
      <connect from_op="Select Attributes (2)" from_port="example set output" to_op="Cross Distances" to_port="request set"/>
      <connect from_op="Cross Distances" from_port="result set" to_op="Rename" to_port="example set input"/>
      <connect from_op="Rename" from_port="example set output" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="90"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>

and here you can find my fictive example set :

https://drive.google.com/open?id=1k0HvIMv-cti_UaHu_jZjcXQzygfiG56V

I hope it helps,

Regards,

Lionel

lionelderkrikor · January 2018

Hi again @asn4293,

Don't consider the process, I share it too fast, without doing all the checks.

Although I think this is the right method, this process is giving false results :

- The first example of the "Employees" example set has always a similarity of 1

- The similarities associated to the "Employees" don't change when we change the caracteristics of the "Position"

If someone has an idea....

Here the process :

<?xml version="1.0" encoding="UTF-8"?><process version="8.0.001">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="6.0.002" expanded="true" name="Process">
    <process expanded="true">
      <operator activated="true" class="read_excel" compatibility="8.0.001" expanded="true" height="68" name="Employees" width="90" x="45" y="85">
        <parameter key="excel_file" value="C:\Users\Lionel\Documents\Formations_DataScience\Rapidminer\Tests_Rapidminer\HR_Sourcing\Employees.xlsx"/>
        <parameter key="imported_cell_range" value="A1:F5"/>
        <parameter key="first_row_as_names" value="false"/>
        <list key="annotations">
          <parameter key="0" value="Name"/>
        </list>
        <list key="data_set_meta_data_information">
          <parameter key="0" value="Id.true.integer.id"/>
          <parameter key="1" value="name.true.nominal.attribute"/>
          <parameter key="2" value="skills.true.nominal.attribute"/>
          <parameter key="3" value="department.true.nominal.attribute"/>
          <parameter key="4" value="language.true.nominal.attribute"/>
          <parameter key="5" value="experience.true.integer.attribute"/>
        </list>
      </operator>
      <operator activated="true" class="multiply" compatibility="8.0.001" expanded="true" height="103" name="Multiply" width="90" x="179" y="85"/>
      <operator activated="true" class="read_excel" compatibility="8.0.001" expanded="true" height="68" name="Position" width="90" x="45" y="238">
        <parameter key="excel_file" value="C:\Users\Lionel\Documents\Formations_DataScience\Rapidminer\Tests_Rapidminer\HR_Sourcing\Employees.xlsx"/>
        <parameter key="sheet_number" value="2"/>
        <parameter key="imported_cell_range" value="A1:E2"/>
        <parameter key="first_row_as_names" value="false"/>
        <list key="annotations">
          <parameter key="0" value="Name"/>
        </list>
        <list key="data_set_meta_data_information">
          <parameter key="0" value="Id.true.integer.id"/>
          <parameter key="1" value="skills.true.nominal.attribute"/>
          <parameter key="2" value="department.true.nominal.attribute"/>
          <parameter key="3" value="language.true.nominal.attribute"/>
          <parameter key="4" value="experience.true.integer.attribute"/>
        </list>
      </operator>
      <operator activated="true" class="select_attributes" compatibility="8.0.001" expanded="true" height="82" name="Select Attributes (2)" width="90" x="179" y="238">
        <parameter key="attribute_filter_type" value="subset"/>
        <parameter key="attributes" value="department|experience|language|skills"/>
      </operator>
      <operator activated="true" class="select_attributes" compatibility="8.0.001" expanded="true" height="82" name="Select Attributes" width="90" x="313" y="34">
        <parameter key="attribute_filter_type" value="subset"/>
        <parameter key="attributes" value="department|experience|language|skills"/>
      </operator>
      <operator activated="true" class="cross_distances" compatibility="8.0.001" expanded="true" height="103" name="Cross Distances" width="90" x="447" y="85">
        <parameter key="measure_types" value="NumericalMeasures"/>
        <parameter key="numerical_measure" value="CosineSimilarity"/>
        <parameter key="compute_similarities" value="true"/>
      </operator>
      <operator activated="true" class="rename" compatibility="8.0.001" expanded="true" height="82" name="Rename" width="90" x="581" y="85">
        <parameter key="old_name" value="document"/>
        <parameter key="new_name" value="Employee"/>
        <list key="rename_additional_attributes">
          <parameter key="request" value="Position"/>
          <parameter key="distance" value="similarity"/>
        </list>
      </operator>
      <operator activated="true" class="set_role" compatibility="8.0.001" expanded="true" height="82" name="Set Role (3)" width="90" x="715" y="85">
        <parameter key="attribute_name" value="Employee"/>
        <parameter key="target_role" value="id"/>
        <list key="set_additional_roles"/>
      </operator>
      <operator activated="true" class="select_attributes" compatibility="8.0.001" expanded="true" height="82" name="Select Attributes (3)" width="90" x="313" y="136">
        <parameter key="attribute_filter_type" value="subset"/>
        <parameter key="attributes" value="Id|name"/>
      </operator>
      <operator activated="true" class="join" compatibility="8.0.001" expanded="true" height="82" name="Join" width="90" x="849" y="136">
        <list key="key_attributes"/>
      </operator>
      <connect from_op="Employees" from_port="output" to_op="Multiply" to_port="input"/>
      <connect from_op="Multiply" from_port="output 1" to_op="Select Attributes" to_port="example set input"/>
      <connect from_op="Multiply" from_port="output 2" to_op="Select Attributes (3)" to_port="example set input"/>
      <connect from_op="Position" from_port="output" to_op="Select Attributes (2)" to_port="example set input"/>
      <connect from_op="Select Attributes (2)" from_port="example set output" to_op="Cross Distances" to_port="request set"/>
      <connect from_op="Select Attributes" from_port="example set output" to_op="Cross Distances" to_port="reference set"/>
      <connect from_op="Cross Distances" from_port="result set" to_op="Rename" to_port="example set input"/>
      <connect from_op="Cross Distances" from_port="request set" to_port="result 1"/>
      <connect from_op="Cross Distances" from_port="reference set" to_port="result 2"/>
      <connect from_op="Rename" from_port="example set output" to_op="Set Role (3)" to_port="example set input"/>
      <connect from_op="Set Role (3)" from_port="example set output" to_op="Join" to_port="right"/>
      <connect from_op="Select Attributes (3)" from_port="example set output" to_op="Join" to_port="left"/>
      <connect from_op="Join" from_port="join" to_port="result 3"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
      <portSpacing port="sink_result 4" spacing="0"/>
    </process>
  </operator>
</process>

Thanks you,

Regards,

Lionel

asn4293 · January 2018

I ran it few times on my data with only taking one or two attributes, it either give 1 or 0 in similarity

asn4293 · March 2018

Hi @lionelderkrikor

I was Able to find out the solution as mentioned by you by finetuning the model using nominal measures and using simple matching similarity in Cross Differences Operator.
Which gives me follwing results:

1	1.0	John	HR Head	0.0
2	1.0	John	Finance Head	0.5
3	2.0	Mickeal	Finance Head	0.25
4	2.0	Mickeal	HR Head	0.75
5	3.0	Sean	Finance Head	0.5
6	3.0	Sean	HR Head	1.0

Howdy, Stranger!

Quick Links

Categories

Altair RapidMiner Community

GET HELP. LEARN BEST PRACTICES. NETWORK WITH YOUR PEERS.

HR sourcing

Best Answer

Answers