Concatenation duplication

asn4293asn4293 Member Posts: 24 Contributor I
edited December 2018 in Help

I want to remove the duplicated values that appear after using the Concat operator.

Original Data 

Id name skills department language experience
1 John x Fin FR 3
2 Mickeal y HR ENG 5
3 Sean y HR ENG 1
1 John x Fin ENG 3


Should Look Like

Id name skills department language experience
1 John x Fin FR|ENG 3
2 Mickeal y HR ENG 5
3 Sean y HR ENG 1


But after using Concat it looks like this:

Id name skills department language experience
1 John|John x|x Fin|Fin FR|ENG 3
2 Mickeal y HR ENG 5
3 Sean y HR ENG 1

 

<?xml version="1.0" encoding="UTF-8"?>
<!-- Process from the question: load the Employees data, mark Id as the id
     attribute, then group by Id and concatenate the listed attributes. -->
<process version="8.0.001">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="8.0.001" expanded="true" name="Process">
    <process expanded="true">
      <!-- Source data: the Employees entry of the local repository. -->
      <operator activated="true" class="retrieve" compatibility="8.0.001" expanded="true" height="68" name="Retrieve" width="90" x="112" y="85">
        <parameter key="repository_entry" value="//Local Repository/data/Employees"/>
      </operator>
      <!-- Give the Id attribute the id role. -->
      <operator activated="true" class="set_role" compatibility="8.0.001" expanded="true" height="82" name="Set Role" width="90" x="313" y="85">
        <parameter key="attribute_name" value="Id"/>
        <parameter key="target_role" value="id"/>
        <list key="set_additional_roles"/>
      </operator>
      <!-- One output row per Id; department, language, name and skills are
           concatenated within each group. only_distinct is not set here, and
           the posted result shows repeated values (John|John, x|x, Fin|Fin)
           for the Id that occurs twice. The experience attribute is not part
           of the aggregation list. -->
      <operator activated="true" class="aggregate" compatibility="8.0.001" expanded="true" height="82" name="Aggregate" width="90" x="447" y="85">
        <list key="aggregation_attributes">
          <parameter key="department" value="concatenation"/>
          <parameter key="language" value="concatenation"/>
          <parameter key="name" value="concatenation"/>
          <parameter key="skills" value="concatenation"/>
        </list>
        <parameter key="group_by_attributes" value="Id"/>
      </operator>
      <!-- Wiring: Retrieve, then Set Role, then Aggregate, then result port 1. -->
      <connect from_op="Retrieve" from_port="output" to_op="Set Role" to_port="example set input"/>
      <connect from_op="Set Role" from_port="example set output" to_op="Aggregate" to_port="example set input"/>
      <connect from_op="Aggregate" from_port="example set output" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>

This is the file you can work on:
https://drive.google.com/open?id=1evx7JGzVmx0f6VO1fNMFG5tBGmB-xKVN

Best Answer

  • Edin_KlapicEdin_Klapic Moderator, Employee, RMResearcher, Member Posts: 299 RM Data Scientist
    Solution Accepted

    Hi @asn4293,

     

    For my tests, I created an Excel file only with the last table you posted which is the equivalent of your dataset but without the ID.

    Below you find the XML for a working process with your dataset.

     

    By using the Operator Work on Subset and only selecting the Mobility Region for that you can overcome the issue with the User Sys ID.

     

    Happy Mining,

    Edin

     

    <?xml version="1.0" encoding="UTF-8"?>
    <!-- Accepted solution: for every row, split the pipe-separated
         Mobility Region value, drop repeated entries and concatenate the
         remaining distinct entries again. The de-duplication runs inside
         Work on Subset on the Mobility Region attribute only, which is how
         the answer avoids the problem with the User Sys ID column.
         Deactivated leftover operators from earlier experiments (and the
         connections between them) have been removed; they were never
         executed. -->
    <process version="8.1.003">
      <context>
        <input/>
        <output/>
        <macros/>
      </context>
      <operator activated="true" class="process" compatibility="8.1.003" expanded="true" name="Process">
        <process expanded="true">
          <!-- Input: two columns, User Sys ID (integer, id role) and
               Mobility Region (polynominal). Adjust excel_file to your
               own location of the workbook. -->
          <operator activated="true" class="read_excel" compatibility="8.1.000" expanded="true" height="68" name="Read Excel" width="90" x="45" y="34">
            <parameter key="excel_file" value="C:\Users\EdinKlapic\Desktop\MobilityConcat.xlsx"/>
            <parameter key="imported_cell_range" value="A1:B14065"/>
            <parameter key="first_row_as_names" value="false"/>
            <list key="annotations">
              <parameter key="0" value="Name"/>
            </list>
            <list key="data_set_meta_data_information">
              <parameter key="0" value="User Sys ID.true.integer.id"/>
              <parameter key="1" value="Mobility Region.true.polynominal.attribute"/>
            </list>
          </operator>
          <operator activated="true" class="loop_examples" compatibility="8.1.003" expanded="true" height="103" name="Loop Examples" width="90" x="246" y="34">
            <process expanded="true">
              <!-- first_example and last_example are both %{example}, so
                   each iteration works on exactly one row. -->
              <operator activated="true" class="filter_example_range" compatibility="8.1.003" expanded="true" height="82" name="Filter Example Range (3)" width="90" x="45" y="34">
                <parameter key="first_example" value="%{example}"/>
                <parameter key="last_example" value="%{example}"/>
              </operator>
              <!-- Only the Mobility Region attribute is handed to the
                   inner process. -->
              <operator activated="true" class="work_on_subset" compatibility="8.1.003" expanded="true" height="82" name="Work on Subset" width="90" x="179" y="34">
                <parameter key="attribute_filter_type" value="single"/>
                <parameter key="attribute" value="Mobility Region"/>
                <parameter key="deliver_inner_results" value="true"/>
                <process expanded="true">
                  <!-- Split the value at every literal pipe character. -->
                  <operator activated="true" class="split" compatibility="8.1.003" expanded="true" height="82" name="Split (2)" width="90" x="45" y="34">
                    <parameter key="split_pattern" value="\|"/>
                  </operator>
                  <!-- Turn the split columns into rows. -->
                  <operator activated="true" class="transpose" compatibility="8.1.003" expanded="true" height="82" name="Transpose (2)" width="90" x="179" y="34"/>
                  <!-- Keep one row per distinct att_1 value. -->
                  <operator activated="true" class="remove_duplicates" compatibility="8.1.003" expanded="true" height="103" name="Remove Duplicates (3)" width="90" x="313" y="34">
                    <parameter key="attribute_filter_type" value="single"/>
                    <parameter key="attribute" value="att_1"/>
                    <parameter key="include_special_attributes" value="true"/>
                  </operator>
                  <!-- Inverted selection: drop the id attribute, keep the rest. -->
                  <operator activated="true" class="select_attributes" compatibility="8.1.003" expanded="true" height="82" name="Select Attributes (2)" width="90" x="447" y="34">
                    <parameter key="attribute_filter_type" value="single"/>
                    <parameter key="attribute" value="id"/>
                    <parameter key="invert_selection" value="true"/>
                    <parameter key="include_special_attributes" value="true"/>
                  </operator>
                  <!-- Concatenate the distinct att_1 values back into one value. -->
                  <operator activated="true" class="aggregate" compatibility="8.1.003" expanded="true" height="82" name="Aggregate (2)" width="90" x="581" y="34">
                    <list key="aggregation_attributes">
                      <parameter key="att_1" value="concatenation"/>
                    </list>
                    <parameter key="only_distinct" value="true"/>
                  </operator>
                  <!-- Restore the original attribute name. -->
                  <operator activated="true" class="rename" compatibility="8.1.003" expanded="true" height="82" name="Rename" width="90" x="715" y="34">
                    <parameter key="old_name" value="concat(att_1)"/>
                    <parameter key="new_name" value="Mobility Region"/>
                    <list key="rename_additional_attributes"/>
                  </operator>
                  <connect from_port="exampleSet" to_op="Split (2)" to_port="example set input"/>
                  <connect from_op="Split (2)" from_port="example set output" to_op="Transpose (2)" to_port="example set input"/>
                  <connect from_op="Transpose (2)" from_port="example set output" to_op="Remove Duplicates (3)" to_port="example set input"/>
                  <connect from_op="Remove Duplicates (3)" from_port="example set output" to_op="Select Attributes (2)" to_port="example set input"/>
                  <connect from_op="Select Attributes (2)" from_port="example set output" to_op="Aggregate (2)" to_port="example set input"/>
                  <connect from_op="Aggregate (2)" from_port="example set output" to_op="Rename" to_port="example set input"/>
                  <connect from_op="Rename" from_port="example set output" to_port="example set"/>
                  <portSpacing port="source_exampleSet" spacing="0"/>
                  <portSpacing port="sink_example set" spacing="0"/>
                  <portSpacing port="sink_through 1" spacing="0"/>
                </process>
              </operator>
              <connect from_port="example set" to_op="Filter Example Range (3)" to_port="example set input"/>
              <connect from_op="Filter Example Range (3)" from_port="example set output" to_op="Work on Subset" to_port="example set"/>
              <connect from_op="Work on Subset" from_port="example set" to_port="output 1"/>
              <portSpacing port="source_example set" spacing="0"/>
              <portSpacing port="sink_example set" spacing="0"/>
              <portSpacing port="sink_output 1" spacing="0"/>
              <portSpacing port="sink_output 2" spacing="0"/>
            </process>
          </operator>
          <!-- Merge the per-row results of the loop into one example set. -->
          <operator activated="true" class="append" compatibility="8.1.003" expanded="true" height="82" name="Append" width="90" x="447" y="34"/>
          <connect from_op="Read Excel" from_port="output" to_op="Loop Examples" to_port="example set"/>
          <connect from_op="Loop Examples" from_port="output 1" to_op="Append" to_port="example set 1"/>
          <connect from_op="Append" from_port="merged set" to_port="result 1"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
        </process>
      </operator>
    </process>

     

Answers

  • kaymankayman Member Posts: 662 Unicorn

    Use the replace operator and add following regex

     

    ^(.*?)\|.*$

    and replace with :

    $1

    This will only keep the part from the beginning until the first pipe symbol.

  • asn4293asn4293 Member Posts: 24 Contributor I

    It did not work, as it also removed ENG. It now looks like this:

    1 1.0 Fin FR x John
    2 2.0 HR ENG y Mickeal
    3 3.0 HR ENG y Sean



    Should Look Like

    Id name skills department language experience
    1 John x Fin FR|ENG 3
    2 Mickeal y HR ENG 5
    3 Sean y HR ENG 1



  • Thomas_OttThomas_Ott RapidMiner Certified Analyst, RapidMiner Certified Expert, Member Posts: 1,761 Unicorn

    @asn4293 does not the Aggregate operator work for you? You can Group by Name and Language?

  • asn4293asn4293 Member Posts: 24 Contributor I

    @Thomas_Ott 
    The data which is mentioned is sample set not the actual data.

    Actual data is having more than 40 attributes and 100,000 rows.
    When I use Concat, I do not know in advance which attributes other than ID will have the same values, so I cannot group by the other attributes in the Aggregate operator. For example, in one row I have Logistics|Logistics|Logistics; it should be just one Logistics, as the values are all the same.

     

    Logistics|Logistics|Logistics Logisztikai koordinátor|Disztribúciós koordinátor|Logisztikai asszisztens Double Decket Kft.|Art Media Kft.|Gate Media International Kft. Not Imported|Not Imported|Not Imported Koordinátor|Asszisztens|Asszisztens     Logistics Warehouse and Distribution Manager N/A   Supply Chain Management / Logistics (SC)     Logistics Coordinator     Yes     Hungary EMEA Europe/Middle East/Africa English

     

     

     

    The only solution I could think of is to remove the duplicates within each row rather than across the whole data set. It would have been perfect if Aggregate removed all concatenated duplicates and kept only one of each.

    Hope it helped you to understand it better. 

  • BalazsBaranyBalazsBarany Administrator, Moderator, Employee, RapidMiner Certified Analyst, RapidMiner Certified Expert Posts: 955 Unicorn

    Hi!

     

    You could use Aggregate with the default aggregation and group by ID. 

    Then it would give you the concatenated values (if you select that as the default aggregation function).

     

    You would apply the regexp replace solution to just the fields you want (or use a variant that just removes identical repetitions). 

     

    Regards,

    Balázs

  • asn4293asn4293 Member Posts: 24 Contributor I

     

    Americas|EMEA Europe/Middle East/Africa|APAC Asia/Pacific|EMEA Europe/Middle East/Africa
    Currently not mobile
    Currently not mobile
    Americas
    Americas
     
    EMEA Europe/Middle East/Africa
    Americas
    EMEA Europe/Middle East/Africa|APAC Asia/Pacific|EMEA Europe/Middle East/Africa|EMEA Europe/Middle East/Africa|Americas|EMEA Europe/Middle East/Africa|EMEA Europe/Middle East/Africa


    These are the values in the column. I want to restrict each one to only its unique values. Can it be done? If so, please help me.

    Results Should Look Like This

    Americas|APAC Asia/Pacific|EMEA Europe/Middle East/Africa
    Currently not mobile
    Currently not mobile
    Americas
    Americas
     
    EMEA Europe/Middle East/Africa
    Americas
    EMEA Europe/Middle East/Africa|APAC Asia/Pacific|Americas
  • asn4293asn4293 Member Posts: 24 Contributor I
    <?xml version="1.0" encoding="UTF-8"?>
    <!-- Attempt from the question: aggregate Region per User Sys ID with
         distinct concatenation, then try to strip repeated words with a
         regular expression. The author reports that it does not work. -->
    <process version="8.1.000">
      <context>
        <input/>
        <output/>
        <macros/>
      </context>
      <operator activated="true" class="process" compatibility="8.1.000" expanded="true" name="Process">
        <process expanded="true">
          <!-- Input: User Sys ID (integer, id role) and Region (polynominal). -->
          <operator activated="true" class="read_excel" compatibility="8.1.000" expanded="true" height="68" name="Read Excel" width="90" x="112" y="85">
            <parameter key="excel_file" value="C:\Users\shahida1\Desktop\Mobility.xlsx"/>
            <parameter key="imported_cell_range" value="A1:B18473"/>
            <parameter key="first_row_as_names" value="false"/>
            <list key="annotations">
              <parameter key="0" value="Name"/>
            </list>
            <list key="data_set_meta_data_information">
              <parameter key="0" value="User Sys ID.true.integer.id"/>
              <parameter key="1" value="Region.true.polynominal.attribute"/>
            </list>
          </operator>
          <!-- Assign the id role to User Sys ID (the import meta data above
               already declares the same role). -->
          <operator activated="true" class="set_role" compatibility="8.1.000" expanded="true" height="82" name="Set Role" width="90" x="246" y="85">
            <parameter key="attribute_name" value="User Sys ID"/>
            <parameter key="target_role" value="id"/>
            <list key="set_additional_roles"/>
          </operator>
          <!-- Group by User Sys ID and concatenate Region, distinct rows only.
               NOTE(review): attribute_filter_type / regular_expression
               presumably choose attributes by name for the default
               aggregation; the pattern given looks like a value-matching
               regex, so it probably has no effect here. Confirm against the
               Aggregate operator documentation. -->
          <operator activated="true" class="aggregate" compatibility="8.1.000" expanded="true" height="82" name="Aggregate" width="90" x="380" y="85">
            <parameter key="attribute_filter_type" value="regular_expression"/>
            <parameter key="regular_expression" value="\b(\w+)(?:,\s+\1\b)+."/>
            <parameter key="default_aggregation_function" value="concatenation"/>
            <list key="aggregation_attributes">
              <parameter key="Region" value="concatenation"/>
            </list>
            <parameter key="group_by_attributes" value="User Sys ID"/>
            <parameter key="only_distinct" value="true"/>
          </operator>
          <!-- Replaces a word (\w+) that is immediately repeated after
               whitespace with a single occurrence.
               NOTE(review): the values shown in the thread are separated by
               the pipe character and contain spaces and slashes, so this
               pattern likely never matches them; that would explain the
               reported failure. -->
          <operator activated="true" class="replace" compatibility="8.1.000" expanded="true" height="82" name="Replace (2)" width="90" x="112" y="442">
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="concat(Region)"/>
            <parameter key="replace_what" value="\b(\w+)(?:\s+\1\b)+"/>
            <parameter key="replace_by" value="$1"/>
          </operator>
          <!-- Same role assignment as the first Set Role, applied again after
               the aggregation. -->
          <operator activated="true" class="set_role" compatibility="8.1.000" expanded="true" height="82" name="Set Role (3)" width="90" x="514" y="340">
            <parameter key="attribute_name" value="User Sys ID"/>
            <parameter key="target_role" value="id"/>
            <list key="set_additional_roles"/>
          </operator>
          <!-- Linear chain: Read Excel, Set Role, Aggregate, Replace (2),
               Set Role (3), result port 1. -->
          <connect from_op="Read Excel" from_port="output" to_op="Set Role" to_port="example set input"/>
          <connect from_op="Set Role" from_port="example set output" to_op="Aggregate" to_port="example set input"/>
          <connect from_op="Aggregate" from_port="example set output" to_op="Replace (2)" to_port="example set input"/>
          <connect from_op="Replace (2)" from_port="example set output" to_op="Set Role (3)" to_port="example set input"/>
          <connect from_op="Set Role (3)" from_port="example set output" to_port="result 1"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
        </process>
      </operator>
    </process>

    Hi @Thomas_Ott @lionelderkrikor @sgenzer,

    Can you please help me with the regex? I copied it from the web to remove repeated words within the same row, but it is not working. The code is attached with the file; please help me.
    https://drive.google.com/open?id=1EBFYfCG0y3UVNFNayqichdiW3Pf6wumU


  • Edin_KlapicEdin_Klapic Moderator, Employee, RMResearcher, Member Posts: 299 RM Data Scientist

    Hi @asn4293,

     

    This process produces the desired result. You may need to adapt it to your Attributes.

     

    Happy Mining,

    Edin

     

    <?xml version="1.0" encoding="UTF-8"?>
    <!-- De-duplicates pipe-separated values row by row for a single-column
         input (attribute "duplicates"); there is no ID column in this
         variant. -->
    <process version="8.1.003">
      <context>
        <input/>
        <output/>
        <macros/>
      </context>
      <operator activated="true" class="process" compatibility="8.1.003" expanded="true" name="Process">
        <process expanded="true">
          <!-- Input: one polynominal column read from cells A1:A10. -->
          <operator activated="true" class="read_excel" compatibility="8.1.003" expanded="true" height="68" name="Read Excel" width="90" x="112" y="34">
            <parameter key="excel_file" value="C:\Users\EdinKlapic\Desktop\Duplicates test.xlsx"/>
            <parameter key="imported_cell_range" value="A1:A10"/>
            <parameter key="first_row_as_names" value="false"/>
            <list key="annotations">
              <parameter key="0" value="Name"/>
            </list>
            <list key="data_set_meta_data_information">
              <parameter key="0" value="duplicates.true.polynominal.attribute"/>
            </list>
          </operator>
          <operator activated="true" class="loop_examples" compatibility="8.1.003" expanded="true" height="103" name="Loop Examples" width="90" x="246" y="34">
            <process expanded="true">
              <!-- first_example equals last_example, so a single row is
                   processed per iteration. -->
              <operator activated="true" class="filter_example_range" compatibility="8.1.003" expanded="true" height="82" name="Filter Example Range" width="90" x="45" y="34">
                <parameter key="first_example" value="%{example}"/>
                <parameter key="last_example" value="%{example}"/>
              </operator>
              <!-- Split the value at every literal pipe character. -->
              <operator activated="true" class="split" compatibility="8.1.003" expanded="true" height="82" name="Split" width="90" x="179" y="34">
                <parameter key="split_pattern" value="\|"/>
              </operator>
              <!-- Turn the split columns into rows. -->
              <operator activated="true" class="transpose" compatibility="8.1.003" expanded="true" height="82" name="Transpose" width="90" x="313" y="34"/>
              <!-- Inverted selection: drop the id attribute, keep the rest. -->
              <operator activated="true" class="select_attributes" compatibility="8.1.003" expanded="true" height="82" name="Select Attributes" width="90" x="447" y="34">
                <parameter key="attribute_filter_type" value="single"/>
                <parameter key="attribute" value="id"/>
                <parameter key="invert_selection" value="true"/>
                <parameter key="include_special_attributes" value="true"/>
              </operator>
              <!-- Drop repeated rows, then concatenate the distinct att_1
                   values back into one value. -->
              <operator activated="true" class="remove_duplicates" compatibility="8.1.003" expanded="true" height="103" name="Remove Duplicates" width="90" x="581" y="34"/>
              <operator activated="true" class="aggregate" compatibility="8.1.003" expanded="true" height="82" name="Aggregate" width="90" x="715" y="34">
                <list key="aggregation_attributes">
                  <parameter key="att_1" value="concatenation"/>
                </list>
                <parameter key="only_distinct" value="true"/>
              </operator>
              <connect from_port="example set" to_op="Filter Example Range" to_port="example set input"/>
              <connect from_op="Filter Example Range" from_port="example set output" to_op="Split" to_port="example set input"/>
              <connect from_op="Split" from_port="example set output" to_op="Transpose" to_port="example set input"/>
              <connect from_op="Transpose" from_port="example set output" to_op="Select Attributes" to_port="example set input"/>
              <connect from_op="Select Attributes" from_port="example set output" to_op="Remove Duplicates" to_port="example set input"/>
              <connect from_op="Remove Duplicates" from_port="example set output" to_op="Aggregate" to_port="example set input"/>
              <connect from_op="Aggregate" from_port="example set output" to_port="output 1"/>
              <portSpacing port="source_example set" spacing="0"/>
              <portSpacing port="sink_example set" spacing="0"/>
              <portSpacing port="sink_output 1" spacing="0"/>
              <portSpacing port="sink_output 2" spacing="0"/>
            </process>
          </operator>
          <!-- Merge the per-row results of the loop into one example set. -->
          <operator activated="true" class="append" compatibility="8.1.003" expanded="true" height="82" name="Append" width="90" x="380" y="34"/>
          <connect from_op="Read Excel" from_port="output" to_op="Loop Examples" to_port="example set"/>
          <connect from_op="Loop Examples" from_port="output 1" to_op="Append" to_port="example set 1"/>
          <connect from_op="Append" from_port="merged set" to_port="result 1"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
        </process>
      </operator>
    </process>
  • asn4293asn4293 Member Posts: 24 Contributor I

    @Edin_Klapic Thank you for reply ,

    Can you please share file you used on this model or if possible please apply it on my file which is 

    Here https://drive.google.com/open?id=1EBFYfCG0y3UVNFNayqichdiW3Pf6wumU

  • asn4293asn4293 Member Posts: 24 Contributor I

    @Edin_Klapic Thank you, I was able to figure it out, but including the ID messes things up. Can you please help? I tried to join the ID back in later but was not able to do it.

     

    Also, please have this link for this new file  https://drive.google.com/open?id=1MBqi460DL79ssLGBzYalHSyIkwVWKbpL

    <?xml version="1.0" encoding="UTF-8"?>
    <!-- The asker's adaptation of the row-wise de-duplication to the
         MobilityConcat workbook.
         NOTE(review): the data is narrowed to Mobility Region before the
         loop and the id attribute is removed inside the loop; no operator
         in this process joins User Sys ID back to the result, which matches
         the problem described in the post. -->
    <process version="8.1.000">
      <context>
        <input/>
        <output/>
        <macros/>
      </context>
      <operator activated="true" class="process" compatibility="8.1.000" expanded="true" name="Process">
        <process expanded="true">
          <!-- Input: User Sys ID (integer, id role) and Mobility Region
               (polynominal). -->
          <operator activated="true" class="read_excel" compatibility="8.1.000" expanded="true" height="68" name="Read Excel" width="90" x="45" y="34">
            <parameter key="excel_file" value="C:\Users\shahida1\Desktop\MobilityConcat.xlsx"/>
            <parameter key="imported_cell_range" value="A1:B14065"/>
            <parameter key="first_row_as_names" value="false"/>
            <list key="annotations">
              <parameter key="0" value="Name"/>
            </list>
            <list key="data_set_meta_data_information">
              <parameter key="0" value="User Sys ID.true.integer.id"/>
              <parameter key="1" value="Mobility Region.true.polynominal.attribute"/>
            </list>
          </operator>
          <!-- Single-attribute selection of Mobility Region ahead of the loop. -->
          <operator activated="true" class="select_attributes" compatibility="8.1.000" expanded="true" height="82" name="Select Attributes (2)" width="90" x="112" y="238">
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="Mobility Region"/>
          </operator>
          <operator activated="true" class="loop_examples" compatibility="8.1.000" expanded="true" height="103" name="Loop Examples" width="90" x="246" y="34">
            <process expanded="true">
              <!-- first_example equals last_example, so a single row is
                   processed per iteration. -->
              <operator activated="true" class="filter_example_range" compatibility="8.1.000" expanded="true" height="82" name="Filter Example Range" width="90" x="45" y="34">
                <parameter key="first_example" value="%{example}"/>
                <parameter key="last_example" value="%{example}"/>
              </operator>
              <!-- Split the value at every literal pipe character. -->
              <operator activated="true" class="split" compatibility="8.1.000" expanded="true" height="82" name="Split" width="90" x="179" y="34">
                <parameter key="split_pattern" value="\|"/>
              </operator>
              <!-- Turn the split columns into rows. -->
              <operator activated="true" class="transpose" compatibility="8.1.000" expanded="true" height="82" name="Transpose" width="90" x="313" y="34"/>
              <!-- Inverted selection: drop the id attribute, keep the rest. -->
              <operator activated="true" class="select_attributes" compatibility="8.1.000" expanded="true" height="82" name="Select Attributes" width="90" x="447" y="34">
                <parameter key="attribute_filter_type" value="single"/>
                <parameter key="attribute" value="id"/>
                <parameter key="invert_selection" value="true"/>
                <parameter key="include_special_attributes" value="true"/>
              </operator>
              <!-- Drop repeated rows, then concatenate the distinct att_1
                   values back into one value. -->
              <operator activated="true" class="remove_duplicates" compatibility="8.1.000" expanded="true" height="103" name="Remove Duplicates" width="90" x="581" y="34"/>
              <operator activated="true" class="aggregate" compatibility="8.1.000" expanded="true" height="82" name="Aggregate" width="90" x="715" y="34">
                <list key="aggregation_attributes">
                  <parameter key="att_1" value="concatenation"/>
                </list>
                <parameter key="only_distinct" value="true"/>
              </operator>
              <connect from_port="example set" to_op="Filter Example Range" to_port="example set input"/>
              <connect from_op="Filter Example Range" from_port="example set output" to_op="Split" to_port="example set input"/>
              <connect from_op="Split" from_port="example set output" to_op="Transpose" to_port="example set input"/>
              <connect from_op="Transpose" from_port="example set output" to_op="Select Attributes" to_port="example set input"/>
              <connect from_op="Select Attributes" from_port="example set output" to_op="Remove Duplicates" to_port="example set input"/>
              <connect from_op="Remove Duplicates" from_port="example set output" to_op="Aggregate" to_port="example set input"/>
              <connect from_op="Aggregate" from_port="example set output" to_port="output 1"/>
              <portSpacing port="source_example set" spacing="0"/>
              <portSpacing port="sink_example set" spacing="0"/>
              <portSpacing port="sink_output 1" spacing="0"/>
              <portSpacing port="sink_output 2" spacing="0"/>
            </process>
          </operator>
          <!-- Merge the per-row results of the loop into one example set. -->
          <operator activated="true" class="append" compatibility="8.1.000" expanded="true" height="82" name="Append" width="90" x="380" y="34"/>
          <connect from_op="Read Excel" from_port="output" to_op="Select Attributes (2)" to_port="example set input"/>
          <connect from_op="Select Attributes (2)" from_port="example set output" to_op="Loop Examples" to_port="example set"/>
          <connect from_op="Loop Examples" from_port="output 1" to_op="Append" to_port="example set 1"/>
          <connect from_op="Append" from_port="merged set" to_port="result 1"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
        </process>
      </operator>
    </process>

      Thank you and have a nice day.

  • asn4293asn4293 Member Posts: 24 Contributor I

    @Edin_Klapic Thank you so very much for contributing and helping me around this data 

Sign In or Register to comment.