RapidMiner 9.7 is Now Available

Lots of amazing new improvements including true version control! Learn more about what's new here.

CLICK HERE TO DOWNLOAD

splitting names

pascasiwpascasiw Member Posts: 2 Contributor I
edited December 2018 in Help

I want to separate names from a column. For example:

smith john b & mary

 

should give me

smith john b

smith mary

 

I am able to split the name, but I am not able to carry the last name ("smith") over to the second person.

 

so i am getting

 

smith john b

mary <-----  want to see "smith mary"

 

In Excel I can use the "left" function up to the first space, but I am not sure how to do this in RapidMiner.

 

Answers

  • lionelderkrikorlionelderkrikor Moderator, RapidMiner Certified Analyst, Member Posts: 1,056   Unicorn

    Hi @pascasiw,

     

    Here is a possible solution:

    <?xml version="1.0" encoding="UTF-8"?>
    <!--
      Splits a "names" column such as "smith john b & mary" into two full names
      that share the leading last name: name1 = "smith john b", name2 = "smith mary".

      Flow:
        1. Read Excel loads the columns Id (integer) and names (polynominal).
        2. Multiply feeds the same example set to two branches.
        3. Branch A (Split + Select Attributes) keeps Id and names_1, the first
           whitespace-separated token, i.e. the last name.
        4. Branch B (Split (2)) cuts off the leading token, so the remainder
           ("john b & mary") ends up in names_2.
        5. Join merges both branches on Id.
        6. Split (3) separates the remainder at the ampersand into names_2_1 and names_2_2.
        7. Generate Attributes prefixes each part with the last name.
        8. Select Attributes (2) removes the intermediate names_* helper columns.
    -->
    <process version="9.0.001">
      <context>
        <input/>
        <output/>
        <macros/>
      </context>
      <operator activated="true" class="process" compatibility="9.0.001" expanded="true" name="Process">
        <process expanded="true">
          <!-- NOTE: excel_file is an absolute path on the original author's machine; point it to your own workbook. -->
          <operator activated="true" class="read_excel" compatibility="9.0.001" expanded="true" height="68" name="Read Excel" width="90" x="112" y="136">
            <parameter key="excel_file" value="C:\Users\Lionel\Documents\Formations_DataScience\Rapidminer\Tests_Rapidminer\Split_names\Split_names.xlsx"/>
            <list key="annotations"/>
            <parameter key="date_format" value="MMM d, yyyy h:mm:ss a z"/>
            <list key="data_set_meta_data_information">
              <parameter key="0" value="Id.true.integer.attribute"/>
              <parameter key="1" value="names.true.polynominal.attribute"/>
            </list>
            <parameter key="read_not_matching_values_as_missings" value="false"/>
          </operator>
          <!-- Duplicates the data so the last name and the remainder can be extracted independently. -->
          <operator activated="true" class="multiply" compatibility="9.0.001" expanded="true" height="103" name="Multiply" width="90" x="112" y="238"/>
          <!-- Branch A: split on runs of whitespace; only the first token (names_1) is kept afterwards. -->
          <operator activated="true" class="split" compatibility="9.0.001" expanded="true" height="82" name="Split" width="90" x="313" y="136">
            <parameter key="split_pattern" value="([\s]+)"/>
          </operator>
          <operator activated="true" class="select_attributes" compatibility="9.0.001" expanded="true" height="82" name="Select Attributes" width="90" x="447" y="136">
            <parameter key="attribute_filter_type" value="subset"/>
            <parameter key="attributes" value="Id|names_1"/>
          </operator>
          <!-- Branch B: the pattern matches the leading non-whitespace run plus one whitespace character,
               so everything after the first word is placed in names_2. -->
          <operator activated="true" class="split" compatibility="9.0.001" expanded="true" height="82" name="Split (2)" width="90" x="313" y="238">
            <parameter key="split_pattern" value="^[^\s]*\s"/>
          </operator>
          <!-- Re-combines both branches row by row using Id as the join key.
               NOTE(review): both inputs carry a names_1 attribute; presumably the left one (the last name) is the one kept. Confirm. -->
          <operator activated="true" class="concurrency:join" compatibility="9.0.001" expanded="true" height="82" name="Join" width="90" x="581" y="187">
            <parameter key="use_id_attribute_as_key" value="false"/>
            <list key="key_attributes">
              <parameter key="Id" value="Id"/>
            </list>
          </operator>
          <!-- Splits the remainder at the ampersand. The surrounding whitespace is part of the delimiter
               so the parts come out as "john b" and "mary" rather than "john b " and " mary". -->
          <operator activated="true" class="split" compatibility="9.0.001" expanded="true" height="82" name="Split (3)" width="90" x="715" y="187">
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="names_2"/>
            <parameter key="split_pattern" value="\s*&amp;\s*"/>
          </operator>
          <!-- Builds the two result names: last name, one space, then the respective first-name part. -->
          <operator activated="true" class="generate_attributes" compatibility="9.0.001" expanded="true" height="82" name="Generate Attributes" width="90" x="849" y="187">
            <list key="function_descriptions">
              <parameter key="name1" value="concat(names_1,&quot; &quot;,names_2_1)"/>
              <parameter key="name2" value="concat(names_1,&quot; &quot;,names_2_2)"/>
            </list>
          </operator>
          <!-- Inverted selection: removes every attribute whose name matches names_.* (the intermediate columns). -->
          <operator activated="true" class="select_attributes" compatibility="9.0.001" expanded="true" height="82" name="Select Attributes (2)" width="90" x="983" y="187">
            <parameter key="attribute_filter_type" value="regular_expression"/>
            <parameter key="regular_expression" value="names_.*"/>
            <parameter key="invert_selection" value="true"/>
          </operator>
          <connect from_op="Read Excel" from_port="output" to_op="Multiply" to_port="input"/>
          <connect from_op="Multiply" from_port="output 1" to_op="Split" to_port="example set input"/>
          <connect from_op="Multiply" from_port="output 2" to_op="Split (2)" to_port="example set input"/>
          <connect from_op="Split" from_port="example set output" to_op="Select Attributes" to_port="example set input"/>
          <connect from_op="Select Attributes" from_port="example set output" to_op="Join" to_port="left"/>
          <connect from_op="Split (2)" from_port="example set output" to_op="Join" to_port="right"/>
          <connect from_op="Join" from_port="join" to_op="Split (3)" to_port="example set input"/>
          <connect from_op="Split (3)" from_port="example set output" to_op="Generate Attributes" to_port="example set input"/>
          <connect from_op="Generate Attributes" from_port="example set output" to_op="Select Attributes (2)" to_port="example set input"/>
          <connect from_op="Select Attributes (2)" from_port="example set output" to_port="result 1"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
        </process>
      </operator>
    </process>

    I hope it helps,

     

    Regards,

     

    Lionel

    sgenzer
  • pascasiwpascasiw Member Posts: 2 Contributor I

    Lionel,

     

    Thanks for your reply.  I have been testing the solution you provided but still facing some other issues.  I will let you know once I have it working. 

  • Telcontar120Telcontar120 Moderator, RapidMiner Certified Analyst, RapidMiner Certified Expert, Member Posts: 1,368   Unicorn

    I agree with Lionel that the Split operator will allow you to separate all the names out into separate attributes and then you can combine them using subsequent rules of your choice.

    Brian T.
    Lindon Ventures 
    Data Science Consulting from Certified RapidMiner Experts
    sgenzer
Sign In or Register to comment.