The Altair Community and the RapidMiner Community are in read-only mode until further notice. Technical support via cases will continue to work as is. For any urgent requests from students or faculty members, please submit the form linked here.

Hi, I have two attributes which contain text data. How do I find the fuzzy match score for each row?

HarshavHarshav Member Posts: 33 Contributor I
I saw there is a fuzzy match function in Generate Attributes, but I'm facing this error. Can anyone please help me with this?


  • Options
    MartinLiebigMartinLiebig Administrator, Moderator, Employee, RapidMiner Certified Analyst, RapidMiner Certified Expert, University Professor Posts: 3,525 RM Data Scientist
    Please have a look at the attached process; it works for me. I'm not sure what your exact expression is.


    <?xml version="1.0" encoding="UTF-8"?><process version="9.9.000">
      <!--
        Example process demonstrating fuzzy_match in Generate Attributes.
        The Subprocess builds four small example sets, each defining Attribute1
        and Attribute2 as text values, and appends them into one example set.
        Generate Attributes then adds a "distance" attribute computed with
        fuzzy_match(Attribute1,Attribute2,LEVENSHTEIN_TOKEN_SET_RATIO).
      -->
      <operator activated="true" class="process" compatibility="9.4.000" expanded="true" name="Process" origin="GENERATED_TUTORIAL">
        <parameter key="logverbosity" value="init"/>
        <parameter key="random_seed" value="2001"/>
        <parameter key="send_mail" value="never"/>
        <parameter key="notification_email" value=""/>
        <parameter key="process_duration_for_mail" value="30"/>
        <parameter key="encoding" value="SYSTEM"/>
        <process expanded="true">
          <!-- Step 1: generate the example data (four appended example sets). -->
          <operator activated="true" class="subprocess" compatibility="9.9.000" expanded="true" height="82" name="Subprocess" origin="GENERATED_TUTORIAL" width="90" x="112" y="34">
            <process expanded="true">
              <!-- Near match: "abc" vs "ab". -->
              <operator activated="true" class="generate_data_user_specification" compatibility="9.9.000" expanded="true" height="68" name="Generate Data by User Specification" origin="GENERATED_TUTORIAL" width="90" x="45" y="34">
                <list key="attribute_values">
                  <parameter key="Attribute1" value="&quot;abc&quot;"/>
                  <parameter key="Attribute2" value="&quot;ab&quot;"/>
                </list>
                <list key="set_additional_roles"/>
              </operator>
              <!-- No characters in common: "xxx" vs "yyy". -->
              <operator activated="true" class="generate_data_user_specification" compatibility="9.9.000" expanded="true" height="68" name="Generate Data by User Specification (2)" origin="GENERATED_TUTORIAL" width="90" x="45" y="136">
                <list key="attribute_values">
                  <parameter key="Attribute1" value="&quot;xxx&quot;"/>
                  <parameter key="Attribute2" value="&quot;yyy&quot;"/>
                </list>
                <list key="set_additional_roles"/>
              </operator>
              <!-- Multi-word texts sharing several tokens. -->
              <operator activated="true" class="generate_data_user_specification" compatibility="9.9.000" expanded="true" height="68" name="Generate Data by User Specification (3)" origin="GENERATED_TUTORIAL" width="90" x="45" y="238">
                <list key="attribute_values">
                  <parameter key="Attribute1" value="&quot;This is one Text&quot;"/>
                  <parameter key="Attribute2" value="&quot;This is another Text&quot;"/>
                </list>
                <list key="set_additional_roles"/>
              </operator>
              <!-- Same letters, different case: "ABC" vs "abc". -->
              <operator activated="true" class="generate_data_user_specification" compatibility="9.9.000" expanded="true" height="68" name="Generate Data by User Specification (4)" origin="GENERATED_TUTORIAL" width="90" x="45" y="340">
                <list key="attribute_values">
                  <parameter key="Attribute1" value="&quot;ABC&quot;"/>
                  <parameter key="Attribute2" value="&quot;abc&quot;"/>
                </list>
                <list key="set_additional_roles"/>
              </operator>
              <!-- Combine the four generated example sets into one. -->
              <operator activated="true" class="append" compatibility="9.9.000" expanded="true" height="145" name="Append" origin="GENERATED_TUTORIAL" width="90" x="246" y="85">
                <parameter key="datamanagement" value="double_array"/>
                <parameter key="data_management" value="auto"/>
                <parameter key="merge_type" value="all"/>
              </operator>
              <connect from_op="Generate Data by User Specification" from_port="output" to_op="Append" to_port="example set 1"/>
              <connect from_op="Generate Data by User Specification (2)" from_port="output" to_op="Append" to_port="example set 2"/>
              <connect from_op="Generate Data by User Specification (3)" from_port="output" to_op="Append" to_port="example set 3"/>
              <connect from_op="Generate Data by User Specification (4)" from_port="output" to_op="Append" to_port="example set 4"/>
              <connect from_op="Append" from_port="merged set" to_port="out 1"/>
              <portSpacing port="source_in 1" spacing="0"/>
              <portSpacing port="sink_out 1" spacing="0"/>
              <portSpacing port="sink_out 2" spacing="0"/>
            </process>
            <description align="center" color="transparent" colored="false" width="126">Generate Example Data</description>
          </operator>
          <!-- Step 2: add the "distance" attribute holding the fuzzy match result per row. -->
          <operator activated="true" class="generate_attributes" compatibility="9.9.000" expanded="true" height="82" name="Generate Attributes" width="90" x="380" y="34">
            <list key="function_descriptions">
              <parameter key="distance" value="fuzzy_match(Attribute1,Attribute2,LEVENSHTEIN_TOKEN_SET_RATIO)"/>
            </list>
            <parameter key="keep_all" value="true"/>
          </operator>
          <connect from_op="Subprocess" from_port="out 1" to_op="Generate Attributes" to_port="example set input"/>
          <connect from_op="Generate Attributes" from_port="example set output" to_port="result 1"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
        </process>
      </operator>
    </process>

    - Sr. Director Data Solutions, Altair RapidMiner -
    Dortmund, Germany
  • Options
    HarshavHarshav Member Posts: 33 Contributor I
    This is my expression; I don't know why it's throwing an error.
  • Options
    HarshavHarshav Member Posts: 33 Contributor I
    And FYI, I'm using this function in the SparkRM Radoop operator.
  • Options
    HarshavHarshav Member Posts: 33 Contributor I

  • Options
    HarshavHarshav Member Posts: 33 Contributor I
Sign In or Register to comment.