Sign In
Register
Products
Solutions
Resources
Pricing
Partners
Company
Howdy, Stranger!
It looks like you're new here. Sign in or register to get started.
Sign In with RapidMiner
Sign In with RapidMiner
Sign In
Register
Quick Links
Categories
Recent Discussions
Best Of...
Unanswered
Groups
Categories
All Categories
17.6K
Help
413
Knowledge Base
Altair RapidMiner Community
GET HELP. LEARN BEST PRACTICES. NETWORK WITH YOUR PEERS.
Discussion
Hi, I have two attributes which contain text data. How do I find the fuzzy match score of each row?
Author
Date within
1 day
3 days
1 week
2 weeks
1 month
2 months
6 months
1 year
of
Examples: Monday, today, last week, Mar 26, 3/26/04
Search
💬
0 Comments
🔥
0 Discussions
👤
0 Members
🔌
0 Online
ASK A QUESTION
FIND HELPFUL VIDEOS
Home
›
Help
Options
Hi, I have two attributes which contain text data. How do I find the fuzzy match score of each row?
Harshav
Member
Posts:
33
Contributor I
May 2021
in
Help
I saw there is a fuzzy match function in Generate Attributes, but I'm facing this error.
Can anyone please help me with this?
0
Answers
Options
MartinLiebig
Administrator, Moderator, Employee, RapidMiner Certified Analyst, RapidMiner Certified Expert, University Professor
Posts:
3,511
RM Data Scientist
May 2021
Hi,
Please have a look at the attached process; it works for me. I'm not sure what your exact expression is.
Best,
Martin
<?xml version="1.0" encoding="UTF-8"?>
<!--
  RapidMiner process: a subprocess appends four generated example sets that each
  define Attribute1 and Attribute2, then Generate Attributes adds a "distance"
  attribute computed by fuzzy_match over those two attributes.
-->
<process version="9.9.000">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="9.4.000" expanded="true" name="Process" origin="GENERATED_TUTORIAL">
    <parameter key="logverbosity" value="init"/>
    <parameter key="random_seed" value="2001"/>
    <parameter key="send_mail" value="never"/>
    <parameter key="notification_email" value=""/>
    <parameter key="process_duration_for_mail" value="30"/>
    <parameter key="encoding" value="SYSTEM"/>
    <process expanded="true">
      <!-- Subprocess: produces the example data on port "out 1". -->
      <operator activated="true" class="subprocess" compatibility="9.9.000" expanded="true" height="82" name="Subprocess" origin="GENERATED_TUTORIAL" width="90" x="112" y="34">
        <process expanded="true">
          <!--
            The attribute values below are quoted string literals. The inner
            double quotes must be written as &quot; because a raw double quote
            would terminate the XML attribute value.
          -->
          <operator activated="true" class="generate_data_user_specification" compatibility="9.9.000" expanded="true" height="68" name="Generate Data by User Specification" origin="GENERATED_TUTORIAL" width="90" x="45" y="34">
            <list key="attribute_values">
              <parameter key="Attribute1" value="&quot;abc&quot;"/>
              <parameter key="Attribute2" value="&quot;ab&quot;"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <operator activated="true" class="generate_data_user_specification" compatibility="9.9.000" expanded="true" height="68" name="Generate Data by User Specification (2)" origin="GENERATED_TUTORIAL" width="90" x="45" y="136">
            <list key="attribute_values">
              <parameter key="Attribute1" value="&quot;xxx&quot;"/>
              <parameter key="Attribute2" value="&quot;yyy&quot;"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <operator activated="true" class="generate_data_user_specification" compatibility="9.9.000" expanded="true" height="68" name="Generate Data by User Specification (3)" origin="GENERATED_TUTORIAL" width="90" x="45" y="238">
            <list key="attribute_values">
              <parameter key="Attribute1" value="&quot;This is one Text&quot;"/>
              <parameter key="Attribute2" value="&quot;This is another Text&quot;"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <operator activated="true" class="generate_data_user_specification" compatibility="9.9.000" expanded="true" height="68" name="Generate Data by User Specification (4)" origin="GENERATED_TUTORIAL" width="90" x="45" y="340">
            <list key="attribute_values">
              <parameter key="Attribute1" value="&quot;ABC&quot;"/>
              <parameter key="Attribute2" value="&quot;abc&quot;"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <!-- Append: merges the four generated example sets into one. -->
          <operator activated="true" class="append" compatibility="9.9.000" expanded="true" height="145" name="Append" origin="GENERATED_TUTORIAL" width="90" x="246" y="85">
            <parameter key="datamanagement" value="double_array"/>
            <parameter key="data_management" value="auto"/>
            <parameter key="merge_type" value="all"/>
          </operator>
          <connect from_op="Generate Data by User Specification" from_port="output" to_op="Append" to_port="example set 1"/>
          <connect from_op="Generate Data by User Specification (2)" from_port="output" to_op="Append" to_port="example set 2"/>
          <connect from_op="Generate Data by User Specification (3)" from_port="output" to_op="Append" to_port="example set 3"/>
          <connect from_op="Generate Data by User Specification (4)" from_port="output" to_op="Append" to_port="example set 4"/>
          <connect from_op="Append" from_port="merged set" to_port="out 1"/>
          <portSpacing port="source_in 1" spacing="0"/>
          <portSpacing port="sink_out 1" spacing="0"/>
          <portSpacing port="sink_out 2" spacing="0"/>
        </process>
        <description align="center" color="transparent" colored="false" width="126">Generate Example Data</description>
      </operator>
      <!-- Generate Attributes: adds "distance" from the fuzzy_match expression; keep_all is set to true. -->
      <operator activated="true" class="generate_attributes" compatibility="9.9.000" expanded="true" height="82" name="Generate Attributes" width="90" x="380" y="34">
        <list key="function_descriptions">
          <parameter key="distance" value="fuzzy_match(Attribute1,Attribute2,LEVENSHTEIN_TOKEN_SET_RATIO)"/>
        </list>
        <parameter key="keep_all" value="true"/>
      </operator>
      <connect from_op="Subprocess" from_port="out 1" to_op="Generate Attributes" to_port="example set input"/>
      <connect from_op="Generate Attributes" from_port="example set output" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>
- Sr. Director Data Solutions, Altair RapidMiner -
Dortmund, Germany
0
Options
Harshav
Member
Posts:
33
Contributor I
May 2021
This is my expression; I don't know why it's throwing an error.
0
Options
Harshav
Member
Posts:
33
Contributor I
May 2021
And FYI, I'm using this function in the SparkRM Radoop operator.
0
Options
Harshav
Member
Posts:
33
Contributor I
May 2021
0
Options
Harshav
Member
Posts:
33
Contributor I
May 2021
@mschmitz
0
Sign In
or
Register
to comment.
Answers
<?xml version="1.0" encoding="UTF-8"?>
<!--
  RapidMiner process: a subprocess appends four generated example sets that each
  define Attribute1 and Attribute2, then Generate Attributes adds a "distance"
  attribute computed by fuzzy_match over those two attributes.
  NOTE(review): the declaration and the root start tag were missing from this
  copy although the closing process tag was present; version="9.9.000" is
  assumed from the operators' compatibility values. Confirm before importing.
-->
<process version="9.9.000">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="9.4.000" expanded="true" name="Process" origin="GENERATED_TUTORIAL">
    <parameter key="logverbosity" value="init"/>
    <parameter key="random_seed" value="2001"/>
    <parameter key="send_mail" value="never"/>
    <parameter key="notification_email" value=""/>
    <parameter key="process_duration_for_mail" value="30"/>
    <parameter key="encoding" value="SYSTEM"/>
    <process expanded="true">
      <!-- Subprocess: produces the example data on port "out 1". -->
      <operator activated="true" class="subprocess" compatibility="9.9.000" expanded="true" height="82" name="Subprocess" origin="GENERATED_TUTORIAL" width="90" x="112" y="34">
        <process expanded="true">
          <!--
            The attribute values below are quoted string literals. The inner
            double quotes must be written as &quot; because a raw double quote
            would terminate the XML attribute value.
          -->
          <operator activated="true" class="generate_data_user_specification" compatibility="9.9.000" expanded="true" height="68" name="Generate Data by User Specification" origin="GENERATED_TUTORIAL" width="90" x="45" y="34">
            <list key="attribute_values">
              <parameter key="Attribute1" value="&quot;abc&quot;"/>
              <parameter key="Attribute2" value="&quot;ab&quot;"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <operator activated="true" class="generate_data_user_specification" compatibility="9.9.000" expanded="true" height="68" name="Generate Data by User Specification (2)" origin="GENERATED_TUTORIAL" width="90" x="45" y="136">
            <list key="attribute_values">
              <parameter key="Attribute1" value="&quot;xxx&quot;"/>
              <parameter key="Attribute2" value="&quot;yyy&quot;"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <operator activated="true" class="generate_data_user_specification" compatibility="9.9.000" expanded="true" height="68" name="Generate Data by User Specification (3)" origin="GENERATED_TUTORIAL" width="90" x="45" y="238">
            <list key="attribute_values">
              <parameter key="Attribute1" value="&quot;This is one Text&quot;"/>
              <parameter key="Attribute2" value="&quot;This is another Text&quot;"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <operator activated="true" class="generate_data_user_specification" compatibility="9.9.000" expanded="true" height="68" name="Generate Data by User Specification (4)" origin="GENERATED_TUTORIAL" width="90" x="45" y="340">
            <list key="attribute_values">
              <parameter key="Attribute1" value="&quot;ABC&quot;"/>
              <parameter key="Attribute2" value="&quot;abc&quot;"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <!-- Append: merges the four generated example sets into one. -->
          <operator activated="true" class="append" compatibility="9.9.000" expanded="true" height="145" name="Append" origin="GENERATED_TUTORIAL" width="90" x="246" y="85">
            <parameter key="datamanagement" value="double_array"/>
            <parameter key="data_management" value="auto"/>
            <parameter key="merge_type" value="all"/>
          </operator>
          <connect from_op="Generate Data by User Specification" from_port="output" to_op="Append" to_port="example set 1"/>
          <connect from_op="Generate Data by User Specification (2)" from_port="output" to_op="Append" to_port="example set 2"/>
          <connect from_op="Generate Data by User Specification (3)" from_port="output" to_op="Append" to_port="example set 3"/>
          <connect from_op="Generate Data by User Specification (4)" from_port="output" to_op="Append" to_port="example set 4"/>
          <connect from_op="Append" from_port="merged set" to_port="out 1"/>
          <portSpacing port="source_in 1" spacing="0"/>
          <portSpacing port="sink_out 1" spacing="0"/>
          <portSpacing port="sink_out 2" spacing="0"/>
        </process>
        <description align="center" color="transparent" colored="false" width="126">Generate Example Data</description>
      </operator>
      <!-- Generate Attributes: adds "distance" from the fuzzy_match expression; keep_all is set to true. -->
      <operator activated="true" class="generate_attributes" compatibility="9.9.000" expanded="true" height="82" name="Generate Attributes" width="90" x="380" y="34">
        <list key="function_descriptions">
          <parameter key="distance" value="fuzzy_match(Attribute1,Attribute2,LEVENSHTEIN_TOKEN_SET_RATIO)"/>
        </list>
        <parameter key="keep_all" value="true"/>
      </operator>
      <connect from_op="Subprocess" from_port="out 1" to_op="Generate Attributes" to_port="example set input"/>
      <connect from_op="Generate Attributes" from_port="example set output" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>
Dortmund, Germany