Options

How to split a sentence by its conjunction words?

HeikoeWin786HeikoeWin786 Member Posts: 64 Contributor II
Hi,

I want to split a sentence using the Split operator, but not with a regular expression.
I want the sentence to be split by conjunction words e.g. and, but, however, yet
E.g. The staff is bad, and the room is not good, but the food is great.
This sentence should split as
The staff is bad
the room is not good
the food is great

Can anyone suggest how I can achieve this in RapidMiner?

thanks a lot

Answers

  • Options
    Robi_MeRobi_Me Member Posts: 32 Maven
    edited January 2021
    <?xml version="1.0" encoding="UTF-8"?><process version="9.7.002">
      <!--
        Splits a sentence at conjunction words without hand-writing a regular
        expression for the conjunctions themselves:
          1. "Create ExampleSet" builds a two-column dictionary (conjunction, replace)
             in which every conjunction maps to itself prefixed with the marker "§".
          2. "Create ExampleSet (2)" supplies the sample sentence in the attribute "text".
          3. "Replace (Dictionary)" applies the dictionary to "text", which puts a "§"
             in front of every conjunction found.
          4. "Split" cuts "text" at every "§".
        Because the marker is inserted in front of the conjunction, each piece after
        the first still starts with its conjunction, and any comma that preceded the
        conjunction stays at the end of the previous piece.
      -->
      <context>
        <input/>
        <output/>
        <macros/>
      </context>
      <operator activated="true" class="process" compatibility="9.7.002" expanded="true" name="Process">
        <parameter key="logverbosity" value="init"/>
        <parameter key="random_seed" value="2001"/>
        <parameter key="send_mail" value="never"/>
        <parameter key="notification_email" value=""/>
        <parameter key="process_duration_for_mail" value="30"/>
        <parameter key="encoding" value="SYSTEM"/>
        <process expanded="true">
          <!--
            Dictionary of conjunctions. The first CSV row is the header
            (conjunction, replace); every following row maps a conjunction to the
            same word prefixed with "§". Each word must map to itself so that the
            original wording is preserved and only the split marker is added.
            To support another conjunction, append a row "word,§word".
          -->
          <operator activated="true" class="utility:create_exampleset" compatibility="9.7.002" expanded="true" height="68" name="Create ExampleSet" width="90" x="179" y="289">
            <parameter key="generator_type" value="comma separated text"/>
            <parameter key="number_of_examples" value="100"/>
            <parameter key="use_stepsize" value="false"/>
            <list key="function_descriptions"/>
            <parameter key="add_id_attribute" value="false"/>
            <list key="numeric_series_configuration"/>
            <list key="date_series_configuration"/>
            <list key="date_series_configuration (interval)"/>
            <parameter key="date_format" value="yyyy-MM-dd HH:mm:ss"/>
            <parameter key="time_zone" value="SYSTEM"/>
            <parameter key="input_csv_text" value="conjunction,replace&#10;also,§also&#10;besides,§besides&#10;furthermore,§furthermore&#10;likewise,§likewise&#10;moreover,§moreover&#10;however,§however&#10;nevertheless,§nevertheless&#10;nonetheless,§nonetheless&#10;still,§still&#10;conversely,§conversely&#10;instead,§instead&#10;otherwise,§otherwise&#10;rather,§rather&#10;accordingly,§accordingly&#10;consequently,§consequently&#10;hence,§hence&#10;meanwhile,§meanwhile&#10;then,§then&#10;therefore,§therefore&#10;thus,§thus&#10;yet,§yet&#10;and,§and&#10;but,§but"/>
            <parameter key="column_separator" value=","/>
            <parameter key="parse_all_as_nominal" value="false"/>
            <parameter key="decimal_point_character" value="."/>
            <parameter key="trim_attribute_names" value="true"/>
          </operator>
          <!--
            Sample input: one example whose attribute "text" holds the sentence to
            split, produced through the "attribute functions" generator.
            NOTE(review): input_csv_text below repeats the sentence but is presumably
            not read while generator_type is "attribute functions"; confirm before
            relying on it.
          -->
          <operator activated="true" class="utility:create_exampleset" compatibility="9.7.002" expanded="true" height="68" name="Create ExampleSet (2)" width="90" x="179" y="136">
            <parameter key="generator_type" value="attribute functions"/>
            <parameter key="number_of_examples" value="1"/>
            <parameter key="use_stepsize" value="false"/>
            <list key="function_descriptions">
              <parameter key="text" value="&quot;The staff is bad, and the room is not good, but the food is great.&quot;"/>
            </list>
            <parameter key="add_id_attribute" value="false"/>
            <list key="numeric_series_configuration"/>
            <list key="date_series_configuration"/>
            <list key="date_series_configuration (interval)"/>
            <parameter key="date_format" value="yyyy-MM-dd HH:mm:ss"/>
            <parameter key="time_zone" value="SYSTEM"/>
            <parameter key="input_csv_text" value="text&#10;The staff is bad, and the room is not good, but the food is great."/>
            <parameter key="column_separator" value=","/>
            <parameter key="parse_all_as_nominal" value="false"/>
            <parameter key="decimal_point_character" value="."/>
            <parameter key="trim_attribute_names" value="true"/>
          </operator>
          <!--
            Applies the dictionary to the single attribute "text": values from the
            dictionary column "conjunction" are replaced by the matching value from
            the column "replace".
            NOTE(review): regular expressions are switched off, so matching looks like
            literal text matching; a conjunction embedded in a longer word (for
            example "and" inside "brand") may then be marked as well. Also,
            convert_to_lowercase is false, so a capitalised "However" presumably does
            not match the lowercase dictionary entry. Verify both against the operator
            documentation.
          -->
          <operator activated="true" class="replace_dictionary" compatibility="9.7.002" expanded="true" height="103" name="Replace (Dictionary)" width="90" x="380" y="187">
            <parameter key="return_preprocessing_model" value="false"/>
            <parameter key="create_view" value="false"/>
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="text"/>
            <parameter key="attributes" value=""/>
            <parameter key="use_except_expression" value="false"/>
            <parameter key="value_type" value="attribute_value"/>
            <parameter key="use_value_type_exception" value="false"/>
            <parameter key="except_value_type" value="time"/>
            <parameter key="block_type" value="attribute_block"/>
            <parameter key="use_block_type_exception" value="false"/>
            <parameter key="except_block_type" value="value_matrix_row_start"/>
            <parameter key="invert_selection" value="false"/>
            <parameter key="include_special_attributes" value="true"/>
            <parameter key="from_attribute" value="conjunction"/>
            <parameter key="to_attribute" value="replace"/>
            <parameter key="use_regular_expressions" value="false"/>
            <parameter key="convert_to_lowercase" value="false"/>
            <parameter key="first_match_only" value="false"/>
          </operator>
          <!--
            Cuts the attribute "text" at every "§" marker inserted by the previous
            operator. The split pattern is the single marker character, so no
            conjunction-specific pattern is needed here. split_mode is "ordered_split";
            presumably this yields one new attribute per piece in sentence order
            (confirm in the Split operator documentation).
          -->
          <operator activated="true" class="split" compatibility="9.7.002" expanded="true" height="82" name="Split" width="90" x="514" y="187">
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="text"/>
            <parameter key="attributes" value=""/>
            <parameter key="use_except_expression" value="false"/>
            <parameter key="value_type" value="nominal"/>
            <parameter key="use_value_type_exception" value="false"/>
            <parameter key="except_value_type" value="file_path"/>
            <parameter key="block_type" value="single_value"/>
            <parameter key="use_block_type_exception" value="false"/>
            <parameter key="except_block_type" value="single_value"/>
            <parameter key="invert_selection" value="false"/>
            <parameter key="include_special_attributes" value="false"/>
            <parameter key="split_pattern" value="§"/>
            <parameter key="split_mode" value="ordered_split"/>
          </operator>
          <!-- Wiring: dictionary and sentence feed Replace (Dictionary); its output is split and delivered as result 1. -->
          <connect from_op="Create ExampleSet" from_port="output" to_op="Replace (Dictionary)" to_port="dictionary"/>
          <connect from_op="Create ExampleSet (2)" from_port="output" to_op="Replace (Dictionary)" to_port="example set input"/>
          <connect from_op="Replace (Dictionary)" from_port="example set output" to_op="Split" to_port="example set input"/>
          <connect from_op="Split" from_port="example set output" to_port="result 1"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
        </process>
      </operator>
    </process>
    


Sign In or Register to comment.