Options

Awkward Error on Dictionary Based Sentimental Analysis Process

MelihckMelihck Member Posts: 8 Learner I
edited December 2020 in Help
Hi there! 

While I am exploring the sentimental analysis I found an XML process from @Benedict_von_Ahmschmitz in the forum.

In their model, they basically put the text into the create_document operator. But in my case, I had to use a dataset because I am working with Tweets. After I made the changes the error the below appears:



My XML is below if anyone can help I would definitely appreciate it🙏.
Also, I would love to see different models with more emotions like "fear" "anger" "joy" etc. instead of positive and negative. Please up me if you know any.

Thanks in advance for your help and for this amazing software.


<?xml version="1.0" encoding="UTF-8"?><process version="9.8.001">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="9.8.001" expanded="true" name="Process">
    <parameter key="logverbosity" value="init"/>
    <parameter key="random_seed" value="2001"/>
    <parameter key="send_mail" value="never"/>
    <parameter key="notification_email" value=""/>
    <parameter key="process_duration_for_mail" value="30"/>
    <parameter key="encoding" value="SYSTEM"/>
    <process expanded="true">
      <operator activated="true" class="read_excel" compatibility="9.8.001" expanded="true" height="68" name="Read Excel" width="90" x="112" y="493">
        <parameter key="excel_file" value="D:/Akademik/twitter duygu analizi makale/data-set/EmoLex/NRC-Emotion-Lexicon-v0.92-In105Languages-Nov2017Translations.xlsx"/>
        <parameter key="sheet_selection" value="sheet number"/>
        <parameter key="sheet_number" value="3"/>
        <parameter key="imported_cell_range" value="A1"/>
        <parameter key="encoding" value="SYSTEM"/>
        <parameter key="first_row_as_names" value="false"/>
        <list key="annotations"/>
        <parameter key="date_format" value=""/>
        <parameter key="time_zone" value="SYSTEM"/>
        <parameter key="locale" value="Turkish"/>
        <parameter key="read_all_values_as_polynominal" value="false"/>
        <list key="data_set_meta_data_information">
          <parameter key="0" value="A.true.polynominal.attribute"/>
        </list>
        <parameter key="read_not_matching_values_as_missings" value="false"/>
        <parameter key="datamanagement" value="double_array"/>
        <parameter key="data_management" value="auto"/>
        <description align="center" color="transparent" colored="false" width="126">Adapt location please</description>
      </operator>
      <operator activated="true" class="generate_attributes" compatibility="9.8.001" expanded="true" height="82" name="Generate Attributes" width="90" x="246" y="493">
        <list key="function_descriptions">
          <parameter key="Score" value="-1"/>
          <parameter key="A" value="lower(A)"/>
        </list>
        <parameter key="keep_all" value="true"/>
      </operator>
      <operator activated="true" class="rename" compatibility="9.8.001" expanded="true" height="82" name="Rename" width="90" x="380" y="493">
        <parameter key="old_name" value="A"/>
        <parameter key="new_name" value="Word"/>
        <list key="rename_additional_attributes"/>
      </operator>
      <operator activated="true" class="read_excel" compatibility="9.8.001" expanded="true" height="68" name="Read Excel (2)" width="90" x="112" y="646">
        <parameter key="excel_file" value="D:/Akademik/twitter duygu analizi makale/data-set/EmoLex/NRC-Emotion-Lexicon-v0.92-In105Languages-Nov2017Translations.xlsx"/>
        <parameter key="sheet_selection" value="sheet number"/>
        <parameter key="sheet_number" value="2"/>
        <parameter key="imported_cell_range" value="A1"/>
        <parameter key="encoding" value="SYSTEM"/>
        <parameter key="first_row_as_names" value="false"/>
        <list key="annotations"/>
        <parameter key="date_format" value=""/>
        <parameter key="time_zone" value="SYSTEM"/>
        <parameter key="locale" value="Turkish"/>
        <parameter key="read_all_values_as_polynominal" value="false"/>
        <list key="data_set_meta_data_information">
          <parameter key="0" value="A.true.polynominal.attribute"/>
        </list>
        <parameter key="read_not_matching_values_as_missings" value="false"/>
        <parameter key="datamanagement" value="double_array"/>
        <parameter key="data_management" value="auto"/>
        <description align="center" color="transparent" colored="false" width="126">Adapt location please</description>
      </operator>
      <operator activated="true" class="generate_attributes" compatibility="9.8.001" expanded="true" height="82" name="Generate Attributes (2)" width="90" x="246" y="646">
        <list key="function_descriptions">
          <parameter key="Score" value="+1"/>
          <parameter key="A" value="lower(A)"/>
        </list>
        <parameter key="keep_all" value="true"/>
      </operator>
      <operator activated="true" class="rename" compatibility="9.8.001" expanded="true" height="82" name="Rename (2)" width="90" x="380" y="646">
        <parameter key="old_name" value="A"/>
        <parameter key="new_name" value="Word"/>
        <list key="rename_additional_attributes"/>
      </operator>
      <operator activated="true" class="append" compatibility="9.8.001" expanded="true" height="103" name="Append" width="90" x="514" y="544">
        <parameter key="datamanagement" value="double_array"/>
        <parameter key="data_management" value="auto"/>
        <parameter key="merge_type" value="all"/>
      </operator>
      <operator activated="true" class="operator_toolbox:dictionary_sentiment_learner" compatibility="2.8.001" expanded="true" height="103" name="Dictionary-Based Sentiment (Documents)" width="90" x="648" y="544">
        <parameter key="value_attribute" value="Score"/>
        <parameter key="key_attribute" value="Word"/>
        <parameter key="negation_attribute" value=""/>
        <parameter key="negation_window_size" value="1"/>
        <parameter key="negation_strength" value=""/>
        <parameter key="use_symmetric_negation_window" value="false"/>
        <parameter key="use_intensifier" value="false"/>
        <parameter key="intensifier_word" value=""/>
        <parameter key="intensifier_value" value=""/>
        <parameter key="use_symmetric_intensifier_window" value="false"/>
      </operator>
      <operator activated="true" class="read_excel" compatibility="9.8.001" expanded="true" height="68" name="Read Excel (3)" width="90" x="112" y="85">
        <parameter key="excel_file" value="D:/Akademik/twitter duygu analizi makale/data-set/TEST DATA/toplam-R.xlsx"/>
        <parameter key="sheet_selection" value="sheet number"/>
        <parameter key="sheet_number" value="1"/>
        <parameter key="imported_cell_range" value="A1"/>
        <parameter key="encoding" value="SYSTEM"/>
        <parameter key="first_row_as_names" value="true"/>
        <list key="annotations"/>
        <parameter key="date_format" value=""/>
        <parameter key="time_zone" value="SYSTEM"/>
        <parameter key="locale" value="English (United States)"/>
        <parameter key="read_all_values_as_polynominal" value="false"/>
        <list key="data_set_meta_data_information">
          <parameter key="0" value="ID.true.real.attribute"/>
          <parameter key="1" value="User Name.true.polynominal.attribute"/>
          <parameter key="2" value="Universal Time Stamp.true.polynominal.attribute"/>
          <parameter key="3" value="Local Time Stamp.true.polynominal.attribute"/>
          <parameter key="4" value="Text.true.polynominal.attribute"/>
          <parameter key="5" value="Language.true.polynominal.attribute"/>
          <parameter key="6" value="Profile Image.true.polynominal.attribute"/>
          <parameter key="7" value="Source.true.polynominal.attribute"/>
          <parameter key="8" value="Location.true.polynominal.attribute"/>
          <parameter key="9" value="Time Zone.true.polynominal.attribute"/>
          <parameter key="10" value="Geo.true.polynominal.attribute"/>
          <parameter key="11" value="Hashtags.true.polynominal.attribute"/>
          <parameter key="12" value="Urls.true.polynominal.attribute"/>
          <parameter key="13" value="User Mentions.true.polynominal.attribute"/>
          <parameter key="14" value="Media.true.polynominal.attribute"/>
          <parameter key="15" value="Follower Count.true.integer.attribute"/>
          <parameter key="16" value="Name.true.polynominal.attribute"/>
        </list>
        <parameter key="read_not_matching_values_as_missings" value="false"/>
        <parameter key="datamanagement" value="double_array"/>
        <parameter key="data_management" value="auto"/>
      </operator>
      <operator activated="true" class="select_attributes" compatibility="9.8.001" expanded="true" height="82" name="Select Attributes" width="90" x="246" y="187">
        <parameter key="attribute_filter_type" value="single"/>
        <parameter key="attribute" value="Text"/>
        <parameter key="attributes" value=""/>
        <parameter key="use_except_expression" value="false"/>
        <parameter key="value_type" value="attribute_value"/>
        <parameter key="use_value_type_exception" value="false"/>
        <parameter key="except_value_type" value="time"/>
        <parameter key="block_type" value="attribute_block"/>
        <parameter key="use_block_type_exception" value="false"/>
        <parameter key="except_block_type" value="value_matrix_row_start"/>
        <parameter key="invert_selection" value="false"/>
        <parameter key="include_special_attributes" value="false"/>
      </operator>
      <operator activated="true" class="nominal_to_text" compatibility="9.8.001" expanded="true" height="82" name="Nominal to Text" width="90" x="380" y="187">
        <parameter key="attribute_filter_type" value="all"/>
        <parameter key="attribute" value=""/>
        <parameter key="attributes" value=""/>
        <parameter key="use_except_expression" value="false"/>
        <parameter key="value_type" value="nominal"/>
        <parameter key="use_value_type_exception" value="false"/>
        <parameter key="except_value_type" value="file_path"/>
        <parameter key="block_type" value="single_value"/>
        <parameter key="use_block_type_exception" value="false"/>
        <parameter key="except_block_type" value="single_value"/>
        <parameter key="invert_selection" value="false"/>
        <parameter key="include_special_attributes" value="false"/>
      </operator>
      <operator activated="true" class="text:process_document_from_data" compatibility="9.3.001" expanded="true" height="82" name="Process Documents from Data" width="90" x="581" y="187">
        <parameter key="create_word_vector" value="false"/>
        <parameter key="vector_creation" value="TF-IDF"/>
        <parameter key="add_meta_information" value="false"/>
        <parameter key="keep_text" value="true"/>
        <parameter key="prune_method" value="absolute"/>
        <parameter key="prune_below_percent" value="3.0"/>
        <parameter key="prune_above_percent" value="30.0"/>
        <parameter key="prune_below_absolute" value="2"/>
        <parameter key="prune_above_absolute" value="9999"/>
        <parameter key="prune_below_rank" value="0.05"/>
        <parameter key="prune_above_rank" value="0.95"/>
        <parameter key="datamanagement" value="double_sparse_array"/>
        <parameter key="data_management" value="auto"/>
        <parameter key="select_attributes_and_weights" value="false"/>
        <list key="specify_weights"/>
        <process expanded="true">
          <operator activated="true" class="text:tokenize" compatibility="9.3.001" expanded="true" height="68" name="Tokenize (2)" width="90" x="112" y="34">
            <parameter key="mode" value="non letters"/>
            <parameter key="characters" value=".:"/>
            <parameter key="language" value="English"/>
            <parameter key="max_token_length" value="3"/>
          </operator>
          <operator activated="true" class="text:transform_cases" compatibility="9.3.001" expanded="true" height="68" name="Transform Cases (2)" width="90" x="246" y="34">
            <parameter key="transform_to" value="lower case"/>
          </operator>
          <operator activated="true" class="text:stem_dictionary" compatibility="9.3.001" expanded="true" height="82" name="Stem (Dictionary)" width="90" x="380" y="34">
            <parameter key="file" value="D:/Akademik/twitter duygu analizi makale/data-set/TEST DATA/dict-test.txt"/>
          </operator>
          <operator activated="true" class="text:filter_by_length" compatibility="9.3.001" expanded="true" height="68" name="Filter Tokens (by Length)" width="90" x="581" y="34">
            <parameter key="min_chars" value="4"/>
            <parameter key="max_chars" value="15"/>
          </operator>
          <operator activated="true" class="text:filter_stopwords_dictionary" compatibility="9.3.001" expanded="true" height="82" name="Filter Stopwords (Dictionary)" width="90" x="715" y="34">
            <parameter key="file" value="D:/Akademik/twitter duygu analizi makale/data-set/Stem-dict/stop-words.txt"/>
            <parameter key="case_sensitive" value="false"/>
            <parameter key="encoding" value="UTF-8"/>
          </operator>
          <connect from_port="document" to_op="Tokenize (2)" to_port="document"/>
          <connect from_op="Tokenize (2)" from_port="document" to_op="Transform Cases (2)" to_port="document"/>
          <connect from_op="Transform Cases (2)" from_port="document" to_op="Stem (Dictionary)" to_port="document"/>
          <connect from_op="Stem (Dictionary)" from_port="document" to_op="Filter Tokens (by Length)" to_port="document"/>
          <connect from_op="Filter Tokens (by Length)" from_port="document" to_op="Filter Stopwords (Dictionary)" to_port="document"/>
          <connect from_op="Filter Stopwords (Dictionary)" from_port="document" to_port="document 1"/>
          <portSpacing port="source_document" spacing="0"/>
          <portSpacing port="sink_document 1" spacing="0"/>
          <portSpacing port="sink_document 2" spacing="0"/>
        </process>
      </operator>
      <operator activated="true" class="collect" compatibility="9.8.001" expanded="true" height="82" name="Collect" width="90" x="782" y="187">
        <parameter key="unfold" value="false"/>
      </operator>
      <operator activated="true" class="operator_toolbox:apply_model_documents" compatibility="2.8.001" expanded="true" height="103" name="Apply Model (Documents)" width="90" x="916" y="391">
        <list key="application_parameters"/>
      </operator>
      <connect from_op="Read Excel" from_port="output" to_op="Generate Attributes" to_port="example set input"/>
      <connect from_op="Generate Attributes" from_port="example set output" to_op="Rename" to_port="example set input"/>
      <connect from_op="Rename" from_port="example set output" to_op="Append" to_port="example set 1"/>
      <connect from_op="Read Excel (2)" from_port="output" to_op="Generate Attributes (2)" to_port="example set input"/>
      <connect from_op="Generate Attributes (2)" from_port="example set output" to_op="Rename (2)" to_port="example set input"/>
      <connect from_op="Rename (2)" from_port="example set output" to_op="Append" to_port="example set 2"/>
      <connect from_op="Append" from_port="merged set" to_op="Dictionary-Based Sentiment (Documents)" to_port="exa"/>
      <connect from_op="Dictionary-Based Sentiment (Documents)" from_port="mod" to_op="Apply Model (Documents)" to_port="mod"/>
      <connect from_op="Read Excel (3)" from_port="output" to_op="Select Attributes" to_port="example set input"/>
      <connect from_op="Select Attributes" from_port="example set output" to_op="Nominal to Text" to_port="example set input"/>
      <connect from_op="Nominal to Text" from_port="example set output" to_op="Process Documents from Data" to_port="example set"/>
      <connect from_op="Process Documents from Data" from_port="example set" to_op="Collect" to_port="input 1"/>
      <connect from_op="Collect" from_port="collection" to_op="Apply Model (Documents)" to_port="doc"/>
      <connect from_op="Apply Model (Documents)" from_port="exa" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="168"/>
      <description align="center" color="yellow" colored="false" height="334" resized="true" width="863" x="69" y="30">This reads tweets from the sheets, tokinize, stem and removing stop words</description>
      <description align="center" color="yellow" colored="false" height="335" resized="true" width="763" x="26" y="454">This generates the dictionary as needed in the &amp;quot;Dict based Sentiment&amp;quot; operator</description>
    </process>
  </operator>
</process>



Best Answer

  • Options
    akinozenakinozen Member Posts: 4 Contributor I
    Solution Accepted
    melihck
    I did it like this.

    <?xml version="1.0" encoding="UTF-8"?><process version="9.8.000">
      <context>
        <input/>
        <output/>
        <macros/>
      </context>
      <operator activated="true" class="process" compatibility="9.4.000" expanded="true" name="Process" origin="GENERATED_TUTORIAL">
        <parameter key="logverbosity" value="init"/>
        <parameter key="random_seed" value="2001"/>
        <parameter key="send_mail" value="never"/>
        <parameter key="notification_email" value=""/>
        <parameter key="process_duration_for_mail" value="30"/>
        <parameter key="encoding" value="SYSTEM"/>
        <process expanded="true">
          <operator activated="true" class="subprocess" compatibility="9.8.000" expanded="true" height="82" name="Subprocess" origin="GENERATED_TUTORIAL" width="90" x="179" y="442">
            <process expanded="true">
              <operator activated="false" class="generate_data_user_specification" compatibility="9.8.000" expanded="true" height="68" name="Generate Data by User Specification" origin="GENERATED_TUTORIAL" width="90" x="447" y="289">
                <list key="attribute_values">
                  <parameter key="Key" value="&quot;good&quot;"/>
                  <parameter key="Value" value="1"/>
                </list>
                <list key="set_additional_roles"/>
              </operator>
              <operator activated="false" class="generate_data_user_specification" compatibility="9.8.000" expanded="true" height="68" name="Generate Data by User Specification (2)" origin="GENERATED_TUTORIAL" width="90" x="313" y="289">
                <list key="attribute_values">
                  <parameter key="Key" value="&quot;bad&quot;"/>
                  <parameter key="Value" value="-1"/>
                </list>
                <list key="set_additional_roles"/>
              </operator>
              <operator activated="false" class="generate_id" compatibility="9.8.000" expanded="true" height="82" name="Generate ID" width="90" x="179" y="289">
                <parameter key="create_nominal_ids" value="false"/>
                <parameter key="offset" value="0"/>
              </operator>
              <operator activated="false" class="retrieve" compatibility="9.8.000" expanded="true" height="68" name="Retrieve ncr_anger" width="90" x="45" y="34">
                <parameter key="repository_entry" value="//Local Repository/DICTIONARY/ncr_anger"/>
              </operator>
              <operator activated="false" class="generate_attributes" compatibility="9.8.000" expanded="true" height="82" name="Generate Attributes (2)" width="90" x="380" y="34">
                <list key="function_descriptions">
                  <parameter key="anger" value="anger"/>
                  <parameter key="anger_score" value="SCORE"/>
                </list>
                <parameter key="keep_all" value="true"/>
              </operator>
              <operator activated="false" class="retrieve" compatibility="9.8.000" expanded="true" height="68" name="Retrieve ncr_fear" width="90" x="45" y="238">
                <parameter key="repository_entry" value="//Local Repository/DICTIONARY/ncr_fear"/>
              </operator>
              <operator activated="false" class="generate_attributes" compatibility="9.8.000" expanded="true" height="82" name="Generate Attributes (3)" width="90" x="514" y="238">
                <list key="function_descriptions">
                  <parameter key="fear" value="fear"/>
                  <parameter key="fear_score" value="score"/>
                </list>
                <parameter key="keep_all" value="true"/>
              </operator>
              <operator activated="false" class="retrieve" compatibility="9.8.000" expanded="true" height="68" name="Retrieve ncr_joy" width="90" x="45" y="136">
                <parameter key="repository_entry" value="//Local Repository/DICTIONARY/ncr_joy"/>
              </operator>
              <operator activated="false" class="generate_attributes" compatibility="9.8.000" expanded="true" height="82" name="Generate Attributes" width="90" x="380" y="136">
                <list key="function_descriptions">
                  <parameter key="joy" value="joy"/>
                  <parameter key="joy_score" value="SCORE"/>
                </list>
                <parameter key="keep_all" value="true"/>
              </operator>
              <operator activated="true" class="retrieve" compatibility="9.8.000" expanded="true" height="68" name="Retrieve ncr_2016_sentiment_SCL-NMA" width="90" x="179" y="85">
                <parameter key="repository_entry" value="ncr_2016_sentiment_SCL-NMA"/>
              </operator>
              <operator activated="true" class="append" compatibility="9.8.000" expanded="true" height="82" name="Append" origin="GENERATED_TUTORIAL" width="90" x="581" y="85">
                <parameter key="datamanagement" value="double_array"/>
                <parameter key="data_management" value="auto"/>
                <parameter key="merge_type" value="all"/>
              </operator>
              <connect from_op="Retrieve ncr_2016_sentiment_SCL-NMA" from_port="output" to_op="Append" to_port="example set 1"/>
              <connect from_op="Append" from_port="merged set" to_port="out 1"/>
              <portSpacing port="source_in 1" spacing="0"/>
              <portSpacing port="sink_out 1" spacing="0"/>
              <portSpacing port="sink_out 2" spacing="0"/>
            </process>
            <description align="center" color="transparent" colored="false" width="126">Generate dummy dictionary</description>
          </operator>
          <operator activated="true" class="generate_data_user_specification" compatibility="9.8.000" expanded="true" height="68" name="Generate Data by User Specification (3)" origin="GENERATED_TUTORIAL" width="90" x="179" y="595">
            <list key="attribute_values">
              <parameter key="neg" value="&quot;not&quot;"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <operator activated="true" class="operator_toolbox:dictionary_sentiment_learner" compatibility="2.8.001" expanded="true" height="103" name="Dictionary Based Sentiment" origin="GENERATED_TUTORIAL" width="90" x="648" y="391">
            <parameter key="value_attribute" value="score"/>
            <parameter key="key_attribute" value="sentiment"/>
            <parameter key="negation_attribute" value="neg"/>
            <parameter key="negation_window_size" value="1"/>
            <parameter key="negation_strength" value=""/>
            <parameter key="use_symmetric_negation_window" value="false"/>
            <parameter key="use_intensifier" value="false"/>
            <parameter key="intensifier_word" value=""/>
            <parameter key="intensifier_value" value=""/>
            <parameter key="use_symmetric_intensifier_window" value="false"/>
          </operator>
          <operator activated="true" class="retrieve" compatibility="9.8.000" expanded="true" height="68" name="Retrieve Henn na Hotel Huis Ten Bosch" width="90" x="246" y="187">
            <parameter key="repository_entry" value="../ROBOT/Henn na Hotel Huis Ten Bosch"/>
          </operator>
          <operator activated="true" class="nominal_to_text" compatibility="9.8.000" expanded="true" height="82" name="Nominal to Text" width="90" x="380" y="187">
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="field-0"/>
            <parameter key="attributes" value=""/>
            <parameter key="use_except_expression" value="false"/>
            <parameter key="value_type" value="nominal"/>
            <parameter key="use_value_type_exception" value="false"/>
            <parameter key="except_value_type" value="file_path"/>
            <parameter key="block_type" value="single_value"/>
            <parameter key="use_block_type_exception" value="false"/>
            <parameter key="except_block_type" value="single_value"/>
            <parameter key="invert_selection" value="false"/>
            <parameter key="include_special_attributes" value="false"/>
          </operator>
          <operator activated="false" class="text:process_document_from_data" compatibility="9.3.001" expanded="true" height="82" name="Process Documents from Data" width="90" x="447" y="85">
            <parameter key="create_word_vector" value="false"/>
            <parameter key="vector_creation" value="TF-IDF"/>
            <parameter key="add_meta_information" value="false"/>
            <parameter key="keep_text" value="false"/>
            <parameter key="prune_method" value="none"/>
            <parameter key="prune_below_percent" value="3.0"/>
            <parameter key="prune_above_percent" value="30.0"/>
            <parameter key="prune_below_rank" value="0.05"/>
            <parameter key="prune_above_rank" value="0.95"/>
            <parameter key="datamanagement" value="double_sparse_array"/>
            <parameter key="data_management" value="auto"/>
            <parameter key="select_attributes_and_weights" value="false"/>
            <list key="specify_weights"/>
            <process expanded="true">
              <operator activated="true" class="text:transform_cases" compatibility="9.3.001" expanded="true" height="68" name="Transform Cases" width="90" x="112" y="34">
                <parameter key="transform_to" value="lower case"/>
              </operator>
              <operator activated="true" class="text:tokenize" compatibility="9.3.001" expanded="true" height="68" name="Tokenize" width="90" x="313" y="85">
                <parameter key="mode" value="non letters"/>
                <parameter key="characters" value=".:"/>
                <parameter key="language" value="English"/>
                <parameter key="max_token_length" value="3"/>
              </operator>
              <operator activated="true" class="text:filter_by_length" compatibility="9.3.001" expanded="true" height="68" name="Filter Tokens (by Length)" width="90" x="447" y="34">
                <parameter key="min_chars" value="4"/>
                <parameter key="max_chars" value="25"/>
              </operator>
              <operator activated="true" class="text:filter_stopwords_english" compatibility="9.3.001" expanded="true" height="68" name="Filter Stopwords (English)" width="90" x="581" y="85"/>
              <connect from_port="document" to_op="Transform Cases" to_port="document"/>
              <connect from_op="Transform Cases" from_port="document" to_op="Tokenize" to_port="document"/>
              <connect from_op="Tokenize" from_port="document" to_op="Filter Tokens (by Length)" to_port="document"/>
              <connect from_op="Filter Tokens (by Length)" from_port="document" to_op="Filter Stopwords (English)" to_port="document"/>
              <connect from_op="Filter Stopwords (English)" from_port="document" to_port="document 1"/>
              <portSpacing port="source_document" spacing="0"/>
              <portSpacing port="sink_document 1" spacing="0"/>
              <portSpacing port="sink_document 2" spacing="0"/>
            </process>
          </operator>
          <operator activated="true" class="text:data_to_documents" compatibility="9.3.001" expanded="true" height="68" name="Data to Documents" width="90" x="514" y="187">
            <parameter key="select_attributes_and_weights" value="false"/>
            <list key="specify_weights"/>
          </operator>
          <operator activated="true" class="loop_collection" compatibility="9.8.000" expanded="true" height="82" name="Loop Collection" width="90" x="648" y="187">
            <parameter key="set_iteration_macro" value="false"/>
            <parameter key="macro_name" value="iteration"/>
            <parameter key="macro_start_value" value="1"/>
            <parameter key="unfold" value="false"/>
            <process expanded="true">
              <operator activated="true" class="text:tokenize" compatibility="9.3.001" expanded="true" height="68" name="Tokenize (2)" width="90" x="380" y="85">
                <parameter key="mode" value="non letters"/>
                <parameter key="characters" value=".:"/>
                <parameter key="language" value="English"/>
                <parameter key="max_token_length" value="3"/>
              </operator>
              <connect from_port="single" to_op="Tokenize (2)" to_port="document"/>
              <connect from_op="Tokenize (2)" from_port="document" to_port="output 1"/>
              <portSpacing port="source_single" spacing="0"/>
              <portSpacing port="sink_output 1" spacing="0"/>
              <portSpacing port="sink_output 2" spacing="0"/>
            </process>
          </operator>
          <operator activated="true" class="operator_toolbox:apply_model_documents" compatibility="2.8.001" expanded="true" height="103" name="Apply Model (Documents)" origin="GENERATED_TUTORIAL" width="90" x="916" y="340">
            <list key="application_parameters"/>
          </operator>
          <operator activated="false" class="aggregate" compatibility="9.8.000" expanded="true" height="82" name="Aggregate" width="90" x="916" y="136">
            <parameter key="use_default_aggregation" value="false"/>
            <parameter key="attribute_filter_type" value="all"/>
            <parameter key="attribute" value=""/>
            <parameter key="attributes" value=""/>
            <parameter key="use_except_expression" value="false"/>
            <parameter key="value_type" value="attribute_value"/>
            <parameter key="use_value_type_exception" value="false"/>
            <parameter key="except_value_type" value="time"/>
            <parameter key="block_type" value="attribute_block"/>
            <parameter key="use_block_type_exception" value="false"/>
            <parameter key="except_block_type" value="value_matrix_row_start"/>
            <parameter key="invert_selection" value="false"/>
            <parameter key="include_special_attributes" value="false"/>
            <parameter key="default_aggregation_function" value="average"/>
            <list key="aggregation_attributes">
              <parameter key="Scoring String" value="count"/>
            </list>
            <parameter key="group_by_attributes" value=""/>
            <parameter key="count_all_combinations" value="false"/>
            <parameter key="only_distinct" value="true"/>
            <parameter key="ignore_missings" value="true"/>
          </operator>
          <connect from_op="Subprocess" from_port="out 1" to_op="Dictionary Based Sentiment" to_port="exa"/>
          <connect from_op="Generate Data by User Specification (3)" from_port="output" to_op="Dictionary Based Sentiment" to_port="neg"/>
          <connect from_op="Dictionary Based Sentiment" from_port="mod" to_op="Apply Model (Documents)" to_port="mod"/>
          <connect from_op="Retrieve Henn na Hotel Huis Ten Bosch" from_port="output" to_op="Nominal to Text" to_port="example set input"/>
          <connect from_op="Nominal to Text" from_port="example set output" to_op="Data to Documents" to_port="example set"/>
          <connect from_op="Data to Documents" from_port="documents" to_op="Loop Collection" to_port="collection"/>
          <connect from_op="Loop Collection" from_port="output 1" to_op="Apply Model (Documents)" to_port="doc"/>
          <connect from_op="Apply Model (Documents)" from_port="exa" to_port="result 1"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
        </process>
      </operator>
    </process>



Answers

  • Options
    MelihckMelihck Member Posts: 8 Learner I
    I added the files in case of need.

    Thanks a lot 
  • Options
    MartinLiebigMartinLiebig Administrator, Moderator, Employee, RapidMiner Certified Analyst, RapidMiner Certified Expert, University Professor Posts: 3,507 RM Data Scientist
    Hi,
    The operator expects documents, not tables. Please try Data to Documents and have a look at the tutorial process.

    Best,
    Martin
    - Sr. Director Data Solutions, Altair RapidMiner -
    Dortmund, Germany
  • Options
    MelihckMelihck Member Posts: 8 Learner I
    edited December 2020
    Hi, @mschmitz thanks a lot. 

    I tried data to documents operator already. But this time it gives the error below:

    And about the tutorial, I was thinking to explore the "text and web mining" tutorial. I saw the headline "Automatic Classification of Documents" and maybe I can use it for the sentimental analysis. But if you have any other starting point I would appreciate it. 

    Best!
    Melih 




  • Options
    akinozenakinozen Member Posts: 4 Contributor I
    hello @mschmitz thank you very much for your posts. My question is how can I group the "scoring string" obtained in the above model. Then how can I sum the string values? Thank you
  • Options
    akinozenakinozen Member Posts: 4 Contributor I
    @mschmitz my result
  • Options
    MelihckMelihck Member Posts: 8 Learner I
    Hi @akinozen; thanks! It's really helped, now I can calculate the positive and negative scores of texts.

    But now I just noticed that dictionary-based sentimental operator designed for only negative and positive indicators. If I am wrong please correct me @mschmitz. Because I want to define different attributes like Anger, Anticipation,  Disgust, etc.

    I will try to do it with the "old-trick" where @jacobcybulski mention here: https://community.rapidminer.com/discussion/comment/64839#Comment_64839

    Hopefully, it will work. Thanks again for your support.
  • Options
    MartinLiebigMartinLiebig Administrator, Moderator, Employee, RapidMiner Certified Analyst, RapidMiner Certified Expert, University Professor Posts: 3,507 RM Data Scientist
    no need to use @jacobcybulski 's trick. I mean the operator just calculates a score. It can be Anger, Digust etc. You just need to have the right dictionary.

    Best,
    Martin
    - Sr. Director Data Solutions, Altair RapidMiner -
    Dortmund, Germany
  • Options
    akinozenakinozen Member Posts: 4 Contributor I
    How can we sum the scores of each category (anger, disgust etc) in the scoring string table? Can you suggest a dictionary of sentiment with verified scores? I use the dictionaries in the link.  dictionary link: http://sentiment.nrc.ca/lexicons-for-research/
    I want to create my result table according to my input dictionary and I want to sum the scores.
    I am grateful for your help.


    my model
    <?xml version="1.0" encoding="UTF-8"?><process version="9.8.000">
      <context>
        <input/>
        <output/>
        <macros/>
      </context>
      <operator activated="true" class="process" compatibility="9.4.000" expanded="true" name="Process" origin="GENERATED_TUTORIAL">
        <parameter key="logverbosity" value="init"/>
        <parameter key="random_seed" value="2001"/>
        <parameter key="send_mail" value="never"/>
        <parameter key="notification_email" value=""/>
        <parameter key="process_duration_for_mail" value="30"/>
        <parameter key="encoding" value="SYSTEM"/>
        <process expanded="true">
          <operator activated="true" class="subprocess" compatibility="9.8.000" expanded="true" height="82" name="Subprocess" origin="GENERATED_TUTORIAL" width="90" x="179" y="442">
            <process expanded="true">
              <operator activated="true" class="retrieve" compatibility="9.8.000" expanded="true" height="68" name="Retrieve ncr_anger" width="90" x="45" y="34">
                <parameter key="repository_entry" value="//Local Repository/DICTIONARY/ncr_anger"/>
              </operator>
              <operator activated="true" class="generate_attributes" compatibility="9.8.000" expanded="true" height="82" name="Generate Attributes (2)" width="90" x="380" y="34">
                <list key="function_descriptions">
                  <parameter key="word" value="anger"/>
                  <parameter key="score" value="SCORE"/>
                </list>
                <parameter key="keep_all" value="false"/>
              </operator>
              <operator activated="true" class="select_attributes" compatibility="9.8.000" expanded="true" height="82" name="Select Attributes" width="90" x="514" y="34">
                <parameter key="attribute_filter_type" value="subset"/>
                <parameter key="attribute" value=""/>
                <parameter key="attributes" value="score|word"/>
                <parameter key="use_except_expression" value="false"/>
                <parameter key="value_type" value="attribute_value"/>
                <parameter key="use_value_type_exception" value="false"/>
                <parameter key="except_value_type" value="time"/>
                <parameter key="block_type" value="attribute_block"/>
                <parameter key="use_block_type_exception" value="false"/>
                <parameter key="except_block_type" value="value_matrix_row_start"/>
                <parameter key="invert_selection" value="false"/>
                <parameter key="include_special_attributes" value="false"/>
              </operator>
              <operator activated="true" class="retrieve" compatibility="9.8.000" expanded="true" height="68" name="Retrieve ncr_fear" width="90" x="45" y="238">
                <parameter key="repository_entry" value="//Local Repository/DICTIONARY/ncr_fear"/>
              </operator>
              <operator activated="true" class="generate_attributes" compatibility="9.8.000" expanded="true" height="82" name="Generate Attributes (3)" width="90" x="514" y="238">
                <list key="function_descriptions">
                  <parameter key="word" value="fear"/>
                  <parameter key="score" value="score"/>
                </list>
                <parameter key="keep_all" value="false"/>
              </operator>
              <operator activated="true" class="retrieve" compatibility="9.8.000" expanded="true" height="68" name="Retrieve ncr_joy" width="90" x="45" y="136">
                <parameter key="repository_entry" value="//Local Repository/DICTIONARY/ncr_joy"/>
              </operator>
              <operator activated="true" class="generate_attributes" compatibility="9.8.000" expanded="true" height="82" name="Generate Attributes" width="90" x="380" y="136">
                <list key="function_descriptions">
                  <parameter key="word" value="joy"/>
                  <parameter key="score" value="SCORE"/>
                </list>
                <parameter key="keep_all" value="false"/>
              </operator>
              <operator activated="false" class="retrieve" compatibility="9.8.000" expanded="true" height="68" name="Retrieve ncr_2016_sentiment_SCL-NMA" width="90" x="179" y="85">
                <parameter key="repository_entry" value="ncr_2016_sentiment_SCL-NMA"/>
              </operator>
              <operator activated="true" class="append" compatibility="9.8.000" expanded="true" height="124" name="Append" origin="GENERATED_TUTORIAL" width="90" x="648" y="85">
                <parameter key="datamanagement" value="double_array"/>
                <parameter key="data_management" value="auto"/>
                <parameter key="merge_type" value="all"/>
              </operator>
              <connect from_op="Retrieve ncr_anger" from_port="output" to_op="Generate Attributes (2)" to_port="example set input"/>
              <connect from_op="Generate Attributes (2)" from_port="example set output" to_op="Select Attributes" to_port="example set input"/>
              <connect from_op="Select Attributes" from_port="example set output" to_op="Append" to_port="example set 1"/>
              <connect from_op="Retrieve ncr_fear" from_port="output" to_op="Generate Attributes (3)" to_port="example set input"/>
              <connect from_op="Generate Attributes (3)" from_port="example set output" to_op="Append" to_port="example set 3"/>
              <connect from_op="Retrieve ncr_joy" from_port="output" to_op="Generate Attributes" to_port="example set input"/>
              <connect from_op="Generate Attributes" from_port="example set output" to_op="Append" to_port="example set 2"/>
              <connect from_op="Append" from_port="merged set" to_port="out 1"/>
              <portSpacing port="source_in 1" spacing="0"/>
              <portSpacing port="sink_out 1" spacing="0"/>
              <portSpacing port="sink_out 2" spacing="0"/>
            </process>
            <description align="center" color="transparent" colored="false" width="126">Generate dummy dictionary</description>
          </operator>
          <operator activated="true" class="generate_data_user_specification" compatibility="9.8.000" expanded="true" height="68" name="Generate Data by User Specification (3)" origin="GENERATED_TUTORIAL" width="90" x="179" y="595">
            <list key="attribute_values">
              <parameter key="neg" value="&quot;not&quot;"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <operator activated="true" class="operator_toolbox:dictionary_sentiment_learner" compatibility="2.8.001" expanded="true" height="103" name="Dictionary Based Sentiment" origin="GENERATED_TUTORIAL" width="90" x="447" y="391">
            <parameter key="value_attribute" value="score"/>
            <parameter key="key_attribute" value="word"/>
            <parameter key="negation_attribute" value="neg"/>
            <parameter key="negation_window_size" value="1"/>
            <parameter key="negation_strength" value=""/>
            <parameter key="use_symmetric_negation_window" value="false"/>
            <parameter key="use_intensifier" value="false"/>
            <parameter key="intensifier_word" value=""/>
            <parameter key="intensifier_value" value=""/>
            <parameter key="use_symmetric_intensifier_window" value="false"/>
          </operator>
          <operator activated="true" class="retrieve" compatibility="9.8.000" expanded="true" height="68" name="Retrieve Henn na Hotel Huis Ten Bosch" width="90" x="246" y="187">
            <parameter key="repository_entry" value="../ROBOT/Henn na Hotel Huis Ten Bosch"/>
          </operator>
          <operator activated="true" class="nominal_to_text" compatibility="9.8.000" expanded="true" height="82" name="Nominal to Text" width="90" x="380" y="187">
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="field-0"/>
            <parameter key="attributes" value=""/>
            <parameter key="use_except_expression" value="false"/>
            <parameter key="value_type" value="nominal"/>
            <parameter key="use_value_type_exception" value="false"/>
            <parameter key="except_value_type" value="file_path"/>
            <parameter key="block_type" value="single_value"/>
            <parameter key="use_block_type_exception" value="false"/>
            <parameter key="except_block_type" value="single_value"/>
            <parameter key="invert_selection" value="false"/>
            <parameter key="include_special_attributes" value="false"/>
          </operator>
          <operator activated="true" class="text:data_to_documents" compatibility="9.3.001" expanded="true" height="68" name="Data to Documents" width="90" x="514" y="187">
            <parameter key="select_attributes_and_weights" value="false"/>
            <list key="specify_weights"/>
          </operator>
          <operator activated="true" class="loop_collection" compatibility="9.8.000" expanded="true" height="82" name="Loop Collection" width="90" x="648" y="187">
            <parameter key="set_iteration_macro" value="false"/>
            <parameter key="macro_name" value="iteration"/>
            <parameter key="macro_start_value" value="1"/>
            <parameter key="unfold" value="false"/>
            <process expanded="true">
              <operator activated="true" class="text:tokenize" compatibility="9.3.001" expanded="true" height="68" name="Tokenize (2)" width="90" x="112" y="34">
                <parameter key="mode" value="non letters"/>
                <parameter key="characters" value=".:"/>
                <parameter key="language" value="English"/>
                <parameter key="max_token_length" value="3"/>
              </operator>
              <operator activated="true" class="text:transform_cases" compatibility="9.3.001" expanded="true" height="68" name="Transform Cases (2)" width="90" x="246" y="34">
                <parameter key="transform_to" value="lower case"/>
              </operator>
              <operator activated="true" class="text:filter_by_length" compatibility="9.3.001" expanded="true" height="68" name="Filter Tokens (by Length) (2)" width="90" x="380" y="34">
                <parameter key="min_chars" value="2"/>
                <parameter key="max_chars" value="25"/>
              </operator>
              <operator activated="true" class="text:filter_stopwords_english" compatibility="9.3.001" expanded="true" height="68" name="Filter Stopwords (English) (2)" width="90" x="514" y="34"/>
              <operator activated="false" class="text:generate_n_grams_terms" compatibility="9.3.001" expanded="true" height="68" name="Generate n-Grams (Terms)" width="90" x="648" y="85">
                <parameter key="max_length" value="2"/>
              </operator>
              <connect from_port="single" to_op="Tokenize (2)" to_port="document"/>
              <connect from_op="Tokenize (2)" from_port="document" to_op="Transform Cases (2)" to_port="document"/>
              <connect from_op="Transform Cases (2)" from_port="document" to_op="Filter Tokens (by Length) (2)" to_port="document"/>
              <connect from_op="Filter Tokens (by Length) (2)" from_port="document" to_op="Filter Stopwords (English) (2)" to_port="document"/>
              <connect from_op="Filter Stopwords (English) (2)" from_port="document" to_port="output 1"/>
              <portSpacing port="source_single" spacing="0"/>
              <portSpacing port="sink_output 1" spacing="0"/>
              <portSpacing port="sink_output 2" spacing="0"/>
            </process>
          </operator>
          <operator activated="true" class="operator_toolbox:apply_model_documents" compatibility="2.8.001" expanded="true" height="103" name="Apply Model (Documents)" origin="GENERATED_TUTORIAL" width="90" x="782" y="289">
            <list key="application_parameters"/>
          </operator>
          <operator activated="true" class="split" compatibility="9.8.000" expanded="true" height="82" name="Split" width="90" x="916" y="85">
            <parameter key="attribute_filter_type" value="subset"/>
            <parameter key="attribute" value="Scoring String"/>
            <parameter key="attributes" value="|Scoring String"/>
            <parameter key="use_except_expression" value="false"/>
            <parameter key="value_type" value="nominal"/>
            <parameter key="use_value_type_exception" value="false"/>
            <parameter key="except_value_type" value="file_path"/>
            <parameter key="block_type" value="single_value"/>
            <parameter key="use_block_type_exception" value="false"/>
            <parameter key="except_block_type" value="single_value"/>
            <parameter key="invert_selection" value="false"/>
            <parameter key="include_special_attributes" value="false"/>
            <parameter key="split_pattern" value=","/>
            <parameter key="split_mode" value="ordered_split"/>
          </operator>
          <connect from_op="Subprocess" from_port="out 1" to_op="Dictionary Based Sentiment" to_port="exa"/>
          <connect from_op="Generate Data by User Specification (3)" from_port="output" to_op="Dictionary Based Sentiment" to_port="neg"/>
          <connect from_op="Dictionary Based Sentiment" from_port="mod" to_op="Apply Model (Documents)" to_port="mod"/>
          <connect from_op="Retrieve Henn na Hotel Huis Ten Bosch" from_port="output" to_op="Nominal to Text" to_port="example set input"/>
          <connect from_op="Nominal to Text" from_port="example set output" to_op="Data to Documents" to_port="example set"/>
          <connect from_op="Data to Documents" from_port="documents" to_op="Loop Collection" to_port="collection"/>
          <connect from_op="Loop Collection" from_port="output 1" to_op="Apply Model (Documents)" to_port="doc"/>
          <connect from_op="Apply Model (Documents)" from_port="exa" to_op="Split" to_port="example set input"/>
          <connect from_op="Split" from_port="example set output" to_port="result 1"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
        </process>
      </operator>
    </process>





Sign In or Register to comment.