The RapidMiner community is on read-only mode until further notice. Technical support via cases will continue to work as is. For any urgent licensing related requests from Students/Faculty members, please use the Altair academic forum here.

Text Mining

NoorMohammad786NoorMohammad786 Member Posts: 1 Newbie
Hello everyone, I am working on my master's thesis and I have collected Tweets from Twitter. Now I want to mine the text that I have gathered, and after that I want to do sentiment analysis. Can RapidMiner do this? One more thing: can I import Excel data and work on it?

Answers

  • btibertbtibert Member, University Professor Posts: 146 Guru
    Sentiment analysis is 100% possible in RM.  There is an operator for this task, and depending on your research, I might recommend labeling your dataset and fitting a model to predict sentiment.  Some dictionary-based approaches are simple token lookups, whereas the latter might help you learn the nuances of your domain.  Good luck.
  • kdafoekdafoe Member Posts: 20 Maven
    Hi NoorMohammad786. Yes. I do this all the time in RapidMiner. Here is an XML process of one way to get you started. If you want to bring in Excel text data rather than Twitter data, just replace the Search Twitter operator with the Retrieve operator and modify the Select Attributes operator accordingly. You will also need the Operator Toolbox extension from the Marketplace for the Extract Sentiment operator.

    <?xml version="1.0" encoding="UTF-8"?><process version="9.10.001">
      <!--
        RapidMiner process: fetch tweets, reduce them to the "Text" attribute,
        build a TF-IDF word vector from that text, and run the Extract Sentiment
        operator on the result.
        Operators prefixed social_media:, text: and operator_toolbox: come from
        extensions, so those extensions must be installed for the process to load.
      -->
      <context>
        <input/>
        <output/>
        <macros/>
      </context>
      <operator activated="true" class="process" compatibility="9.4.000" expanded="true" name="Process">
        <parameter key="logverbosity" value="init"/>
        <parameter key="random_seed" value="2001"/>
        <parameter key="send_mail" value="never"/>
        <parameter key="notification_email" value=""/>
        <parameter key="process_duration_for_mail" value="30"/>
        <parameter key="encoding" value="SYSTEM"/>
        <process expanded="true">
          <!--
            Step 1: search Twitter for the query "Rapidminer", limited to 100 results.
            The Twitter connection is read from the repository entry given in
            connection_entry; that path is specific to the author's repository and
            has to be replaced with your own connection.
          -->
          <operator activated="true" class="social_media:search_twitter" compatibility="9.6.000" expanded="true" height="82" name="Search Twitter" width="90" x="112" y="34">
            <parameter key="connection_source" value="repository"/>
            <parameter key="connection_entry" value="//Local Repository/Connections/TwitterNew"/>
            <parameter key="query" value="Rapidminer"/>
            <parameter key="result_type" value="recent or popular"/>
            <parameter key="limit" value="100"/>
            <parameter key="filter_by_geo_location" value="false"/>
            <parameter key="radius_unit" value="miles"/>
          </operator>
          <!--
            Step 2: keep only the single attribute named "Text"
            (attribute_filter_type is "single", selection is not inverted).
            When feeding a different data source, change the attribute name here.
          -->
          <operator activated="true" class="select_attributes" compatibility="9.10.001" expanded="true" height="82" name="Select Attributes" width="90" x="246" y="34">
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="Text"/>
            <parameter key="attributes" value=""/>
            <parameter key="use_except_expression" value="false"/>
            <parameter key="value_type" value="attribute_value"/>
            <parameter key="use_value_type_exception" value="false"/>
            <parameter key="except_value_type" value="time"/>
            <parameter key="block_type" value="attribute_block"/>
            <parameter key="use_block_type_exception" value="false"/>
            <parameter key="except_block_type" value="value_matrix_row_start"/>
            <parameter key="invert_selection" value="false"/>
            <parameter key="include_special_attributes" value="false"/>
          </operator>
          <!--
            Step 3: apply Nominal to Text to the single attribute "Text" so that the
            document-processing step downstream receives it as text.
          -->
          <operator activated="true" class="nominal_to_text" compatibility="9.10.001" expanded="true" height="82" name="Nominal to Text" width="90" x="380" y="34">
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="Text"/>
            <parameter key="attributes" value=""/>
            <parameter key="use_except_expression" value="false"/>
            <parameter key="value_type" value="nominal"/>
            <parameter key="use_value_type_exception" value="false"/>
            <parameter key="except_value_type" value="file_path"/>
            <parameter key="block_type" value="single_value"/>
            <parameter key="use_block_type_exception" value="false"/>
            <parameter key="except_block_type" value="single_value"/>
            <parameter key="invert_selection" value="false"/>
            <parameter key="include_special_attributes" value="false"/>
          </operator>
          <!--
            Step 4: build a TF-IDF word vector from the text.
            keep_text is "true" and prune_method is "none", so no terms are pruned
            and the source text is retained in the output example set.
            The nested process below defines the per-document processing.
          -->
          <operator activated="true" class="text:process_document_from_data" compatibility="8.1.000" expanded="true" height="82" name="Process Documents from Data" width="90" x="514" y="34">
            <parameter key="create_word_vector" value="true"/>
            <parameter key="vector_creation" value="TF-IDF"/>
            <parameter key="add_meta_information" value="true"/>
            <parameter key="keep_text" value="true"/>
            <parameter key="prune_method" value="none"/>
            <parameter key="prune_below_percent" value="3.0"/>
            <parameter key="prune_above_percent" value="30.0"/>
            <parameter key="prune_below_rank" value="0.05"/>
            <parameter key="prune_above_rank" value="0.95"/>
            <parameter key="datamanagement" value="double_sparse_array"/>
            <parameter key="data_management" value="auto"/>
            <parameter key="select_attributes_and_weights" value="false"/>
            <list key="specify_weights"/>
            <process expanded="true">
              <!--
                Tokenize in "specify characters" mode using the listed punctuation
                characters.
                NOTE(review): the character list contains no whitespace character, so
                tokens are presumably split only at punctuation and may span several
                words. Confirm this is intended for the word vector.
              -->
              <operator activated="true" class="text:tokenize" compatibility="9.4.000" expanded="true" height="68" name="Tokenize (2)" width="90" x="112" y="85">
                <parameter key="mode" value="specify characters"/>
                <parameter key="characters" value=".,!?[{:;&quot;(/"/>
                <parameter key="language" value="English"/>
                <parameter key="max_token_length" value="3"/>
              </operator>
              <!-- Lower-case every token produced by the tokenizer. -->
              <operator activated="true" class="text:transform_cases" compatibility="9.4.000" expanded="true" height="68" name="Transform Cases" width="90" x="313" y="85">
                <parameter key="transform_to" value="lower case"/>
              </operator>
              <!-- Inner wiring: document in, Tokenize (2), Transform Cases, document out. -->
              <connect from_port="document" to_op="Tokenize (2)" to_port="document"/>
              <connect from_op="Tokenize (2)" from_port="document" to_op="Transform Cases" to_port="document"/>
              <connect from_op="Transform Cases" from_port="document" to_port="document 1"/>
              <portSpacing port="source_document" spacing="0"/>
              <portSpacing port="sink_document 1" spacing="0"/>
              <portSpacing port="sink_document 2" spacing="0"/>
            </process>
          </operator>
          <!--
            Step 5: Extract Sentiment with the "vader" model, reading the attribute
            named "text" and emitting the advanced output columns.
            NOTE(review): text_attribute is lowercase "text" while the upstream
            attribute is "Text"; presumably "text" is the attribute kept by
            keep_text in step 4. Confirm against the actual example set.
          -->
          <operator activated="true" class="operator_toolbox:extract_sentiment" compatibility="2.12.000" expanded="true" height="103" name="Extract Sentiment" width="90" x="648" y="136">
            <parameter key="model" value="vader"/>
            <parameter key="text_attribute" value="text"/>
            <parameter key="show_advanced_output" value="true"/>
            <parameter key="use_default_tokenization_regex" value="true"/>
            <list key="additional_words"/>
          </operator>
          <!--
            Top-level wiring: the five operators are chained in order.
            Process result 1 receives the word list from Process Documents from Data;
            process result 2 receives the example set from Extract Sentiment.
          -->
          <connect from_op="Search Twitter" from_port="output" to_op="Select Attributes" to_port="example set input"/>
          <connect from_op="Select Attributes" from_port="example set output" to_op="Nominal to Text" to_port="example set input"/>
          <connect from_op="Nominal to Text" from_port="example set output" to_op="Process Documents from Data" to_port="example set"/>
          <connect from_op="Process Documents from Data" from_port="example set" to_op="Extract Sentiment" to_port="exa"/>
          <connect from_op="Process Documents from Data" from_port="word list" to_port="result 1"/>
          <connect from_op="Extract Sentiment" from_port="exa" to_port="result 2"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
          <portSpacing port="sink_result 3" spacing="0"/>
        </process>
      </operator>
    </process>

Sign In or Register to comment.