How To Use R's Wordcloud2 Library in Rapidminer

chipp300chipp300 Member Posts: 4 Contributor I
Hello! I have been using a registered student version of Rapidminer since Fall of last year and feel like I have decent handle of the basics. Part of my capstone project I am working on, though, requires me to create some wordclouds and found a very useful tutorial on Thomas Ott's Neural Market Trends site that uses R's Wordcloud library along with the RColorBrewer library; the XML code provided is heavily inspired by his work. 

While I was able to get the original Wordcloud function to work I am not able to get the newer Wordcloud2 library to work with Rapidminer's 'Execute R' process. I know Wordcloud2 was successfully installed because I can call the function in my R Studio instance using the prebuilt data.frame demoFreq containing both the word and its respective frequency via wordcloud2(demoFreq). I know the rest of the Rapidminer process is solid but when it gets to the 'Execute R' process that contains the R code it fails, so I know it has something to do with the syntax as Wordclod2 wants a data frame / variable that contains both the words and their frequencies similar to the demoFreq data.frame example. Can anyone help me figure out the syntax needed to change the 'Execute R' process from using the wordcloud library to instead using the wordcloud2 library? Please find my code below (sorry in advance if it's not in the correct format):


<?xml version="1.0" encoding="UTF-8"?><process version="9.6.000">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="9.4.000" expanded="true" name="Process">
    <parameter key="logverbosity" value="init"/>
    <parameter key="random_seed" value="2001"/>
    <parameter key="send_mail" value="never"/>
    <parameter key="notification_email" value=""/>
    <parameter key="process_duration_for_mail" value="30"/>
    <parameter key="encoding" value="SYSTEM"/>
    <process expanded="true">
      <operator activated="true" class="retrieve" compatibility="9.6.000" expanded="true" height="68" name="Retrieve" width="90" x="45" y="34">
        <parameter key="repository_entry" value="//Local Repository/MiamiValleyHospitalsFirstPAss"/>
      </operator>
      <operator activated="true" class="subprocess" compatibility="9.6.000" expanded="true" height="82" name="Subprocess (miss)" width="90" x="179" y="340">
        <process expanded="true">
          <operator activated="true" class="replace_missing_values" compatibility="9.6.000" expanded="true" height="103" name="Replace Missing Values" width="90" x="179" y="34">
            <parameter key="return_preprocessing_model" value="false"/>
            <parameter key="create_view" value="false"/>
            <parameter key="attribute_filter_type" value="subset"/>
            <parameter key="attribute" value=""/>
            <parameter key="attributes" value="coordinates|geo|user_description|user_location"/>
            <parameter key="use_except_expression" value="false"/>
            <parameter key="value_type" value="attribute_value"/>
            <parameter key="use_value_type_exception" value="false"/>
            <parameter key="except_value_type" value="time"/>
            <parameter key="block_type" value="attribute_block"/>
            <parameter key="use_block_type_exception" value="false"/>
            <parameter key="except_block_type" value="value_matrix_row_start"/>
            <parameter key="invert_selection" value="false"/>
            <parameter key="include_special_attributes" value="false"/>
            <parameter key="default" value="value"/>
            <list key="columns"/>
            <parameter key="replenishment_value" value="Unknown"/>
          </operator>
          <connect from_port="in 1" to_op="Replace Missing Values" to_port="example set input"/>
          <connect from_op="Replace Missing Values" from_port="example set output" to_port="out 1"/>
          <portSpacing port="source_in 1" spacing="0"/>
          <portSpacing port="source_in 2" spacing="0"/>
          <portSpacing port="sink_out 1" spacing="0"/>
          <portSpacing port="sink_out 2" spacing="0"/>
        </process>
      </operator>
      <operator activated="true" class="subprocess" compatibility="9.6.000" expanded="true" height="82" name="Subprocess (clean)" width="90" x="313" y="340">
        <process expanded="true">
          <operator activated="true" class="replace" compatibility="9.6.000" expanded="true" height="82" name="Replace http*" width="90" x="179" y="34">
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="text"/>
            <parameter key="attributes" value=""/>
            <parameter key="use_except_expression" value="false"/>
            <parameter key="value_type" value="nominal"/>
            <parameter key="use_value_type_exception" value="false"/>
            <parameter key="except_value_type" value="file_path"/>
            <parameter key="block_type" value="single_value"/>
            <parameter key="use_block_type_exception" value="false"/>
            <parameter key="except_block_type" value="single_value"/>
            <parameter key="invert_selection" value="false"/>
            <parameter key="include_special_attributes" value="false"/>
            <parameter key="replace_what" value="http:.*"/>
            <parameter key="replace_by" value="linktag"/>
          </operator>
          <operator activated="false" class="replace" compatibility="9.6.000" expanded="true" height="82" name="Replace @ width=90" x="313" y="340">
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="text"/>
            <parameter key="attributes" value=""/>
            <parameter key="use_except_expression" value="false"/>
            <parameter key="value_type" value="nominal"/>
            <parameter key="use_value_type_exception" value="false"/>
            <parameter key="except_value_type" value="file_path"/>
            <parameter key="block_type" value="single_value"/>
            <parameter key="use_block_type_exception" value="false"/>
            <parameter key="except_block_type" value="single_value"/>
            <parameter key="invert_selection" value="false"/>
            <parameter key="include_special_attributes" value="false"/>
            <parameter key="replace_what" value="\@/&gt;
            <parameter key="replace_by" value="attag"/>
          </operator>
          <operator activated="true" class="replace" compatibility="9.6.000" expanded="true" height="82" name="Replace #" width="90" x="447" y="187">
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="text"/>
            <parameter key="attributes" value=""/>
            <parameter key="use_except_expression" value="false"/>
            <parameter key="value_type" value="nominal"/>
            <parameter key="use_value_type_exception" value="false"/>
            <parameter key="except_value_type" value="file_path"/>
            <parameter key="block_type" value="single_value"/>
            <parameter key="use_block_type_exception" value="false"/>
            <parameter key="except_block_type" value="single_value"/>
            <parameter key="invert_selection" value="false"/>
            <parameter key="include_special_attributes" value="false"/>
            <parameter key="replace_what" value="\#"/>
            <parameter key="replace_by" value="hashtag"/>
          </operator>
          <operator activated="true" class="replace" compatibility="9.6.000" expanded="true" height="82" name="Replace ?" width="90" x="581" y="187">
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="text"/>
            <parameter key="attributes" value=""/>
            <parameter key="use_except_expression" value="false"/>
            <parameter key="value_type" value="nominal"/>
            <parameter key="use_value_type_exception" value="false"/>
            <parameter key="except_value_type" value="file_path"/>
            <parameter key="block_type" value="single_value"/>
            <parameter key="use_block_type_exception" value="false"/>
            <parameter key="except_block_type" value="single_value"/>
            <parameter key="invert_selection" value="false"/>
            <parameter key="include_special_attributes" value="false"/>
            <parameter key="replace_what" value="\?"/>
            <parameter key="replace_by" value="questiontag"/>
          </operator>
          <operator activated="true" class="replace" compatibility="9.6.000" expanded="true" height="82" name="Replace !" width="90" x="715" y="187">
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="text"/>
            <parameter key="attributes" value=""/>
            <parameter key="use_except_expression" value="false"/>
            <parameter key="value_type" value="nominal"/>
            <parameter key="use_value_type_exception" value="false"/>
            <parameter key="except_value_type" value="file_path"/>
            <parameter key="block_type" value="single_value"/>
            <parameter key="use_block_type_exception" value="false"/>
            <parameter key="except_block_type" value="single_value"/>
            <parameter key="invert_selection" value="false"/>
            <parameter key="include_special_attributes" value="false"/>
            <parameter key="replace_what" value="\!"/>
            <parameter key="replace_by" value="exclamationtag"/>
          </operator>
          <operator activated="true" class="replace" compatibility="9.6.000" expanded="true" height="82" name="Replace https*" width="90" x="849" y="34">
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="text"/>
            <parameter key="attributes" value=""/>
            <parameter key="use_except_expression" value="false"/>
            <parameter key="value_type" value="nominal"/>
            <parameter key="use_value_type_exception" value="false"/>
            <parameter key="except_value_type" value="file_path"/>
            <parameter key="block_type" value="single_value"/>
            <parameter key="use_block_type_exception" value="false"/>
            <parameter key="except_block_type" value="single_value"/>
            <parameter key="invert_selection" value="false"/>
            <parameter key="include_special_attributes" value="false"/>
            <parameter key="replace_what" value="https:.*"/>
            <parameter key="replace_by" value="attag"/>
          </operator>
          <connect from_port="in 1" to_op="Replace http*" to_port="example set input"/>
          <connect from_op="Replace http*" from_port="example set output" to_op="Replace #" to_port="example set input"/>
          <connect from_op="Replace #" from_port="example set output" to_op="Replace ?" to_port="example set input"/>
          <connect from_op="Replace ?" from_port="example set output" to_op="Replace !" to_port="example set input"/>
          <connect from_op="Replace !" from_port="example set output" to_op="Replace https*" to_port="example set input"/>
          <connect from_op="Replace https*" from_port="example set output" to_port="out 1"/>
          <portSpacing port="source_in 1" spacing="0"/>
          <portSpacing port="source_in 2" spacing="0"/>
          <portSpacing port="sink_out 1" spacing="0"/>
          <portSpacing port="sink_out 2" spacing="0"/>
        </process>
      </operator>
      <operator activated="true" class="select_attributes" compatibility="9.6.000" expanded="true" height="82" name="Select Attributes" width="90" x="447" y="340">
        <parameter key="attribute_filter_type" value="single"/>
        <parameter key="attribute" value="text"/>
        <parameter key="attributes" value=""/>
        <parameter key="use_except_expression" value="false"/>
        <parameter key="value_type" value="attribute_value"/>
        <parameter key="use_value_type_exception" value="false"/>
        <parameter key="except_value_type" value="time"/>
        <parameter key="block_type" value="attribute_block"/>
        <parameter key="use_block_type_exception" value="false"/>
        <parameter key="except_block_type" value="value_matrix_row_start"/>
        <parameter key="invert_selection" value="false"/>
        <parameter key="include_special_attributes" value="false"/>
      </operator>
      <operator activated="true" class="nominal_to_text" compatibility="9.6.000" expanded="true" height="82" name="Nominal to Text" width="90" x="581" y="340">
        <parameter key="attribute_filter_type" value="single"/>
        <parameter key="attribute" value="text"/>
        <parameter key="attributes" value=""/>
        <parameter key="use_except_expression" value="false"/>
        <parameter key="value_type" value="nominal"/>
        <parameter key="use_value_type_exception" value="false"/>
        <parameter key="except_value_type" value="file_path"/>
        <parameter key="block_type" value="single_value"/>
        <parameter key="use_block_type_exception" value="false"/>
        <parameter key="except_block_type" value="single_value"/>
        <parameter key="invert_selection" value="false"/>
        <parameter key="include_special_attributes" value="false"/>
      </operator>
      <operator activated="true" class="text:process_document_from_data" compatibility="9.3.001" expanded="true" height="82" name="Process Documents from Data (2)" width="90" x="715" y="340">
        <parameter key="create_word_vector" value="true"/>
        <parameter key="vector_creation" value="TF-IDF"/>
        <parameter key="add_meta_information" value="true"/>
        <parameter key="keep_text" value="false"/>
        <parameter key="prune_method" value="none"/>
        <parameter key="prune_below_percent" value="3.0"/>
        <parameter key="prune_above_percent" value="30.0"/>
        <parameter key="prune_below_rank" value="0.05"/>
        <parameter key="prune_above_rank" value="0.95"/>
        <parameter key="datamanagement" value="double_sparse_array"/>
        <parameter key="data_management" value="auto"/>
        <parameter key="select_attributes_and_weights" value="false"/>
        <list key="specify_weights"/>
        <process expanded="true">
          <operator activated="true" class="text:transform_cases" compatibility="9.3.001" expanded="true" height="68" name="Transform Cases" width="90" x="112" y="34">
            <parameter key="transform_to" value="lower case"/>
          </operator>
          <operator activated="true" class="text:tokenize" compatibility="9.3.001" expanded="true" height="68" name="Tokenize (2)" width="90" x="246" y="238">
            <parameter key="mode" value="non letters"/>
            <parameter key="characters" value=".:"/>
            <parameter key="language" value="English"/>
            <parameter key="max_token_length" value="3"/>
          </operator>
          <operator activated="true" class="text:filter_by_length" compatibility="9.3.001" expanded="true" height="68" name="Filter Tokens (by Length)" width="90" x="514" y="238">
            <parameter key="min_chars" value="3"/>
            <parameter key="max_chars" value="25"/>
          </operator>
          <operator activated="true" class="text:filter_stopwords_english" compatibility="9.3.001" expanded="true" height="68" name="Filter Stopwords (English)" width="90" x="715" y="238"/>
          <operator activated="true" class="text:filter_tokens_by_content" compatibility="9.3.001" expanded="true" height="68" name="Filter Tokens (by Content)" width="90" x="849" y="34">
            <parameter key="condition" value="matches"/>
            <parameter key="string" value=".*tag.*"/>
            <parameter key="regular_expression" value=".*tag.*"/>
            <parameter key="case_sensitive" value="false"/>
            <parameter key="invert condition" value="true"/>
          </operator>
          <operator activated="true" class="text:generate_n_grams_terms" compatibility="9.3.001" expanded="true" height="68" name="Generate n-Grams (Terms)" width="90" x="1117" y="34">
            <parameter key="max_length" value="2"/>
          </operator>
          <connect from_port="document" to_op="Transform Cases" to_port="document"/>
          <connect from_op="Transform Cases" from_port="document" to_op="Tokenize (2)" to_port="document"/>
          <connect from_op="Tokenize (2)" from_port="document" to_op="Filter Tokens (by Length)" to_port="document"/>
          <connect from_op="Filter Tokens (by Length)" from_port="document" to_op="Filter Stopwords (English)" to_port="document"/>
          <connect from_op="Filter Stopwords (English)" from_port="document" to_op="Filter Tokens (by Content)" to_port="document"/>
          <connect from_op="Filter Tokens (by Content)" from_port="document" to_op="Generate n-Grams (Terms)" to_port="document"/>
          <connect from_op="Generate n-Grams (Terms)" from_port="document" to_port="document 1"/>
          <portSpacing port="source_document" spacing="0"/>
          <portSpacing port="sink_document 1" spacing="0"/>
          <portSpacing port="sink_document 2" spacing="0"/>
        </process>
      </operator>
      <operator activated="false" class="text:process_document_from_data" compatibility="8.1.000" expanded="true" height="82" name="Process Documents from Data" width="90" x="648" y="442">
        <parameter key="create_word_vector" value="true"/>
        <parameter key="vector_creation" value="TF-IDF"/>
        <parameter key="add_meta_information" value="true"/>
        <parameter key="keep_text" value="false"/>
        <parameter key="prune_method" value="percentual"/>
        <parameter key="prune_below_percent" value="3.0"/>
        <parameter key="prune_above_percent" value="30.0"/>
        <parameter key="prune_below_rank" value="0.05"/>
        <parameter key="prune_above_rank" value="0.95"/>
        <parameter key="datamanagement" value="double_sparse_array"/>
        <parameter key="data_management" value="auto"/>
        <parameter key="select_attributes_and_weights" value="false"/>
        <list key="specify_weights"/>
        <process expanded="true">
          <operator activated="true" class="text:tokenize" compatibility="9.3.001" expanded="true" height="68" name="Tokenize" width="90" x="112" y="34">
            <parameter key="mode" value="non letters"/>
            <parameter key="characters" value=".:"/>
            <parameter key="language" value="English"/>
            <parameter key="max_token_length" value="3"/>
          </operator>
          <connect from_port="document" to_op="Tokenize" to_port="document"/>
          <connect from_op="Tokenize" from_port="document" to_port="document 1"/>
          <portSpacing port="source_document" spacing="0"/>
          <portSpacing port="sink_document 1" spacing="0"/>
          <portSpacing port="sink_document 2" spacing="0"/>
        </process>
      </operator>
      <operator activated="true" class="text:wordlist_to_data" compatibility="9.3.001" expanded="true" height="82" name="WordList to Data" width="90" x="849" y="340"/>
      <operator activated="true" class="select_attributes" compatibility="9.6.000" expanded="true" height="82" name="Select Attributes (2)" width="90" x="849" y="187">
        <parameter key="attribute_filter_type" value="subset"/>
        <parameter key="attribute" value=""/>
        <parameter key="attributes" value="word|total"/>
        <parameter key="use_except_expression" value="false"/>
        <parameter key="value_type" value="attribute_value"/>
        <parameter key="use_value_type_exception" value="false"/>
        <parameter key="except_value_type" value="time"/>
        <parameter key="block_type" value="attribute_block"/>
        <parameter key="use_block_type_exception" value="false"/>
        <parameter key="except_block_type" value="value_matrix_row_start"/>
        <parameter key="invert_selection" value="false"/>
        <parameter key="include_special_attributes" value="false"/>
      </operator>
      <operator activated="true" class="r_scripting:execute_r" compatibility="9.1.000" expanded="true" height="103" name="Execute R" width="90" x="849" y="34">
        <parameter key="script" value="# rm_main is a mandatory function, &#10;# the number of arguments has to be the number of input ports (can be none)&#10;rm_main = function(data)&#10;{&#10;&#9;library(&quot;wordcloud&quot;)&#10;&#9;library(&quot;RColorBrewer&quot;)&#10;&#10;&#9;Encoding(data$word) &lt;- &quot;UTF-8&quot;&#10;&#9;&#10;&#9;png(&quot;C:\\Users\\Nick\\Desktop\\capstone\\wordcloud.png&quot;, width=1280,height=800)&#10;&#9;wordcloud(words = data$word, freq = data$total, min.freq = 1, max.words=200, lang= &quot;English&quot;, random.order=FALSE, rot.per=0.35, colors=brewer.pal(11, &quot;Spectral&quot;))&#10;&#9;dev.off()&#10;&#9;&#10;&#9;return (data)&#10;}&#10;"/>
      </operator>
      <operator activated="false" class="r_scripting:execute_r" compatibility="9.1.000" expanded="true" height="82" name="Execute R (2)" width="90" x="715" y="595">
        <parameter key="script" value="# rm_main is a mandatory function, &#10;# the number of arguments has to be the number of input ports (can be none)&#10;rm_main = function(data)&#10;{&#10;&#9;library(&quot;wordcloud2&quot;)&#10;&#10;&#9;x &lt;- word&#10;&#9;y &lt;- total&#10;&#9;df &lt;-  word+total&#10;&#9;png(&quot;C:\\Users\\Nick\\Desktop\\capstone\\wordcloud.png&quot;, width=1280,height=800)&#10;&#9;wordcloud2(df, size=1.6)&#10;&#9;dev.off()&#10;&#9;&#10;&#9;return (data)&#10;}&#10;"/>
      </operator>
      <operator activated="false" class="write_excel" compatibility="9.6.000" expanded="true" height="82" name="Write Excel" width="90" x="581" y="34">
        <parameter key="file_format" value="xlsx"/>
        <enumeration key="sheet_names"/>
        <parameter key="sheet_name" value="RapidMiner Data"/>
        <parameter key="date_format" value="yyyy-MM-dd HH:mm:ss"/>
        <parameter key="number_format" value="#.0"/>
        <parameter key="encoding" value="SYSTEM"/>
      </operator>
      <operator activated="false" class="write_file" compatibility="9.6.000" expanded="true" height="68" name="Write File" width="90" x="715" y="34">
        <parameter key="resource_type" value="file"/>
        <parameter key="filename" value="Z:\capstoneWorkingFiles\capturedData\rapidminerFirstPass\result\rapidminerResults"/>
        <parameter key="mime_type" value="application/octet-stream"/>
      </operator>
      <connect from_op="Retrieve" from_port="output" to_op="Subprocess (miss)" to_port="in 1"/>
      <connect from_op="Subprocess (miss)" from_port="out 1" to_op="Subprocess (clean)" to_port="in 1"/>
      <connect from_op="Subprocess (clean)" from_port="out 1" to_op="Select Attributes" to_port="example set input"/>
      <connect from_op="Select Attributes" from_port="example set output" to_op="Nominal to Text" to_port="example set input"/>
      <connect from_op="Nominal to Text" from_port="example set output" to_op="Process Documents from Data (2)" to_port="example set"/>
      <connect from_op="Process Documents from Data (2)" from_port="word list" to_op="WordList to Data" to_port="word list"/>
      <connect from_op="WordList to Data" from_port="example set" to_op="Select Attributes (2)" to_port="example set input"/>
      <connect from_op="Select Attributes (2)" from_port="example set output" to_op="Execute R" to_port="input 1"/>
      <connect from_op="Execute R" from_port="output 1" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>



Jasmine_

Answers

  • Marco_BoeckMarco_Boeck Team Lead Software Engineering Administrator, Moderator, Employee, Member, University Professor Posts: 1,908   RM Engineering
    Hi,

    May I ask why you don't want to use the integrated Wordcloud plot?

    Regards,
    Marco
    Jasmine_
  • chipp300chipp300 Member Posts: 4 Contributor I
    Hi Marco,

    I plan to use the integrated Wordcloud plot as well but I just wanted to explore other options and refine my methodology more. I liked what the wordcloud2 library could do and was curious as to what I needed to fix in the 'Execute R' process to get wordcloud2 to work. 
    Jasmine_
  • Marco_BoeckMarco_Boeck Team Lead Software Engineering Administrator, Moderator, Employee, Member, University Professor Posts: 1,908   RM Engineering
    Hi,

    Thanks! I was just curious if the built-in version was unusable for one reason or another :)
    Unfortunately I cannot help you with the R script, that's not my area of expertise.

    Regards,
    Marco
    Jasmine_
  • chipp300chipp300 Member Posts: 4 Contributor I
    Oh ok well that's a bummer. Either way though I'm sure I'll be asking about Rapidminer's Wordcloud functionality when I give it a shot over the weekend. Thank you for reaching out!
    sgenzerJasmine_
  • chipp300chipp300 Member Posts: 4 Contributor I
    So I figured out a way to get wordcloud2 to finally work but required me to export the data needed into a spreadsheet, load it into my RStudio instance and was able to call the function and pass in the spreadsheet variable containing words and their frequencies. For those that stumble upon this post please find my xml code below. I would then google "little miss data wordcloud2" for a good tutorial in using R's wordcloud2. Please mark my post as answered. Thank you for your help!

    <?xml version="1.0" encoding="UTF-8"?><process version="9.6.000">
      <context>
        <input/>
        <output/>
        <macros/>
      </context>
      <operator activated="true" class="process" compatibility="9.4.000" expanded="true" name="Process">
        <parameter key="logverbosity" value="init"/>
        <parameter key="random_seed" value="2001"/>
        <parameter key="send_mail" value="never"/>
        <parameter key="notification_email" value=""/>
        <parameter key="process_duration_for_mail" value="30"/>
        <parameter key="encoding" value="SYSTEM"/>
        <process expanded="true">
          <operator activated="true" class="retrieve" compatibility="9.6.000" expanded="true" height="68" name="Retrieve" width="90" x="45" y="34">
            <parameter key="repository_entry" value="//Local Repository/MiamiValleyHospitalsFirstPass"/>
          </operator>
          <operator activated="true" class="subprocess" compatibility="9.6.000" expanded="true" height="82" name="Subprocess (miss)" width="90" x="179" y="340">
            <process expanded="true">
              <operator activated="true" class="replace_missing_values" compatibility="9.6.000" expanded="true" height="103" name="Replace Missing Values" width="90" x="179" y="34">
                <parameter key="return_preprocessing_model" value="false"/>
                <parameter key="create_view" value="false"/>
                <parameter key="attribute_filter_type" value="subset"/>
                <parameter key="attribute" value=""/>
                <parameter key="attributes" value="coordinates|geo|user_description|user_location"/>
                <parameter key="use_except_expression" value="false"/>
                <parameter key="value_type" value="attribute_value"/>
                <parameter key="use_value_type_exception" value="false"/>
                <parameter key="except_value_type" value="time"/>
                <parameter key="block_type" value="attribute_block"/>
                <parameter key="use_block_type_exception" value="false"/>
                <parameter key="except_block_type" value="value_matrix_row_start"/>
                <parameter key="invert_selection" value="false"/>
                <parameter key="include_special_attributes" value="false"/>
                <parameter key="default" value="value"/>
                <list key="columns"/>
                <parameter key="replenishment_value" value="Unknown"/>
              </operator>
              <connect from_port="in 1" to_op="Replace Missing Values" to_port="example set input"/>
              <connect from_op="Replace Missing Values" from_port="example set output" to_port="out 1"/>
              <portSpacing port="source_in 1" spacing="0"/>
              <portSpacing port="source_in 2" spacing="0"/>
              <portSpacing port="sink_out 1" spacing="0"/>
              <portSpacing port="sink_out 2" spacing="0"/>
            </process>
          </operator>
          <operator activated="true" class="subprocess" compatibility="9.6.000" expanded="true" height="82" name="Subprocess (clean)" width="90" x="313" y="340">
            <process expanded="true">
              <operator activated="true" class="replace" compatibility="9.6.000" expanded="true" height="82" name="Replace http*" width="90" x="179" y="34">
                <parameter key="attribute_filter_type" value="single"/>
                <parameter key="attribute" value="text"/>
                <parameter key="attributes" value=""/>
                <parameter key="use_except_expression" value="false"/>
                <parameter key="value_type" value="nominal"/>
                <parameter key="use_value_type_exception" value="false"/>
                <parameter key="except_value_type" value="file_path"/>
                <parameter key="block_type" value="single_value"/>
                <parameter key="use_block_type_exception" value="false"/>
                <parameter key="except_block_type" value="single_value"/>
                <parameter key="invert_selection" value="false"/>
                <parameter key="include_special_attributes" value="false"/>
                <parameter key="replace_what" value="http:.*"/>
                <parameter key="replace_by" value="linktag"/>
              </operator>
              <operator activated="false" class="replace" compatibility="9.6.000" expanded="true" height="82" name="Replace @ width=90" x="313" y="340">
                <parameter key="attribute_filter_type" value="single"/>
                <parameter key="attribute" value="text"/>
                <parameter key="attributes" value=""/>
                <parameter key="use_except_expression" value="false"/>
                <parameter key="value_type" value="nominal"/>
                <parameter key="use_value_type_exception" value="false"/>
                <parameter key="except_value_type" value="file_path"/>
                <parameter key="block_type" value="single_value"/>
                <parameter key="use_block_type_exception" value="false"/>
                <parameter key="except_block_type" value="single_value"/>
                <parameter key="invert_selection" value="false"/>
                <parameter key="include_special_attributes" value="false"/>
                <parameter key="replace_what" value="\@/&gt;
                <parameter key="replace_by" value="attag"/>
              </operator>
              <operator activated="true" class="replace" compatibility="9.6.000" expanded="true" height="82" name="Replace #" width="90" x="447" y="187">
                <parameter key="attribute_filter_type" value="single"/>
                <parameter key="attribute" value="text"/>
                <parameter key="attributes" value=""/>
                <parameter key="use_except_expression" value="false"/>
                <parameter key="value_type" value="nominal"/>
                <parameter key="use_value_type_exception" value="false"/>
                <parameter key="except_value_type" value="file_path"/>
                <parameter key="block_type" value="single_value"/>
                <parameter key="use_block_type_exception" value="false"/>
                <parameter key="except_block_type" value="single_value"/>
                <parameter key="invert_selection" value="false"/>
                <parameter key="include_special_attributes" value="false"/>
                <parameter key="replace_what" value="\#"/>
                <parameter key="replace_by" value="hashtag"/>
              </operator>
              <operator activated="true" class="replace" compatibility="9.6.000" expanded="true" height="82" name="Replace ?" width="90" x="581" y="187">
                <parameter key="attribute_filter_type" value="single"/>
                <parameter key="attribute" value="text"/>
                <parameter key="attributes" value=""/>
                <parameter key="use_except_expression" value="false"/>
                <parameter key="value_type" value="nominal"/>
                <parameter key="use_value_type_exception" value="false"/>
                <parameter key="except_value_type" value="file_path"/>
                <parameter key="block_type" value="single_value"/>
                <parameter key="use_block_type_exception" value="false"/>
                <parameter key="except_block_type" value="single_value"/>
                <parameter key="invert_selection" value="false"/>
                <parameter key="include_special_attributes" value="false"/>
                <parameter key="replace_what" value="\?"/>
                <parameter key="replace_by" value="questiontag"/>
              </operator>
              <operator activated="true" class="replace" compatibility="9.6.000" expanded="true" height="82" name="Replace !" width="90" x="715" y="187">
                <parameter key="attribute_filter_type" value="single"/>
                <parameter key="attribute" value="text"/>
                <parameter key="attributes" value=""/>
                <parameter key="use_except_expression" value="false"/>
                <parameter key="value_type" value="nominal"/>
                <parameter key="use_value_type_exception" value="false"/>
                <parameter key="except_value_type" value="file_path"/>
                <parameter key="block_type" value="single_value"/>
                <parameter key="use_block_type_exception" value="false"/>
                <parameter key="except_block_type" value="single_value"/>
                <parameter key="invert_selection" value="false"/>
                <parameter key="include_special_attributes" value="false"/>
                <parameter key="replace_what" value="\!"/>
                <parameter key="replace_by" value="exclamationtag"/>
              </operator>
              <operator activated="true" class="replace" compatibility="9.6.000" expanded="true" height="82" name="Replace https*" width="90" x="849" y="34">
                <parameter key="attribute_filter_type" value="single"/>
                <parameter key="attribute" value="text"/>
                <parameter key="attributes" value=""/>
                <parameter key="use_except_expression" value="false"/>
                <parameter key="value_type" value="nominal"/>
                <parameter key="use_value_type_exception" value="false"/>
                <parameter key="except_value_type" value="file_path"/>
                <parameter key="block_type" value="single_value"/>
                <parameter key="use_block_type_exception" value="false"/>
                <parameter key="except_block_type" value="single_value"/>
                <parameter key="invert_selection" value="false"/>
                <parameter key="include_special_attributes" value="false"/>
                <parameter key="replace_what" value="https:.*"/>
                <parameter key="replace_by" value="attag"/>
              </operator>
              <connect from_port="in 1" to_op="Replace http*" to_port="example set input"/>
              <connect from_op="Replace http*" from_port="example set output" to_op="Replace #" to_port="example set input"/>
              <connect from_op="Replace #" from_port="example set output" to_op="Replace ?" to_port="example set input"/>
              <connect from_op="Replace ?" from_port="example set output" to_op="Replace !" to_port="example set input"/>
              <connect from_op="Replace !" from_port="example set output" to_op="Replace https*" to_port="example set input"/>
              <connect from_op="Replace https*" from_port="example set output" to_port="out 1"/>
              <portSpacing port="source_in 1" spacing="0"/>
              <portSpacing port="source_in 2" spacing="0"/>
              <portSpacing port="sink_out 1" spacing="0"/>
              <portSpacing port="sink_out 2" spacing="0"/>
            </process>
          </operator>
          <operator activated="true" class="select_attributes" compatibility="9.6.000" expanded="true" height="82" name="Select Attributes" width="90" x="447" y="340">
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="text"/>
            <parameter key="attributes" value=""/>
            <parameter key="use_except_expression" value="false"/>
            <parameter key="value_type" value="attribute_value"/>
            <parameter key="use_value_type_exception" value="false"/>
            <parameter key="except_value_type" value="time"/>
            <parameter key="block_type" value="attribute_block"/>
            <parameter key="use_block_type_exception" value="false"/>
            <parameter key="except_block_type" value="value_matrix_row_start"/>
            <parameter key="invert_selection" value="false"/>
            <parameter key="include_special_attributes" value="false"/>
          </operator>
          <operator activated="true" class="nominal_to_text" compatibility="9.6.000" expanded="true" height="82" name="Nominal to Text" width="90" x="581" y="340">
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="text"/>
            <parameter key="attributes" value=""/>
            <parameter key="use_except_expression" value="false"/>
            <parameter key="value_type" value="nominal"/>
            <parameter key="use_value_type_exception" value="false"/>
            <parameter key="except_value_type" value="file_path"/>
            <parameter key="block_type" value="single_value"/>
            <parameter key="use_block_type_exception" value="false"/>
            <parameter key="except_block_type" value="single_value"/>
            <parameter key="invert_selection" value="false"/>
            <parameter key="include_special_attributes" value="false"/>
          </operator>
          <operator activated="true" class="text:process_document_from_data" compatibility="9.3.001" expanded="true" height="82" name="Process Documents from Data (2)" width="90" x="715" y="340">
            <parameter key="create_word_vector" value="true"/>
            <parameter key="vector_creation" value="TF-IDF"/>
            <parameter key="add_meta_information" value="true"/>
            <parameter key="keep_text" value="false"/>
            <parameter key="prune_method" value="none"/>
            <parameter key="prune_below_percent" value="3.0"/>
            <parameter key="prune_above_percent" value="30.0"/>
            <parameter key="prune_below_rank" value="0.05"/>
            <parameter key="prune_above_rank" value="0.95"/>
            <parameter key="datamanagement" value="double_sparse_array"/>
            <parameter key="data_management" value="auto"/>
            <parameter key="select_attributes_and_weights" value="false"/>
            <list key="specify_weights"/>
            <process expanded="true">
              <operator activated="true" class="text:transform_cases" compatibility="9.3.001" expanded="true" height="68" name="Transform Cases" width="90" x="112" y="34">
                <parameter key="transform_to" value="lower case"/>
              </operator>
              <operator activated="true" class="text:tokenize" compatibility="9.3.001" expanded="true" height="68" name="Tokenize (2)" width="90" x="246" y="238">
                <parameter key="mode" value="non letters"/>
                <parameter key="characters" value=".:"/>
                <parameter key="language" value="English"/>
                <parameter key="max_token_length" value="3"/>
              </operator>
              <operator activated="true" class="text:filter_by_length" compatibility="9.3.001" expanded="true" height="68" name="Filter Tokens (by Length)" width="90" x="514" y="238">
                <parameter key="min_chars" value="3"/>
                <parameter key="max_chars" value="25"/>
              </operator>
              <operator activated="true" class="text:filter_stopwords_english" compatibility="9.3.001" expanded="true" height="68" name="Filter Stopwords (English)" width="90" x="715" y="238"/>
              <operator activated="true" class="text:filter_tokens_by_content" compatibility="9.3.001" expanded="true" height="68" name="Filter Tokens (by Content)" width="90" x="849" y="34">
                <parameter key="condition" value="matches"/>
                <parameter key="string" value=".*tag.*"/>
                <parameter key="regular_expression" value=".*tag.*"/>
                <parameter key="case_sensitive" value="false"/>
                <parameter key="invert condition" value="true"/>
              </operator>
              <operator activated="true" class="text:generate_n_grams_terms" compatibility="9.3.001" expanded="true" height="68" name="Generate n-Grams (Terms)" width="90" x="1117" y="34">
                <parameter key="max_length" value="2"/>
              </operator>
              <connect from_port="document" to_op="Transform Cases" to_port="document"/>
              <connect from_op="Transform Cases" from_port="document" to_op="Tokenize (2)" to_port="document"/>
              <connect from_op="Tokenize (2)" from_port="document" to_op="Filter Tokens (by Length)" to_port="document"/>
              <connect from_op="Filter Tokens (by Length)" from_port="document" to_op="Filter Stopwords (English)" to_port="document"/>
              <connect from_op="Filter Stopwords (English)" from_port="document" to_op="Filter Tokens (by Content)" to_port="document"/>
              <connect from_op="Filter Tokens (by Content)" from_port="document" to_op="Generate n-Grams (Terms)" to_port="document"/>
              <connect from_op="Generate n-Grams (Terms)" from_port="document" to_port="document 1"/>
              <portSpacing port="source_document" spacing="0"/>
              <portSpacing port="sink_document 1" spacing="0"/>
              <portSpacing port="sink_document 2" spacing="0"/>
            </process>
          </operator>
          <operator activated="false" class="text:process_document_from_data" compatibility="8.1.000" expanded="true" height="82" name="Process Documents from Data" width="90" x="648" y="442">
            <parameter key="create_word_vector" value="true"/>
            <parameter key="vector_creation" value="TF-IDF"/>
            <parameter key="add_meta_information" value="true"/>
            <parameter key="keep_text" value="false"/>
            <parameter key="prune_method" value="percentual"/>
            <parameter key="prune_below_percent" value="3.0"/>
            <parameter key="prune_above_percent" value="30.0"/>
            <parameter key="prune_below_rank" value="0.05"/>
            <parameter key="prune_above_rank" value="0.95"/>
            <parameter key="datamanagement" value="double_sparse_array"/>
            <parameter key="data_management" value="auto"/>
            <parameter key="select_attributes_and_weights" value="false"/>
            <list key="specify_weights"/>
            <process expanded="true">
              <operator activated="true" class="text:tokenize" compatibility="9.3.001" expanded="true" height="68" name="Tokenize" width="90" x="112" y="34">
                <parameter key="mode" value="non letters"/>
                <parameter key="characters" value=".:"/>
                <parameter key="language" value="English"/>
                <parameter key="max_token_length" value="3"/>
              </operator>
              <connect from_port="document" to_op="Tokenize" to_port="document"/>
              <connect from_op="Tokenize" from_port="document" to_port="document 1"/>
              <portSpacing port="source_document" spacing="0"/>
              <portSpacing port="sink_document 1" spacing="0"/>
              <portSpacing port="sink_document 2" spacing="0"/>
            </process>
          </operator>
          <operator activated="true" class="text:wordlist_to_data" compatibility="9.3.001" expanded="true" height="82" name="WordList to Data" width="90" x="849" y="340"/>
          <operator activated="true" class="select_attributes" compatibility="9.6.000" expanded="true" height="82" name="Select Attributes (2)" width="90" x="849" y="238">
            <parameter key="attribute_filter_type" value="subset"/>
            <parameter key="attribute" value=""/>
            <parameter key="attributes" value="word|total"/>
            <parameter key="use_except_expression" value="false"/>
            <parameter key="value_type" value="attribute_value"/>
            <parameter key="use_value_type_exception" value="false"/>
            <parameter key="except_value_type" value="time"/>
            <parameter key="block_type" value="attribute_block"/>
            <parameter key="use_block_type_exception" value="false"/>
            <parameter key="except_block_type" value="value_matrix_row_start"/>
            <parameter key="invert_selection" value="false"/>
            <parameter key="include_special_attributes" value="false"/>
          </operator>
          <operator activated="true" class="write_excel" compatibility="9.6.000" expanded="true" height="103" name="Write Excel" width="90" x="849" y="85">
            <parameter key="file_format" value="xlsx"/>
            <enumeration key="sheet_names"/>
            <parameter key="sheet_name" value="RapidMiner Data"/>
            <parameter key="date_format" value="yyyy-MM-dd HH:mm:ss"/>
            <parameter key="number_format" value="#.0"/>
            <parameter key="encoding" value="SYSTEM"/>
          </operator>
          <operator activated="true" class="write_file" compatibility="9.6.000" expanded="true" height="68" name="Write File" width="90" x="1117" y="85">
            <parameter key="resource_type" value="file"/>
            <parameter key="filename" value="Z:\capstoneWorkingFiles\capturedData\rapidminerFirstPass\result\rapidminerResults"/>
            <parameter key="mime_type" value="application/octet-stream"/>
          </operator>
          <operator activated="false" class="r_scripting:execute_r" compatibility="9.1.000" expanded="true" height="82" name="Execute R" width="90" x="983" y="442">
            <parameter key="script" value="# rm_main is a mandatory function, &#10;# the number of arguments has to be the number of input ports (can be none)&#10;rm_main = function(data)&#10;{&#10;&#9;library(&quot;wordcloud&quot;)&#10;&#9;library(&quot;RColorBrewer&quot;)&#10;&#10;&#9;Encoding(data$word) &lt;- &quot;UTF-8&quot;&#10;&#9;&#10;&#9;png(&quot;C:\\Users\\Nick\\Desktop\\wordcloud.png&quot;, width=1280,height=800)&#10;&#9;wordcloud(words = data$word, freq = data$total, min.freq = 1, max.words=200, lang= &quot;English&quot;, random.order=FALSE, rot.per=0.35, colors=brewer.pal(11, &quot;Spectral&quot;))&#10;&#9;dev.off()&#10;&#9;&#10;&#9;return (data)&#10;}&#10;"/>
          </operator>
          <operator activated="false" class="r_scripting:execute_r" compatibility="9.1.000" expanded="true" height="82" name="Execute R (2)" width="90" x="1117" y="442">
            <parameter key="script" value="# rm_main is a mandatory function, &#10;# the number of arguments has to be the number of input ports (can be none)&#10;rm_main = function(data)&#10;{&#10;&#9;library(wordcloud2)&#10;&#9;&#10;&#9;Encoding(data$word) &lt;- &quot;UTF-8&quot;&#10;&#9;&#10;&#9;png(&quot;C:\\Users\\Nick\\Desktop\\wordcloud.png&quot;, width=1280,height=800)&#10;&#9;wordcloud2(data$word, size=1.6, color=rep_len( c(&quot;green&quot;,&quot;blue&quot;), nrow(data$word) ) )&#10;&#9;dev.off()&#10;&#9;&#10;&#9;return (data)&#10;}&#10;"/>
          </operator>
          <connect from_op="Retrieve" from_port="output" to_op="Subprocess (miss)" to_port="in 1"/>
          <connect from_op="Subprocess (miss)" from_port="out 1" to_op="Subprocess (clean)" to_port="in 1"/>
          <connect from_op="Subprocess (clean)" from_port="out 1" to_op="Select Attributes" to_port="example set input"/>
          <connect from_op="Select Attributes" from_port="example set output" to_op="Nominal to Text" to_port="example set input"/>
          <connect from_op="Nominal to Text" from_port="example set output" to_op="Process Documents from Data (2)" to_port="example set"/>
          <connect from_op="Process Documents from Data (2)" from_port="word list" to_op="WordList to Data" to_port="word list"/>
          <connect from_op="WordList to Data" from_port="example set" to_op="Select Attributes (2)" to_port="example set input"/>
          <connect from_op="Select Attributes (2)" from_port="example set output" to_op="Write Excel" to_port="input"/>
          <connect from_op="Write Excel" from_port="file" to_op="Write File" to_port="file"/>
          <connect from_op="Write File" from_port="file" to_port="result 1"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
        </process>
      </operator>
    </process>



Sign In or Register to comment.