FP-Growth's huge memory consumption

daradara Member Posts: 29 Contributor II
edited November 2018 in Help
Hello

I used a database of 23 resumes, each less than 3 pages of text, and a simple process to find the associations. In videos of previous RapidMiner versions this works fast, but even with only 23 resumes it runs for hours, and FP-Growth consumes vast amounts of memory (8 GB and more)!

I can send the resumes in a zip file if needed.

See the code:
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  RapidMiner process: mines association rules between terms of text documents.
  Pipeline: Retrieve -> Process Documents from Data (tokenize, lower/upper case
  transform, English stopword filter, token length filter) -> Numerical to
  Binominal -> FP-Growth -> Create Association Rules.
-->
<process version="5.3.013">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="5.3.013" expanded="true" name="Process">
    <process expanded="true">
      <!-- Loads the document example set from the repository entry "investmenBankingMA". -->
      <operator activated="true" class="retrieve" compatibility="5.3.013" expanded="true" height="60" name="Retrieve investmenBankingMA" width="90" x="45" y="525">
        <parameter key="repository_entry" value="investmenBankingMA"/>
      </operator>
      <!-- Builds the word vector. Absolute pruning: the lower bound is 2, the upper bound is 9999. -->
      <operator activated="true" class="text:process_document_from_data" compatibility="5.3.001" expanded="true" height="76" name="Process Documents from Data" width="90" x="179" y="435">
        <parameter key="add_meta_information" value="false"/>
        <parameter key="keep_text" value="true"/>
        <parameter key="prune_method" value="absolute"/>
        <parameter key="prune_below_absolute" value="2"/>
        <!-- NOTE(review): 9999 effectively disables upper pruning for a small corpus;
             consider lowering it so that terms present in almost every document are dropped. -->
        <parameter key="prune_above_absolute" value="9999"/>
        <list key="specify_weights"/>
        <process expanded="true">
          <operator activated="true" class="text:tokenize" compatibility="5.3.001" expanded="true" height="60" name="Tokenize" width="90" x="112" y="165"/>
          <operator activated="true" class="text:transform_cases" compatibility="5.3.001" expanded="true" height="60" name="Transform Cases" width="90" x="246" y="165"/>
          <operator activated="true" class="text:filter_stopwords_english" compatibility="5.3.001" expanded="true" height="60" name="Filter Stopwords (English)" width="90" x="380" y="165"/>
          <!-- Drops tokens shorter than 3 characters. -->
          <operator activated="true" class="text:filter_by_length" compatibility="5.3.001" expanded="true" height="60" name="Filter Tokens (by Length)" width="90" x="581" y="165">
            <parameter key="min_chars" value="3"/>
          </operator>
          <connect from_port="document" to_op="Tokenize" to_port="document"/>
          <connect from_op="Tokenize" from_port="document" to_op="Transform Cases" to_port="document"/>
          <connect from_op="Transform Cases" from_port="document" to_op="Filter Stopwords (English)" to_port="document"/>
          <connect from_op="Filter Stopwords (English)" from_port="document" to_op="Filter Tokens (by Length)" to_port="document"/>
          <connect from_op="Filter Tokens (by Length)" from_port="document" to_port="document 1"/>
          <portSpacing port="source_document" spacing="0"/>
          <portSpacing port="sink_document 1" spacing="0"/>
          <portSpacing port="sink_document 2" spacing="0"/>
        </process>
      </operator>
      <!-- FP-Growth requires binominal attributes, so the numeric word vector is converted first. -->
      <operator activated="true" class="numerical_to_binominal" compatibility="5.3.013" expanded="true" height="76" name="Numerical to Binominal" width="90" x="313" y="435"/>
      <operator activated="true" class="fp_growth" compatibility="5.3.013" expanded="true" height="76" name="FP-Growth" width="90" x="447" y="435">
        <!-- Raised from 0.05. For a corpus of 23 documents (as described alongside this
             process) 0.05 is about 1.15 documents, so nearly every term combination that
             survives pruning counts as frequent and the number of item sets grows
             combinatorially. 0.3 is about 7 of 23 documents; tune for your data.
             NOTE(review): how this interacts with find_min_number_of_itemsets (left at
             its default here) should be confirmed against the operator documentation. -->
        <parameter key="min_support" value="0.3"/>
        <!-- Caps the length of generated item sets; the default leaves it unbounded,
             which is the main driver of runtime and memory on wide word vectors. -->
        <parameter key="max_items" value="3"/>
      </operator>
      <!-- Keeps only rules with confidence of at least 0.95. -->
      <operator activated="true" class="create_association_rules" compatibility="5.3.013" expanded="true" height="76" name="Create Association Rules" width="90" x="581" y="435">
        <parameter key="min_confidence" value="0.95"/>
      </operator>
      <connect from_op="Retrieve investmenBankingMA" from_port="output" to_op="Process Documents from Data" to_port="example set"/>
      <connect from_op="Process Documents from Data" from_port="example set" to_op="Numerical to Binominal" to_port="example set input"/>
      <connect from_op="Numerical to Binominal" from_port="example set output" to_op="FP-Growth" to_port="example set"/>
      <connect from_op="FP-Growth" from_port="frequent sets" to_op="Create Association Rules" to_port="item sets"/>
      <connect from_op="Create Association Rules" from_port="rules" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>
Sign In or Register to comment.