Due to recent updates, all users are required to create an Altair One account to log in to the RapidMiner community. Click the Register button to create your account using the same email that you have previously used to log in to the RapidMiner community. This will ensure that any previously created content will be synced to your Altair One account. Once you log in, you will be asked to provide a username that identifies you to other Community users. Email us at Community with questions.

Process 1 file out of multiple in Email

mtranmtran Member Posts: 4 Contributor I
edited June 2019 in Help
Hello,

  I'll start off by saying that I don't have any issues running my process when there is only 1 attachment to the email received.  The issue I am running into is when there is more than 1 attachment in the email.  I only want to process 1 of the 2 files attached.  I tried putting the file name into the 'attachment file pattern' field, but I'm not sure if I have entered it correctly.  
Attachment File Pattern: 'test.xlsx'

Any help would be greatly appreciated, and thanks in advance.  I've posted my XML below.


<?xml version="1.0" encoding="UTF-8"?><process version="9.1.000">
  <!--
    RapidMiner process definition (version 9.1.000).
    Structure: the root operator "Process" contains a single "Handle Exception"
    operator. Each of its two subprocesses holds one
    "Process Documents from Mail Store" operator that reads an IMAP INBOX,
    downloads attachments, selects one attachment and reads it with Read Excel.
    The output of Handle Exception ("out 1") is wired to the process result port
    "result 1".
  -->
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="9.1.000" expanded="true" name="Process">
    <parameter key="logverbosity" value="init"/>
    <parameter key="random_seed" value="2001"/>
    <parameter key="send_mail" value="never"/>
    <parameter key="notification_email" value=""/>
    <parameter key="process_duration_for_mail" value="30"/>
    <parameter key="encoding" value="SYSTEM"/>
    <process expanded="true">
      <!--
        Handle Exception has two <process> children below.
        NOTE(review): presumably the first is the "try" branch and the second is
        the fallback that runs when the first fails; confirm against the operator
        documentation. The two branches appear to differ only in the operator
        name suffixes, "(2)" vs "(3)", and the canvas y position.
      -->
      <operator activated="true" class="handle_exception" compatibility="9.1.000" expanded="true" height="82" name="Handle Exception" width="90" x="246" y="34">
        <!-- First subprocess of Handle Exception. -->
        <process expanded="true">
          <operator activated="true" class="text:process_mail_documents" compatibility="8.1.000" expanded="true" height="82" name="Process Documents from Mail Store (2)" width="90" x="179" y="136">
            <parameter key="create_word_vector" value="false"/>
            <parameter key="vector_creation" value="TF-IDF"/>
            <parameter key="add_meta_information" value="false"/>
            <parameter key="keep_text" value="true"/>
            <parameter key="prune_method" value="none"/>
            <parameter key="prune_below_percent" value="3.0"/>
            <parameter key="prune_above_percent" value="30.0"/>
            <parameter key="prune_below_rank" value="0.05"/>
            <parameter key="prune_above_rank" value="0.95"/>
            <parameter key="datamanagement" value="double_sparse_array"/>
            <parameter key="data_management" value="auto"/>
            <!-- Mail store settings: IMAP, folder INBOX, non-recursive; only unseen messages are read and they are marked as seen. -->
            <parameter key="define_store" value="explicit"/>
            <parameter key="jndi_name" value="java:/Mail"/>
            <list key="connection_properties"/>
            <parameter key="protocol" value="imap"/>
            <parameter key="break_on_invalid_emails" value="false"/>
            <parameter key="only_unseen" value="true"/>
            <parameter key="mark_seen" value="true"/>
            <parameter key="delete_messages" value="false"/>
            <parameter key="recursive" value="false"/>
            <parameter key="folder" value="INBOX"/>
            <!--
              Attachment handling: attachments are downloaded, with the file pattern
              set to the literal file name "test.xlsx" and no MIME-type restriction.
              NOTE(review): whether the pattern is interpreted as a glob or as a
              regular expression is not visible here; if it is a regular expression,
              the "." would match any character. Confirm against the operator
              documentation.
            -->
            <parameter key="download attachments" value="true"/>
            <parameter key="attachment file-pattern" value="test.xlsx"/>
            <parameter key="attachment MIME-type" value=""/>
            <process expanded="true">
              <!-- Picks the entry at index 1 from the incoming attachments collection (see the connections below). -->
              <operator activated="true" class="select" compatibility="9.1.000" expanded="true" height="68" name="Select (2)" width="90" x="112" y="85">
                <parameter key="index" value="1"/>
                <parameter key="unfold" value="false"/>
              </operator>
              <!-- Reads the selected attachment as an Excel file, choosing the sheet by name ("Test 2"). -->
              <operator activated="true" class="read_excel" compatibility="8.1.000" expanded="true" height="68" name="Read Excel (2)" width="90" x="246" y="85">
                <parameter key="sheet_selection" value="sheet name"/>
                <parameter key="sheet_name" value="Test 2"/>
                <parameter key="sheet_number" value="1"/>
                <parameter key="imported_cell_range" value="A1"/>
                <parameter key="encoding" value="SYSTEM"/>
                <parameter key="first_row_as_names" value="false"/>
                <list key="annotations"/>
                <parameter key="date_format" value=""/>
                <parameter key="time_zone" value="SYSTEM"/>
                <parameter key="locale" value="English (United States)"/>
                <parameter key="read_all_values_as_polynominal" value="false"/>
                <list key="data_set_meta_data_information"/>
                <parameter key="read_not_matching_values_as_missings" value="true"/>
                <parameter key="datamanagement" value="double_array"/>
                <parameter key="data_management" value="auto"/>
              </operator>
              <!-- The next two operators are deactivated (activated="false") and have no connections in this subprocess. -->
              <operator activated="false" class="store" compatibility="9.1.000" expanded="true" height="68" name="Store (2)" width="90" x="581" y="340">
                <parameter key="repository_entry" value="//Local Repository/MailTest"/>
              </operator>
              <operator activated="false" class="text:data_to_documents" compatibility="8.1.000" expanded="true" height="68" name="Data to Documents (2)" width="90" x="447" y="340">
                <parameter key="select_attributes_and_weights" value="false"/>
                <list key="specify_weights"/>
              </operator>
              <!--
                Wiring: the mail document is passed straight to sink "document 1";
                the attachments collection goes to Select (2), then Read Excel (2),
                whose output goes to sink "document 2".
              -->
              <connect from_port="document" to_port="document 1"/>
              <connect from_port="attachments" to_op="Select (2)" to_port="collection"/>
              <connect from_op="Select (2)" from_port="selected" to_op="Read Excel (2)" to_port="file"/>
              <connect from_op="Read Excel (2)" from_port="output" to_port="document 2"/>
              <portSpacing port="source_document" spacing="0"/>
              <portSpacing port="source_attachments" spacing="0"/>
              <portSpacing port="sink_document 1" spacing="0"/>
              <portSpacing port="sink_document 2" spacing="0"/>
              <portSpacing port="sink_document 3" spacing="0"/>
            </process>
          </operator>
          <connect from_op="Process Documents from Mail Store (2)" from_port="example set" to_port="out 1"/>
          <portSpacing port="source_in 1" spacing="0"/>
          <portSpacing port="sink_out 1" spacing="0"/>
          <portSpacing port="sink_out 2" spacing="0"/>
        </process>
        <!-- Second subprocess of Handle Exception; same configuration as the first, with operators suffixed "(3)". -->
        <process expanded="true">
          <operator activated="true" class="text:process_mail_documents" compatibility="8.1.000" expanded="true" height="82" name="Process Documents from Mail Store (3)" width="90" x="179" y="187">
            <parameter key="create_word_vector" value="false"/>
            <parameter key="vector_creation" value="TF-IDF"/>
            <parameter key="add_meta_information" value="false"/>
            <parameter key="keep_text" value="true"/>
            <parameter key="prune_method" value="none"/>
            <parameter key="prune_below_percent" value="3.0"/>
            <parameter key="prune_above_percent" value="30.0"/>
            <parameter key="prune_below_rank" value="0.05"/>
            <parameter key="prune_above_rank" value="0.95"/>
            <parameter key="datamanagement" value="double_sparse_array"/>
            <parameter key="data_management" value="auto"/>
            <!-- Mail store settings: IMAP, folder INBOX, non-recursive; only unseen messages are read and they are marked as seen. -->
            <parameter key="define_store" value="explicit"/>
            <parameter key="jndi_name" value="java:/Mail"/>
            <list key="connection_properties"/>
            <parameter key="protocol" value="imap"/>
            <parameter key="break_on_invalid_emails" value="false"/>
            <parameter key="only_unseen" value="true"/>
            <parameter key="mark_seen" value="true"/>
            <parameter key="delete_messages" value="false"/>
            <parameter key="recursive" value="false"/>
            <parameter key="folder" value="INBOX"/>
            <!-- Attachment handling: same settings as in the first subprocess (pattern "test.xlsx", empty MIME-type). -->
            <parameter key="download attachments" value="true"/>
            <parameter key="attachment file-pattern" value="test.xlsx"/>
            <parameter key="attachment MIME-type" value=""/>
            <process expanded="true">
              <!-- Picks the entry at index 1 from the incoming attachments collection (see the connections below). -->
              <operator activated="true" class="select" compatibility="9.1.000" expanded="true" height="68" name="Select (3)" width="90" x="112" y="85">
                <parameter key="index" value="1"/>
                <parameter key="unfold" value="false"/>
              </operator>
              <!-- Reads the selected attachment as an Excel file, choosing the sheet by name ("Test 2"). -->
              <operator activated="true" class="read_excel" compatibility="8.1.000" expanded="true" height="68" name="Read Excel (3)" width="90" x="246" y="85">
                <parameter key="sheet_selection" value="sheet name"/>
                <parameter key="sheet_name" value="Test 2"/>
                <parameter key="sheet_number" value="1"/>
                <parameter key="imported_cell_range" value="A1"/>
                <parameter key="encoding" value="SYSTEM"/>
                <parameter key="first_row_as_names" value="false"/>
                <list key="annotations"/>
                <parameter key="date_format" value=""/>
                <parameter key="time_zone" value="SYSTEM"/>
                <parameter key="locale" value="English (United States)"/>
                <parameter key="read_all_values_as_polynominal" value="false"/>
                <list key="data_set_meta_data_information"/>
                <parameter key="read_not_matching_values_as_missings" value="true"/>
                <parameter key="datamanagement" value="double_array"/>
                <parameter key="data_management" value="auto"/>
              </operator>
              <!-- The next two operators are deactivated (activated="false") and have no connections in this subprocess. -->
              <operator activated="false" class="store" compatibility="9.1.000" expanded="true" height="68" name="Store (3)" width="90" x="581" y="340">
                <parameter key="repository_entry" value="//Local Repository/MailTest"/>
              </operator>
              <operator activated="false" class="text:data_to_documents" compatibility="8.1.000" expanded="true" height="68" name="Data to Documents (3)" width="90" x="447" y="340">
                <parameter key="select_attributes_and_weights" value="false"/>
                <list key="specify_weights"/>
              </operator>
              <!--
                Wiring: the mail document is passed straight to sink "document 1";
                the attachments collection goes to Select (3), then Read Excel (3),
                whose output goes to sink "document 2".
              -->
              <connect from_port="document" to_port="document 1"/>
              <connect from_port="attachments" to_op="Select (3)" to_port="collection"/>
              <connect from_op="Select (3)" from_port="selected" to_op="Read Excel (3)" to_port="file"/>
              <connect from_op="Read Excel (3)" from_port="output" to_port="document 2"/>
              <portSpacing port="source_document" spacing="0"/>
              <portSpacing port="source_attachments" spacing="0"/>
              <portSpacing port="sink_document 1" spacing="0"/>
              <portSpacing port="sink_document 2" spacing="0"/>
              <portSpacing port="sink_document 3" spacing="0"/>
            </process>
          </operator>
          <connect from_op="Process Documents from Mail Store (3)" from_port="example set" to_port="out 1"/>
          <portSpacing port="source_in 1" spacing="0"/>
          <portSpacing port="sink_out 1" spacing="0"/>
          <portSpacing port="sink_out 2" spacing="0"/>
        </process>
      </operator>
      <!-- The output of Handle Exception is delivered as the first process result. -->
      <connect from_op="Handle Exception" from_port="out 1" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>

Best Answer

Answers

  • PapadPapad Member Posts: 68 Guru
    Hello @mtran ,
    In my opinion, this xml code doesn't have any relationship with the email process.
    Also, although I don't know whether there is a specific operator for that, generally when you want to do 
    the same thing multiple times, there are "loop" operators. So I suggest you have a look at them.
    Hope it helps.
Sign In or Register to comment.