Options

Write WordList to a text file (txt) [SOLVED]

josejose Member Posts: 16 Contributor II
Hi,
I have a process that reads an Excel file and then processes its contents using the "Process Documents from Data" operator.
I want to write the output of this operator (the word list) to a text file (txt), but I do not know which operator to use to accomplish this.

The XML of the process is:

<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- Process definition: reads an Excel sheet, runs it through a text-processing
     pipeline, and delivers the resulting example set and word list on the
     process result ports. -->
<process version="5.2.003">
 <context>
   <input/>
   <output/>
   <macros/>
 </context>
 <operator activated="true" class="process" compatibility="5.2.003" expanded="true" name="Process">
   <process expanded="true" height="370" width="614">
     <!-- Step 1: import cell range A1:CE522 from the workbook. first_row_as_names
          is off; annotation entry 0 is set to "Name" (presumably marking row 0
          as the header row; confirm). -->
     <operator activated="true" class="read_excel" compatibility="5.2.003" expanded="true" height="60" name="Read Excel" width="90" x="45" y="120">
       <parameter key="excel_file" value="/home/pepe/Escritorio/documentos de prueba/Septiembre.xls"/>
       <parameter key="imported_cell_range" value="A1:CE522"/>
       <parameter key="first_row_as_names" value="false"/>
       <list key="annotations">
         <parameter key="0" value="Name"/>
       </list>
       <!-- NOTE(review): the value below contains U+FFFD replacement characters,
            presumably from an encoding mismatch when the process was saved or
            pasted. Confirm the intended text against the Excel column header. -->
       <list key="data_set_meta_data_information">
         <parameter key="0" value="�Qu� es lo que m�s te ha gustado de Vueling y quieres que sigamos haciendo, o por el contrario, lo que menos te ha gustado y nos sugieres cambiar? .true.text.attribute"/>
       </list>
     </operator>
     <!-- Step 2: text processing. Vector creation is set to Term Occurrences and
          pruning is absolute, with bounds 2 (below) and 9999 (above). -->
     <operator activated="true" class="text:process_document_from_data" compatibility="5.2.001" expanded="true" height="76" name="Process Documents from Data" width="90" x="246" y="75">
       <parameter key="vector_creation" value="Term Occurrences"/>
       <parameter key="prune_method" value="absolute"/>
       <parameter key="prune_below_absolute" value="2"/>
       <parameter key="prune_above_absolute" value="9999"/>
       <list key="specify_weights"/>
       <!-- Inner pipeline. The connect elements at the end of this subprocess wire
            the operators in this order: Tokenize, Transform Cases,
            Filter Stopwords (Dictionary), Filter Tokens (by Length),
            Replace Tokens, Generate n-Grams (Terms). -->
       <process expanded="true" height="505" width="636">
         <operator activated="true" class="text:tokenize" compatibility="5.2.001" expanded="true" height="60" name="Tokenize" width="90" x="103" y="58"/>
         <operator activated="true" class="text:transform_cases" compatibility="5.2.001" expanded="true" height="60" name="Transform Cases" width="90" x="246" y="75"/>
         <operator activated="true" class="text:filter_stopwords_dictionary" compatibility="5.2.001" expanded="true" height="60" name="Filter Stopwords (Dictionary)" width="90" x="45" y="165">
           <parameter key="file" value="/home/pepe/Escritorio/documentos de prueba/stop words.txt"/>
         </operator>
         <operator activated="true" class="text:filter_by_length" compatibility="5.2.001" expanded="true" height="60" name="Filter Tokens (by Length)" width="90" x="112" y="255"/>
         <!-- Replacement dictionary: maps plural and differently-cased tokens to a
              single form (e.g. "horas" to "hora").
              NOTE(review): Transform Cases runs before this operator; if it
              lower-cases tokens (its setting is not shown here), the keys
              "Vueling", "MALETAS" and "VUELOS" would never match. Confirm. -->
         <operator activated="true" class="text:replace_tokens" compatibility="5.2.001" expanded="true" height="60" name="Replace Tokens" width="90" x="246" y="255">
           <list key="replace_dictionary">
             <parameter key="horas" value="hora"/>
             <parameter key="maletas" value="maleta"/>
             <parameter key="vuelos" value="vuelo"/>
             <parameter key="precios" value="precio"/>
             <parameter key="asientos" value="asiento"/>
             <parameter key="Vueling" value="vueling"/>
             <parameter key="MALETAS" value="maleta"/>
             <parameter key="VUELOS" value="vuelo"/>
           </list>
         </operator>
         <operator activated="true" class="text:generate_n_grams_terms" compatibility="5.2.001" expanded="true" height="60" name="Generate n-Grams (Terms)" width="90" x="380" y="255"/>
         <connect from_port="document" to_op="Tokenize" to_port="document"/>
         <connect from_op="Tokenize" from_port="document" to_op="Transform Cases" to_port="document"/>
         <connect from_op="Transform Cases" from_port="document" to_op="Filter Stopwords (Dictionary)" to_port="document"/>
         <connect from_op="Filter Stopwords (Dictionary)" from_port="document" to_op="Filter Tokens (by Length)" to_port="document"/>
         <connect from_op="Filter Tokens (by Length)" from_port="document" to_op="Replace Tokens" to_port="document"/>
         <connect from_op="Replace Tokens" from_port="document" to_op="Generate n-Grams (Terms)" to_port="document"/>
         <connect from_op="Generate n-Grams (Terms)" from_port="document" to_port="document 1"/>
         <portSpacing port="source_document" spacing="0"/>
         <portSpacing port="sink_document 1" spacing="0"/>
         <portSpacing port="sink_document 2" spacing="0"/>
       </process>
     </operator>
     <!-- Top-level wiring: Read Excel output feeds Process Documents from Data;
          its example set goes to result 1 and its word list to result 2. -->
     <connect from_op="Read Excel" from_port="output" to_op="Process Documents from Data" to_port="example set"/>
     <connect from_op="Process Documents from Data" from_port="example set" to_port="result 1"/>
     <connect from_op="Process Documents from Data" from_port="word list" to_port="result 2"/>
     <portSpacing port="source_input 1" spacing="0"/>
     <portSpacing port="sink_result 1" spacing="0"/>
     <portSpacing port="sink_result 2" spacing="0"/>
     <portSpacing port="sink_result 3" spacing="0"/>
   </process>
 </operator>
</process>

Answers

  • Options
    MariusHelfMariusHelf RapidMiner Certified Expert, Member Posts: 1,869 Unicorn
    Hi Jose,

    you can use the WordList to Data operator to convert the Wordlist to an example set and then write it to disk with e.g. Write CSV.

    All the best,
    Marius
  • Options
    josejose Member Posts: 16 Contributor II
    Thanks Marius.

    You are a genius.
Sign In or Register to comment.