🎉 🎉 RAPIDMINER 9.10 IS OUT!!! 🎉🎉

Download the latest version helping analytics teams accelerate time-to-value for streaming and IIOT use cases.

CLICK HERE TO DOWNLOAD

"what's wrong with my Generate Attribute?"

cindyharper Member Posts: 9 Contributor II
edited June 2019 in Help
I get a Syntax Error:
An operator failed to generate a macro, attribute or other object which is calculated on the fly.
My XML text entries (in InstitutionName or macroGoogleQuery) have CRLFs in them - could that be the problem? How do I filter out the \r and \n characters?

<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  RapidMiner 5.2 process posted with the question.
  Flow: Read XML (one example per //html/Institution node) feeds Loop Examples,
  whose subprocess runs Extract Macro (fills macroGoogleQuery from the
  InstitutionName[1]/text() attribute) and then Generate Attributes (builds the
  urlGoogleQuery attribute from that macro). The loop output goes to result 1.
-->
<process version="5.2.003">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="5.2.003" expanded="true" name="Process">
    <parameter key="logverbosity" value="all"/>
    <parameter key="logfile" value="C:\Users\charper\Documents\Rapid-I\GetPagesLog.txt"/>
    <parameter key="resultfile" value="C:\Users\charper\Documents\Rapid-I\ExampleSet1.csv"/>
    <parameter key="encoding" value="UTF-8"/>
    <process expanded="true" height="428" width="547">
      <!-- Read XML: each attribute is named after the XPath that produces it, so the
           attribute names contain square brackets, slashes and parentheses. -->
      <operator activated="true" class="read_xml" compatibility="5.2.003" expanded="true" height="60" name="Read XML" width="90" x="45" y="30">
        <parameter key="file" value="C:\Users\charper\Documents\ALiNBUS\libwebcatsout-20120303.xml"/>
        <parameter key="xpath_for_examples" value="//html/Institution"/>
        <enumeration key="xpaths_for_attributes">
          <parameter key="xpath_for_attribute" value="InstitutionName[1]/text()"/>
          <parameter key="xpath_for_attribute" value="Row[1]/attribute::rownum"/>
          <parameter key="xpath_for_attribute" value="LibraryName[1]/text()"/>
          <parameter key="xpath_for_attribute" value="Location[1]/text()"/>
          <parameter key="xpath_for_attribute" value="Consortium[1]/text()"/>
          <parameter key="xpath_for_attribute" value="website[1]/text()"/>
          <parameter key="xpath_for_attribute" value="catalog[1]/text()"/>
          <parameter key="xpath_for_attribute" value="CatalogVendor[1]/text()"/>
        </enumeration>
        <list key="namespaces">
          <parameter key="xsi" value="http://www.w3.org/2001/XMLSchema-instance"/>
        </list>
        <parameter key="use_default_namespace" value="false"/>
        <list key="annotations"/>
        <list key="data_set_meta_data_information">
          <parameter key="0" value="InstitutionName[1]/text().true.polynominal.attribute"/>
          <parameter key="1" value="Row[1]/attribute::rownum.true.integer.attribute"/>
          <parameter key="2" value="LibraryName[1]/text().true.text.attribute"/>
          <parameter key="3" value="Location[1]/text().true.polynominal.attribute"/>
          <parameter key="4" value="Consortium[1]/text().true.polynominal.attribute"/>
          <parameter key="5" value="website[1]/text().true.text.attribute"/>
          <parameter key="6" value="catalog[1]/text().true.text.attribute"/>
          <parameter key="7" value="CatalogVendor[1]/text().true.polynominal.attribute"/>
        </list>
      </operator>
      <operator activated="true" class="loop_examples" compatibility="5.2.003" expanded="true" height="76" name="Loop Examples" width="90" x="179" y="30">
        <process expanded="true" height="446" width="557">
          <!-- Extract Macro: copies a data value of InstitutionName[1]/text() into the
               macro macroGoogleQuery.
               NOTE(review): example_index is the constant 1, so the same example is
               presumably read on every pass of the loop; confirm whether the loop's
               iteration macro was meant to be used here. -->
          <operator activated="true" class="extract_macro" compatibility="5.2.003" expanded="true" height="60" name="Extract Macro" width="90" x="45" y="30">
            <parameter key="macro" value="macroGoogleQuery"/>
            <parameter key="macro_type" value="data_value"/>
            <parameter key="attribute_name" value="InstitutionName[1]/text()"/>
            <parameter key="example_index" value="1"/>
          </operator>
          <!-- Generate Attributes: urlGoogleQuery is the Google search prefix concatenated
               with the macro text, after replaceAll swaps "\n" for "+".
               The macro is placed inside a double-quoted string literal of the expression.
               NOTE(review): if the macro text itself contains line breaks or double quotes,
               the literal is presumably broken before replaceAll is evaluated, which may be
               what triggers the reported syntax error; confirm.
               Only "\n" is replaced; "\r" is not handled by this expression. -->
          <operator activated="true" class="generate_attributes" compatibility="5.2.003" expanded="true" height="76" name="Generate Attributes" width="90" x="380" y="210">
            <list key="function_descriptions">
              <parameter key="urlGoogleQuery" value="&quot;http://www.google.com/search?q=&quot; + replaceAll(&quot;%{macroGoogleQuery}&quot;,&quot;\n&quot;,&quot;+&quot;)"/>
            </list>
            <parameter key="use_standard_constants" value="false"/>
          </operator>
          <connect from_port="example set" to_op="Extract Macro" to_port="example set"/>
          <connect from_op="Extract Macro" from_port="example set" to_op="Generate Attributes" to_port="example set input"/>
          <connect from_op="Generate Attributes" from_port="example set output" to_port="example set"/>
          <portSpacing port="source_example set" spacing="0"/>
          <portSpacing port="sink_example set" spacing="0"/>
          <portSpacing port="sink_output 1" spacing="0"/>
        </process>
      </operator>
      <connect from_op="Read XML" from_port="output" to_op="Loop Examples" to_port="example set"/>
      <connect from_op="Loop Examples" from_port="example set" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>

Answers

  • haddock Member Posts: 849  Guru
    Hi there,

    It might also be that your attribute names contain brackets; normally this doesn't matter, but in regex it does: you have to be very careful about escaping special characters, and avoiding others.  Here's an example which produces your error:
    <?xml version="1.0" encoding="UTF-8" standalone="no"?>
    <!--
      Minimal RapidMiner 5.2 example from the reply.
      Flow: Generate Data, then Rename (att1 becomes att(1)), then Generate Attributes,
      whose expression refers to att(1) by that literal name. The accompanying post
      presents this process as one that reproduces the reported error, so it is
      meant to fail as written.
    -->
    <process version="5.2.003">
      <context>
        <input/>
        <output/>
        <macros/>
      </context>
      <operator activated="true" class="process" compatibility="5.2.003" expanded="true" name="Process">
        <process expanded="true" height="263" width="822">
          <operator activated="true" class="generate_data" compatibility="5.2.003" expanded="true" height="60" name="Generate Data" width="90" x="87" y="38"/>
          <!-- Rename: gives att1 a name that contains parentheses. -->
          <operator activated="true" class="rename" compatibility="5.2.003" expanded="true" height="76" name="Rename" width="90" x="216" y="36">
            <parameter key="old_name" value="att1"/>
            <parameter key="new_name" value="att(1)"/>
            <list key="rename_additional_attributes"/>
          </operator>
          <!-- Generate Attributes: the new attribute "Gotcha" sums att(1), att2 and att3;
               att(1) appears in the expression with its parentheses unescaped. -->
          <operator activated="true" class="generate_attributes" compatibility="5.2.003" expanded="true" height="76" name="Generate Attributes" width="90" x="380" y="30">
            <list key="function_descriptions">
              <parameter key="Gotcha" value="att(1)+att2+att3"/>
            </list>
          </operator>
          <connect from_op="Generate Data" from_port="output" to_op="Rename" to_port="example set input"/>
          <connect from_op="Rename" from_port="example set output" to_op="Generate Attributes" to_port="example set input"/>
          <connect from_op="Generate Attributes" from_port="example set output" to_port="result 1"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
        </process>
      </operator>
    </process>
    I know this from bitter, and recent, experience. Here's some code for my RapidAnalytics server, where I read in some attributes with brackets and scale one column against another. Don't try to run it; just check out the attribute generator and, in particular, the regex:
    <?xml version="1.0" encoding="UTF-8" standalone="no"?>
    <!--
      RapidMiner 5.2 process from the reply, parameterised by the macro "symbol"
      (declared in the context with an empty value; input location PricesNPatterns).
      Flow:
        1. Select Attributes keeps attributes whose names match .*%{symbol}.
        2. Four Extract Macro operators store statistics in macros:
           H = max and L = min of AVG(C)_%{symbol},
           MaxP = max and MinP = min of Label_Sets_%{symbol}.
        3. Generate Attributes creates P-%{symbol} by rescaling Label_Sets_%{symbol}
           from the range MinP..MaxP onto the range L..H.
        4. Select Attributes (2) matches .* with the except expression Label.* enabled.
        5. Rename by Replacing targets the name fragment AVG\(C\)\_ (parentheses escaped).
      The accompanying post says this is for illustration and is not meant to be run as is.
    -->
    <process version="5.2.003">
      <context>
        <input>
          <location>PricesNPatterns</location>
        </input>
        <output/>
        <macros>
          <macro>
            <key>symbol</key>
            <value/>
          </macro>
        </macros>
      </context>
      <operator activated="true" class="process" compatibility="5.2.003" expanded="true" name="Process">
        <process expanded="true" height="697" width="815">
          <operator activated="true" class="select_attributes" compatibility="5.2.003" expanded="true" height="76" name="Select Attributes" width="90" x="179" y="75">
            <parameter key="attribute_filter_type" value="regular_expression"/>
            <parameter key="regular_expression" value=".*%{symbol}"/>
          </operator>
          <!-- H: maximum of AVG(C)_%{symbol}. The attribute name is given literally here,
               with unescaped parentheses. -->
          <operator activated="true" class="extract_macro" compatibility="5.2.003" expanded="true" height="60" name="H" width="90" x="313" y="75">
            <parameter key="macro" value="H"/>
            <parameter key="macro_type" value="statistics"/>
            <parameter key="statistics" value="max"/>
            <parameter key="attribute_name" value="AVG(C)_%{symbol}"/>
          </operator>
          <!-- L: minimum of AVG(C)_%{symbol}. -->
          <operator activated="true" class="extract_macro" compatibility="5.2.003" expanded="true" height="60" name="L" width="90" x="313" y="165">
            <parameter key="macro" value="L"/>
            <parameter key="macro_type" value="statistics"/>
            <parameter key="statistics" value="min"/>
            <parameter key="attribute_name" value="AVG(C)_%{symbol}"/>
          </operator>
          <!-- MaxP: maximum of Label_Sets_%{symbol}. -->
          <operator activated="true" class="extract_macro" compatibility="5.2.003" expanded="true" height="60" name="MaxP" width="90" x="447" y="75">
            <parameter key="macro" value="MaxP"/>
            <parameter key="macro_type" value="statistics"/>
            <parameter key="statistics" value="max"/>
            <parameter key="attribute_name" value="Label_Sets_%{symbol}"/>
          </operator>
          <!-- MinP: minimum of Label_Sets_%{symbol}. -->
          <operator activated="true" class="extract_macro" compatibility="5.2.003" expanded="true" height="60" name="MinP" width="90" x="447" y="165">
            <parameter key="macro" value="MinP"/>
            <parameter key="macro_type" value="statistics"/>
            <parameter key="statistics" value="min"/>
            <parameter key="attribute_name" value="Label_Sets_%{symbol}"/>
          </operator>
          <!-- Generate Attributes: both the new attribute name and its expression are built
               from macros. The expression is L + ((x - MinP) / (MaxP - MinP)) * (H - L),
               where x is the attribute Label_Sets_%{symbol}; that attribute name has no
               brackets, unlike AVG(C)_%{symbol}, which is not referenced in the expression.
               NOTE(review): the denominator is zero when MaxP equals MinP. -->
          <operator activated="true" class="generate_attributes" compatibility="5.2.003" expanded="true" height="76" name="Generate Attributes" width="90" x="581" y="165">
            <list key="function_descriptions">
              <parameter key="P-%{symbol}" value="%{L}+((Label_Sets_%{symbol}-%{MinP})/(%{MaxP}-%{MinP}))*(%{H}-%{L})"/>
            </list>
          </operator>
          <!-- Select Attributes (2): regular expression .* combined with the except
               expression Label.*; presumably this drops the Label_Sets_ attribute. -->
          <operator activated="true" class="select_attributes" compatibility="5.2.003" expanded="true" height="76" name="Select Attributes (2)" width="90" x="648" y="30">
            <parameter key="attribute_filter_type" value="regular_expression"/>
            <parameter key="regular_expression" value=".*"/>
            <parameter key="use_except_expression" value="true"/>
            <parameter key="except_regular_expression" value="Label.*"/>
          </operator>
          <!-- Rename by Replacing: the pattern escapes the parentheses of AVG(C)_ with
               backslashes. No replacement parameter is set in this XML. -->
          <operator activated="true" class="rename_by_replacing" compatibility="5.2.003" expanded="true" height="76" name="Rename by Replacing" width="90" x="715" y="120">
            <parameter key="replace_what" value="AVG\(C\)\_"/>
          </operator>
          <connect from_port="input 1" to_op="Select Attributes" to_port="example set input"/>
          <connect from_op="Select Attributes" from_port="example set output" to_op="H" to_port="example set"/>
          <connect from_op="H" from_port="example set" to_op="L" to_port="example set"/>
          <connect from_op="L" from_port="example set" to_op="MaxP" to_port="example set"/>
          <connect from_op="MaxP" from_port="example set" to_op="MinP" to_port="example set"/>
          <connect from_op="MinP" from_port="example set" to_op="Generate Attributes" to_port="example set input"/>
          <connect from_op="Generate Attributes" from_port="example set output" to_op="Select Attributes (2)" to_port="example set input"/>
          <connect from_op="Select Attributes (2)" from_port="example set output" to_op="Rename by Replacing" to_port="example set input"/>
          <connect from_op="Rename by Replacing" from_port="example set output" to_port="result 1"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="source_input 2" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
        </process>
      </operator>
    </process>
    Here the new attribute's name comes from a macro, and its value comes from another attribute, itself named using a macro, multiplied by some other macros, themselves named according to a macro. So macros and attributes are in total confusion, and the regex parser says it's illegal. What a treat, but guess what: it works...

    Good luck with your endeavours.



  • cindyharper Member Posts: 9 Contributor II
    So here's a real newbie question - I can see how I could make the value of an attribute available for use in the Generate Attribute operator if I made a macro, but can I call the value of an attribute directly in my Generate Attribute operator? Can't I set the value of the new attribute to %{InstitutionName.value} if InstitutionName is the name of one of my row attributes?
  • haddock Member Posts: 849  Guru
    "Can't I set the value of the new attribute to %{InstitutionName.value} if InstitutionName is the name of one of my row attributes?"
    Yes. That happens in my second example.
Sign In or Register to comment.