Options

wrapping GetPage in HandleException - cannot open connection

cindyharpercindyharper Member Posts: 9 Contributor II
edited November 2018 in Help
I'm getting this error for every call of Get Page in a Handle Exception wrapper inside a Loop Examples loop.

May 18, 2012 10:48:04 AM WARNING: Handle Exception: Error occurred and will be neglected by Handle Exception: could not establish connection
May 18, 2012 10:48:04 AM WARNING: Handle Exception: Error occurred and will be neglected by Handle Exception: could not establish connection
May 18, 2012 10:48:04 AM WARNING: Handle Exception: Error occurred and will be neglected by Handle Exception: could not establish connection
May 18, 2012 10:48:04 AM WARNING: Handle Exception: Error occurred and will be neglected by Handle Exception: could not establish connection
May 18, 2012 10:48:05 AM INFO: Saving results.

Here's my process:


<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<process version="5.2.006">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="5.2.006" expanded="true" name="Process">
    <parameter key="logverbosity" value="all"/>
    <parameter key="logfile" value="C:\Documents and Settings\charper\My Documents\ALiNBUS\googlepagesstep3log"/>
    <parameter key="resultfile" value="C:\Documents and Settings\charper\My Documents\ALiNBUS\googlepagesstep3result"/>
    <process expanded="true" height="415" width="487">
      <operator activated="true" class="retrieve" compatibility="5.2.006" expanded="true" height="60" name="Retrieve" width="90" x="45" y="30">
        <parameter key="repository_entry" value="GooglePagesStep2"/>
      </operator>
      <operator activated="true" class="select_attributes" compatibility="5.2.006" expanded="true" height="76" name="Select Attributes" width="90" x="45" y="210">
        <parameter key="attribute_filter_type" value="subset"/>
        <parameter key="attribute" value="URL"/>
        <parameter key="attributes" value="Content-Length|Content-Type|Date|Expires|Last-Modified|Response-Code|Response-Message|URL|token_number"/>
        <parameter key="invert_selection" value="true"/>
      </operator>
      <!-- Generates an attribute named PDF from the expression contains(Href, ".pdf"). -->
      <operator activated="true" class="generate_attributes" compatibility="5.2.006" expanded="true" height="76" name="Generate Attributes (2)" width="90" x="112" y="165">
        <list key="function_descriptions">
          <parameter key="PDF" value="contains(Href,&quot;.pdf&quot;)"/>
        </list>
      </operator>
      <!-- No parameters are set here, so this operator runs with its default settings.
           NOTE(review): the PDF attribute generated by the preceding operator is not
           referenced by any filter condition in this process; confirm whether filtering
           on it was intended. -->
      <operator activated="true" class="filter_examples" compatibility="5.2.006" expanded="true" height="76" name="Filter Examples" width="90" x="179" y="75"/>
      <operator activated="true" class="loop_examples" compatibility="5.2.006" expanded="true" height="94" name="Loop Examples" width="90" x="246" y="30">
        <process expanded="true" height="357" width="480">
          <operator activated="true" class="handle_exception" compatibility="5.2.006" expanded="true" height="94" name="Handle Exception" width="90" x="45" y="30">
            <parameter key="exception_macro" value="exceptionmsg"/>
            <process expanded="true" height="375" width="225">
              <!-- Fetches the page for the current loop iteration.
                   NOTE(review): url is the literal text "Href", not a macro reference of the
                   form %{name}. The reply further down this thread identifies this as the
                   reason the connection cannot be established. -->
              <operator activated="true" class="web:get_webpage" compatibility="5.1.004" expanded="true" height="60" name="Get Page" width="90" x="45" y="30">
                <parameter key="url" value="Href"/>
                <parameter key="random_user_agent" value="true"/>
                <parameter key="connection_timeout" value="100000"/>
                <parameter key="read_timeout" value="100000"/>
                <parameter key="accept_cookies" value="all"/>
                <list key="query_parameters"/>
              </operator>
              <operator activated="true" class="text:extract_token_number" compatibility="5.2.001" expanded="true" height="60" name="Extract Token Number" width="90" x="45" y="120">
                <parameter key="condition" value="matches"/>
                <parameter key="regular_expression" value="LIBRAR"/>
              </operator>
              <operator activated="true" class="text:extract_token_number" compatibility="5.2.001" expanded="true" height="60" name="Extract Token Number (2)" width="90" x="45" y="210">
                <parameter key="metadata_key" value="LIBRARnumber"/>
                <parameter key="condition" value="matches"/>
                <parameter key="regular_expression" value="LIBRAR"/>
                <parameter key="invert_condition" value="true"/>
              </operator>
              <operator activated="true" class="text:documents_to_data" compatibility="5.2.001" expanded="true" height="76" name="Documents to Data (2)" width="90" x="112" y="300">
                <parameter key="text_attribute" value="NewsletterDoc"/>
                <parameter key="label_attribute" value="Href"/>
              </operator>
              <connect from_port="in 1" to_port="out 1"/>
              <connect from_op="Get Page" from_port="output" to_op="Extract Token Number" to_port="document"/>
              <connect from_op="Extract Token Number" from_port="document" to_op="Extract Token Number (2)" to_port="document"/>
              <connect from_op="Extract Token Number (2)" from_port="document" to_op="Documents to Data (2)" to_port="documents 1"/>
              <connect from_op="Documents to Data (2)" from_port="example set" to_port="out 2"/>
              <portSpacing port="source_in 1" spacing="0"/>
              <portSpacing port="source_in 2" spacing="0"/>
              <portSpacing port="source_in 3" spacing="0"/>
              <portSpacing port="sink_out 1" spacing="0"/>
              <portSpacing port="sink_out 2" spacing="0"/>
              <portSpacing port="sink_out 3" spacing="0"/>
            </process>
            <process expanded="true" height="375" width="202">
              <connect from_port="in 1" to_port="out 1"/>
              <connect from_port="in 2" to_port="out 2"/>
              <portSpacing port="source_in 1" spacing="0"/>
              <portSpacing port="source_in 2" spacing="0"/>
              <portSpacing port="source_in 3" spacing="0"/>
              <portSpacing port="sink_out 1" spacing="0"/>
              <portSpacing port="sink_out 2" spacing="0"/>
              <portSpacing port="sink_out 3" spacing="0"/>
            </process>
          </operator>
          <connect from_port="example set" to_op="Handle Exception" to_port="in 1"/>
          <connect from_op="Handle Exception" from_port="out 1" to_port="example set"/>
          <connect from_op="Handle Exception" from_port="out 2" to_port="output 1"/>
          <portSpacing port="source_example set" spacing="0"/>
          <portSpacing port="sink_example set" spacing="0"/>
          <portSpacing port="sink_output 1" spacing="0"/>
          <portSpacing port="sink_output 2" spacing="0"/>
        </process>
      </operator>
      <operator activated="true" class="flatten_collection" compatibility="5.2.006" expanded="true" height="60" name="Flatten Collection" width="90" x="246" y="165"/>
      <operator activated="true" class="store" compatibility="5.2.006" expanded="true" height="60" name="Store (2)" width="90" x="313" y="210">
        <parameter key="repository_entry" value="GooglePagesStep3Docs"/>
      </operator>
      <operator activated="true" class="store" compatibility="5.2.006" expanded="true" height="60" name="Store" width="90" x="380" y="120">
        <parameter key="repository_entry" value="GooglePagesStep3Store"/>
      </operator>
      <connect from_op="Retrieve" from_port="output" to_op="Select Attributes" to_port="example set input"/>
      <connect from_op="Select Attributes" from_port="example set output" to_op="Generate Attributes (2)" to_port="example set input"/>
      <connect from_op="Generate Attributes (2)" from_port="example set output" to_op="Filter Examples" to_port="example set input"/>
      <connect from_op="Filter Examples" from_port="example set output" to_op="Loop Examples" to_port="example set"/>
      <connect from_op="Loop Examples" from_port="example set" to_op="Store" to_port="input"/>
      <connect from_op="Loop Examples" from_port="output 1" to_op="Flatten Collection" to_port="collection"/>
      <connect from_op="Flatten Collection" from_port="flat" to_op="Store (2)" to_port="input"/>
      <connect from_op="Store (2)" from_port="through" to_port="result 2"/>
      <connect from_op="Store" from_port="through" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
    </process>
  </operator>
</process>

Any suggestions? How do I delay between GETs for the Loop Examples loop? Will that help?

Answers

  • Options
    MariusHelfMariusHelf RapidMiner Certified Expert, Member Posts: 1,869 Unicorn
    For the delay you could try the Delay operator :)

    Does the process run without Handle Exception? Your thread title suggests that you have no (or fewer) problems without that operator.
    Oh no, I just saw that you entered "Href" as the URL in Get Page - of course that does not work, as it is not a valid URL.
    I suppose you have an Href attribute in your example set. In that case you need something like the process below. Please note the Extract Macro operator in Loop Examples, and the use of the href macro in Get Page.

    Best,
    Marius
    <?xml version="1.0" encoding="UTF-8" standalone="no"?>
    <process version="5.2.006">
      <context>
        <input/>
        <output/>
        <macros/>
      </context>
      <operator activated="true" class="process" compatibility="5.2.006" expanded="true" name="Process">
        <parameter key="logverbosity" value="all"/>
        <parameter key="logfile" value="C:\Documents and Settings\charper\My Documents\ALiNBUS\googlepagesstep3log"/>
        <parameter key="resultfile" value="C:\Documents and Settings\charper\My Documents\ALiNBUS\googlepagesstep3result"/>
        <process expanded="true" height="480" width="705">
          <operator activated="true" class="retrieve" compatibility="5.2.006" expanded="true" height="60" name="Retrieve" width="90" x="45" y="30">
            <parameter key="repository_entry" value="GooglePagesStep2"/>
          </operator>
          <operator activated="true" class="select_attributes" compatibility="5.2.006" expanded="true" height="76" name="Select Attributes" width="90" x="180" y="30">
            <parameter key="attribute_filter_type" value="subset"/>
            <parameter key="attribute" value="URL"/>
            <parameter key="attributes" value="Content-Length|Content-Type|Date|Expires|Last-Modified|Response-Code|Response-Message|URL|token_number"/>
            <parameter key="invert_selection" value="true"/>
          </operator>
          <!-- Generates an attribute named PDF from the expression contains(Href, ".pdf"). -->
          <operator activated="true" class="generate_attributes" compatibility="5.2.006" expanded="true" height="76" name="Generate Attributes (2)" width="90" x="315" y="30">
            <list key="function_descriptions">
              <parameter key="PDF" value="contains(Href,&quot;.pdf&quot;)"/>
            </list>
          </operator>
          <!-- No parameters are set here, so this operator runs with its default settings.
               NOTE(review): the PDF attribute generated by the preceding operator is not
               referenced by any filter condition in this process; confirm whether filtering
               on it was intended. -->
          <operator activated="true" class="filter_examples" compatibility="5.2.006" expanded="true" height="76" name="Filter Examples" width="90" x="450" y="30"/>
          <operator activated="true" class="loop_examples" compatibility="5.2.006" expanded="true" height="94" name="Loop Examples" width="90" x="112" y="210">
            <process expanded="true" height="357" width="480">
              <!-- Defines the macro "href" from a data value: the Href attribute of the
                   example at index %{example}. Presumably %{example} is the iteration macro
                   provided by the enclosing Loop Examples operator; confirm against the
                   operator documentation. -->
              <operator activated="true" class="extract_macro" compatibility="5.2.006" expanded="true" height="60" name="Extract Macro" width="90" x="45" y="30">
                <parameter key="macro" value="href"/>
                <parameter key="macro_type" value="data_value"/>
                <parameter key="attribute_name" value="Href"/>
                <parameter key="example_index" value="%{example}"/>
              </operator>
              <operator activated="true" class="handle_exception" compatibility="5.2.006" expanded="true" height="94" name="Handle Exception" width="90" x="246" y="30">
                <parameter key="exception_macro" value="exceptionmsg"/>
                <process expanded="true" height="386" width="279">
                  <!-- Fetches the page for the current loop iteration. The url is read from
                       the href macro (%{href}), which the Extract Macro operator in the
                       enclosing Loop Examples subprocess defines from the Href attribute. -->
                  <operator activated="true" class="web:get_webpage" compatibility="5.2.000" expanded="true" height="60" name="Get Page" width="90" x="45" y="30">
                    <parameter key="url" value="%{href}"/>
                    <parameter key="random_user_agent" value="true"/>
                    <parameter key="connection_timeout" value="100000"/>
                    <parameter key="read_timeout" value="100000"/>
                    <parameter key="accept_cookies" value="all"/>
                    <list key="query_parameters"/>
                    <list key="request_properties"/>
                  </operator>
                  <operator activated="true" class="text:extract_token_number" compatibility="5.2.002" expanded="true" height="60" name="Extract Token Number" width="90" x="45" y="120">
                    <parameter key="condition" value="matches"/>
                    <parameter key="regular_expression" value="LIBRAR"/>
                  </operator>
                  <operator activated="true" class="text:extract_token_number" compatibility="5.2.002" expanded="true" height="60" name="Extract Token Number (2)" width="90" x="45" y="210">
                    <parameter key="metadata_key" value="LIBRARnumber"/>
                    <parameter key="condition" value="matches"/>
                    <parameter key="regular_expression" value="LIBRAR"/>
                    <parameter key="invert_condition" value="true"/>
                  </operator>
                  <operator activated="true" class="text:documents_to_data" compatibility="5.2.002" expanded="true" height="76" name="Documents to Data (2)" width="90" x="179" y="210">
                    <parameter key="text_attribute" value="NewsletterDoc"/>
                    <parameter key="label_attribute" value="Href"/>
                  </operator>
                  <connect from_port="in 1" to_port="out 1"/>
                  <connect from_op="Get Page" from_port="output" to_op="Extract Token Number" to_port="document"/>
                  <connect from_op="Extract Token Number" from_port="document" to_op="Extract Token Number (2)" to_port="document"/>
                  <connect from_op="Extract Token Number (2)" from_port="document" to_op="Documents to Data (2)" to_port="documents 1"/>
                  <connect from_op="Documents to Data (2)" from_port="example set" to_port="out 2"/>
                  <portSpacing port="source_in 1" spacing="0"/>
                  <portSpacing port="source_in 2" spacing="0"/>
                  <portSpacing port="source_in 3" spacing="0"/>
                  <portSpacing port="sink_out 1" spacing="0"/>
                  <portSpacing port="sink_out 2" spacing="0"/>
                  <portSpacing port="sink_out 3" spacing="0"/>
                </process>
                <process expanded="true" height="375" width="202">
                  <connect from_port="in 1" to_port="out 1"/>
                  <connect from_port="in 2" to_port="out 2"/>
                  <portSpacing port="source_in 1" spacing="0"/>
                  <portSpacing port="source_in 2" spacing="0"/>
                  <portSpacing port="source_in 3" spacing="0"/>
                  <portSpacing port="sink_out 1" spacing="0"/>
                  <portSpacing port="sink_out 2" spacing="0"/>
                  <portSpacing port="sink_out 3" spacing="0"/>
                </process>
              </operator>
              <connect from_port="example set" to_op="Extract Macro" to_port="example set"/>
              <connect from_op="Extract Macro" from_port="example set" to_op="Handle Exception" to_port="in 1"/>
              <connect from_op="Handle Exception" from_port="out 1" to_port="example set"/>
              <connect from_op="Handle Exception" from_port="out 2" to_port="output 1"/>
              <portSpacing port="source_example set" spacing="0"/>
              <portSpacing port="sink_example set" spacing="0"/>
              <portSpacing port="sink_output 1" spacing="0"/>
              <portSpacing port="sink_output 2" spacing="0"/>
            </process>
          </operator>
          <operator activated="true" class="flatten_collection" compatibility="5.2.006" expanded="true" height="60" name="Flatten Collection" width="90" x="246" y="255"/>
          <operator activated="true" class="store" compatibility="5.2.006" expanded="true" height="60" name="Store (2)" width="90" x="380" y="255">
            <parameter key="repository_entry" value="GooglePagesStep3Docs"/>
          </operator>
          <operator activated="true" class="store" compatibility="5.2.006" expanded="true" height="60" name="Store" width="90" x="246" y="165">
            <parameter key="repository_entry" value="GooglePagesStep3Store"/>
          </operator>
          <connect from_op="Retrieve" from_port="output" to_op="Select Attributes" to_port="example set input"/>
          <connect from_op="Select Attributes" from_port="example set output" to_op="Generate Attributes (2)" to_port="example set input"/>
          <connect from_op="Generate Attributes (2)" from_port="example set output" to_op="Filter Examples" to_port="example set input"/>
          <connect from_op="Filter Examples" from_port="example set output" to_op="Loop Examples" to_port="example set"/>
          <connect from_op="Loop Examples" from_port="example set" to_op="Store" to_port="input"/>
          <connect from_op="Loop Examples" from_port="output 1" to_op="Flatten Collection" to_port="collection"/>
          <connect from_op="Flatten Collection" from_port="flat" to_op="Store (2)" to_port="input"/>
          <connect from_op="Store (2)" from_port="through" to_port="result 2"/>
          <connect from_op="Store" from_port="through" to_port="result 1"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
          <portSpacing port="sink_result 3" spacing="0"/>
        </process>
      </operator>
    </process>
Sign In or Register to comment.