RapidMiner 9.7 is Now Available

Lots of amazing new improvements including true version control! Learn more about what's new here.


Xpath problem not getting attribute for simple Xpath expressions

udaykumarudaykumar Member Posts: 6 Contributor I
edited November 2018 in Help
I need to extract column values from HTML, but when I try to extract an attribute value, the output always shows a "?" symbol instead of the actual value.

I have tried many simple XPath expressions,


but the output is not shown correctly, even after I changed the process as suggested in the RapidMiner forum thread named "Xpath commands working in google docs not in rapidminer".

My process is as follows

<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  RapidMiner process: applies an XPath "Extract Information" step to HTML
  documents loaded by "Process Documents from Files", then passes the
  resulting example set to "Write CSV".
-->
<process version="5.3.015">
  <operator activated="true" class="process" compatibility="5.3.015" expanded="true" name="Process">
    <process expanded="true">
      <!-- Loads documents from the listed directory; content is treated as HTML. -->
      <operator activated="true" class="text:process_document_from_file" compatibility="5.3.002" expanded="true" height="76" name="Process Documents from Files" width="90" x="246" y="75">
        <list key="text_directories">
          <parameter key="all" value="/home/user/Desktop/march_04/single"/>
        </list>
        <parameter key="content_type" value="html"/>
        <process expanded="true">
          <operator activated="true" class="text:extract_information" compatibility="5.3.002" expanded="true" height="60" name="Extract Information" width="90" x="313" y="165">
            <parameter key="query_type" value="XPath"/>
            <list key="string_machting_queries"/>
            <list key="regular_expression_queries"/>
            <list key="regular_region_queries"/>
            <!--
              The single query below selects text() nodes, not an attribute value.
              NOTE(review): the query uses the "h:" prefix while the "namespaces"
              list is empty; presumably the operator binds "h" to XHTML by
              default. Confirm against the Text Processing extension docs.
            -->
            <list key="xpath_queries">
              <parameter key="Temp" value="//h:section[@class='fixed']/text()"/>
            </list>
            <list key="namespaces"/>
            <list key="index_queries"/>
          </operator>
          <connect from_port="document" to_op="Extract Information" to_port="document"/>
          <connect from_op="Extract Information" from_port="document" to_port="document 1"/>
          <portSpacing port="source_document" spacing="0"/>
          <portSpacing port="sink_document 1" spacing="0"/>
          <portSpacing port="sink_document 2" spacing="0"/>
        </process>
      </operator>
      <!-- Writes the example set produced above to the given CSV file. -->
      <operator activated="true" class="write_csv" compatibility="5.3.015" expanded="true" height="76" name="Write CSV" width="90" x="447" y="75">
        <parameter key="csv_file" value="/home/user/Desktop/march_04/crawl_web_output.csv"/>
      </operator>
      <connect from_op="Process Documents from Files" from_port="example set" to_op="Write CSV" to_port="input"/>
      <connect from_op="Write CSV" from_port="through" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>
Sign In or Register to comment.