Executing Tesseract

Robi_MeRobi_Me Member Posts: 30 Maven
I am trying to execute Tesseract to extract detail from PDF's that have been converted to PNG. Any idea why I am getting an error 127 when I am able to execute via terminal? 

<?xml version="1.0" encoding="UTF-8"?><process version="9.10.000">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="9.10.000" expanded="true" name="Process">
    <parameter key="logverbosity" value="init"/>
    <parameter key="random_seed" value="2001"/>
    <parameter key="send_mail" value="never"/>
    <parameter key="notification_email" value=""/>
    <parameter key="process_duration_for_mail" value="30"/>
    <parameter key="encoding" value="SYSTEM"/>
    <process expanded="true">
      <operator activated="true" class="concurrency:loop_files" compatibility="9.10.000" expanded="true" height="103" name="Loop Files" width="90" x="179" y="85">
        <parameter key="directory" value="/Users/robinmeisel/Dropbox/master_png/Autoseis"/>
        <parameter key="filter_type" value="glob"/>
        <parameter key="recursive" value="false"/>
        <parameter key="enable_macros" value="true"/>
        <parameter key="macro_for_file_name" value="file_name"/>
        <parameter key="macro_for_file_type" value="file_type"/>
        <parameter key="macro_for_folder_name" value="folder_name"/>
        <parameter key="reuse_results" value="false"/>
        <parameter key="enable_parallel_execution" value="true"/>
        <process expanded="true">
          <operator activated="true" class="text:create_document" compatibility="9.3.001" expanded="true" height="68" name="Create Document" width="90" x="112" y="136">
            <parameter key="text" value="tesseract %{file_name} %{file_name}.txt"/>
            <parameter key="add label" value="false"/>
            <parameter key="label_type" value="nominal"/>
          </operator>
          <operator activated="true" class="text:write_document" compatibility="9.3.001" expanded="true" height="82" name="Write Document" width="90" x="246" y="136">
            <parameter key="file" value="/Users/robinmeisel/Dropbox/master_png/Autoseis/tesseract.sh"/>
            <parameter key="overwrite" value="true"/>
            <parameter key="encoding" value="SYSTEM"/>
          </operator>
          <operator activated="true" class="productivity:execute_program" compatibility="9.10.000" expanded="true" height="124" name="Execute Program (2)" width="90" x="380" y="136">
            <parameter key="command" value="chmod +x tesseract.sh"/>
            <parameter key="log_stdout" value="true"/>
            <parameter key="log_stderr" value="true"/>
            <parameter key="working_directory" value="/Users/robinmeisel/Dropbox/master_png/Autoseis"/>
            <list key="env_variables"/>
          </operator>
          <operator activated="true" class="productivity:execute_program" compatibility="9.10.000" expanded="true" height="103" name="Execute Program" width="90" x="514" y="136">
            <parameter key="command" value="./tesseract.sh"/>
            <parameter key="log_stdout" value="true"/>
            <parameter key="log_stderr" value="true"/>
            <parameter key="working_directory" value="/Users/robinmeisel/Dropbox/master_png/Autoseis"/>
            <list key="env_variables"/>
          </operator>
          <connect from_op="Create Document" from_port="output" to_op="Write Document" to_port="document"/>
          <connect from_op="Write Document" from_port="document" to_op="Execute Program (2)" to_port="through 1"/>
          <connect from_op="Execute Program (2)" from_port="out" to_op="Execute Program" to_port="in"/>
          <connect from_op="Execute Program" from_port="out" to_port="output 1"/>
          <connect from_op="Execute Program" from_port="err" to_port="output 2"/>
          <portSpacing port="source_file object" spacing="0"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_output 1" spacing="0"/>
          <portSpacing port="sink_output 2" spacing="0"/>
          <portSpacing port="sink_output 3" spacing="0"/>
        </process>
      </operator>
      <connect from_op="Loop Files" from_port="output 1" to_port="result 1"/>
      <connect from_op="Loop Files" from_port="output 2" to_port="result 2"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
    </process>
  </operator>
</process>


Tagged:

Best Answer

  • Robi_MeRobi_Me Member Posts: 30 Maven
    Solution Accepted
    which tesseract

    /usr/local/bin/tesseract

    use the path inside of the Create Document operator

Answers

  • mschmitzmschmitz Administrator, Moderator, Employee, RapidMiner Certified Analyst, RapidMiner Certified Expert, University Professor Posts: 3,249 RM Data Scientist
    @sgenzer or @ey can you help here?
    - Head of Data Science Services at RapidMiner -
    Dortmund, Germany
  • Robi_MeRobi_Me Member Posts: 30 Maven
    @sgenzer and @ey I am on Mac running Big Sur, I seem to enscounter this whenever I run the Execute Programs operator. When I go to terminal I can execute the program, just RapidMinner cannot. Any idea? 
Sign In or Register to comment.