PNG chunk data into image dimensions

robinrobin Member Posts: 100 Guru
I am trying to get RM to determine the size of images it is looking at when applying machine learning. All of the images we deal with are PNG and the PNG specifications are located here: https://www.w3.org/TR/PNG/#4Concepts.PNGImage

Looking at the ASCII characters of the RapidMiner logo located at rapidminer-logo-retina.png, the first 24 ASCII characters are:
âPNG


IHDRôí
The image size is contained in the first eight bytes that follow the IHDR chunk type: four bytes for the width, then four bytes for the height.

I use this process to extract the ASCII characters for the image size:

<?xml version="1.0" encoding="UTF-8"?><process version="8.2.000">
  <!-- Downloads the RapidMiner logo PNG as a document, writes it to disk, converts it
       to an example set, and cuts characters 17 to 24 out of the text. -->
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="8.2.000" expanded="true" name="Process">
    <process expanded="true">
      <!-- Fetches the PNG from the given URL through the web "Get Page" operator. -->
      <operator activated="true" class="web:get_webpage" compatibility="7.3.000" expanded="true" height="68" name="Get Page" width="90" x="112" y="136">
        <parameter key="url" value="https://1xltkxylmzx3z8gd647akcdvov-wpengine.netdna-ssl.com/wp-content/uploads/2016/06/rapidminer-logo-retina.png"/>
        <list key="query_parameters"/>
        <list key="request_properties"/>
      </operator>
      <!-- Writes the fetched document to a hard-coded local path; the document is also passed on. -->
      <operator activated="true" class="text:write_document" compatibility="8.1.000" expanded="true" height="82" name="Write Document" width="90" x="246" y="136">
        <parameter key="file" value="/Users/robinmeisel/Desktop/test.txt"/>
      </operator>
      <!-- Turns the document into an example set with the content in the "text" attribute. -->
      <operator activated="true" class="text:documents_to_data" compatibility="8.1.000" expanded="true" height="82" name="Documents to Data" width="90" x="380" y="136">
        <parameter key="text_attribute" value="text"/>
        <parameter key="add_meta_information" value="false"/>
      </operator>
      <!-- Keeps characters 17 to 24 only.
           NOTE(review): per the accompanying post this is where the IHDR width and height
           are expected; the bytes are handled as text here, so values outside the printable
           range may not survive decoding. Confirm before relying on the result. -->
      <operator activated="true" class="cut" compatibility="8.2.000" expanded="true" height="82" name="Cut" width="90" x="514" y="136">
        <parameter key="first_character_index" value="17"/>
        <parameter key="last_character_index" value="24"/>
      </operator>
      <!-- Linear chain: Get Page, Write Document, Documents to Data, Cut, result 1. -->
      <connect from_op="Get Page" from_port="output" to_op="Write Document" to_port="document"/>
      <connect from_op="Write Document" from_port="document" to_op="Documents to Data" to_port="documents 1"/>
      <connect from_op="Documents to Data" from_port="example set" to_op="Cut" to_port="example set input"/>
      <connect from_op="Cut" from_port="example set output" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>
The result for the above process is 
ôí
How would you recommend converting this into image height and width?

Tagged:

Best Answer

  • MartinLiebigMartinLiebig Administrator, Moderator, Employee, RapidMiner Certified Analyst, RapidMiner Certified Expert, University Professor Posts: 3,503 RM Data Scientist
    Solution Accepted
    Hi @robin ,
    Honestly, I would use Groovy here. It's so straightforward. See the attached example.

    BR,
    Martin

    <?xml version="1.0" encoding="UTF-8"?><process version="9.2.000">
      <!-- Opens the PNG from a URL and uses a Groovy script (Execute Script) to decode it
           with javax.imageio.ImageIO and return a one-row example set with the integer
           attributes "Height" and "Width". -->
      <context>
        <input/>
        <output/>
        <macros/>
      </context>
      <operator activated="true" class="process" compatibility="9.2.000" expanded="true" name="Process">
        <parameter key="logverbosity" value="init"/>
        <parameter key="random_seed" value="2001"/>
        <parameter key="send_mail" value="never"/>
        <parameter key="notification_email" value=""/>
        <parameter key="process_duration_for_mail" value="30"/>
        <parameter key="encoding" value="SYSTEM"/>
        <process expanded="true">
          <!-- Provides the image as a file object; resource_type is URL. -->
          <operator activated="true" class="open_file" compatibility="9.2.000" expanded="true" height="68" name="Open File" width="90" x="45" y="85">
            <parameter key="resource_type" value="URL"/>
            <parameter key="filename" value="https://1xltkxylmzx3z8gd647akcdvov-wpengine.netdna-ssl.com/wp-content/uploads/2016/06/rapidminer-logo-retina.png"/>
            <parameter key="url" value="https://1xltkxylmzx3z8gd647akcdvov-wpengine.netdna-ssl.com/wp-content/uploads/2016/06/rapidminer-logo-retina.png"/>
          </operator>
          <!-- The script is stored in the "script" attribute; line breaks are encoded as
               &#10; so they survive attribute-value normalization. Row values are written
               in the same order as the attributes (Height, then Width). -->
          <operator activated="true" class="execute_script" compatibility="9.2.000" expanded="true" height="82" name="Execute Script" width="90" x="246" y="85">
            <parameter key="script" value="import java.awt.image.BufferedImage;&#10;import javax.imageio.ImageIO;&#10;&#10;import com.rapidminer.example.Attribute;&#10;import com.rapidminer.example.ExampleSet;&#10;import com.rapidminer.example.table.AttributeFactory;&#10;import com.rapidminer.example.utils.ExampleSetBuilder;&#10;import com.rapidminer.example.utils.ExampleSets;&#10;import com.rapidminer.operator.nio.file.BufferedFileObject;&#10;import com.rapidminer.tools.Ontology;&#10;&#10;// First operator input: the file object delivered by Open File.&#10;BufferedFileObject fileObject = input[0];&#10;BufferedImage buf = ImageIO.read(fileObject.getFile());&#10;// ImageIO.read returns null when no registered reader can decode the file.&#10;if (buf == null) {&#10;    throw new IllegalArgumentException(&quot;Input file is not a readable image&quot;);&#10;}&#10;&#10;Attribute heightAtt = AttributeFactory.createAttribute(&quot;Height&quot;, Ontology.INTEGER);&#10;Attribute widthAtt = AttributeFactory.createAttribute(&quot;Width&quot;, Ontology.INTEGER);&#10;&#10;ExampleSetBuilder builder = ExampleSets.from(heightAtt, widthAtt);&#10;// Row values must follow the attribute order: Height first, then Width.&#10;double[] row = new double[2];&#10;row[0] = buf.getHeight();&#10;row[1] = buf.getWidth();&#10;builder.addRow(row);&#10;&#10;return builder.build();&#10;"/>
            <parameter key="standard_imports" value="true"/>
          </operator>
          <connect from_op="Open File" from_port="file" to_op="Execute Script" to_port="input 1"/>
          <connect from_op="Execute Script" from_port="output 1" to_port="result 1"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
        </process>
      </operator>
    </process>


    - Sr. Director Data Solutions, Altair RapidMiner -
    Dortmund, Germany

Answers

  • yyhuangyyhuang Administrator, Employee, RapidMiner Certified Analyst, RapidMiner Certified Expert, Member Posts: 364 RM Data Scientist
    edited March 2019
    Hi @robin,

    Thanks for sharing the use case with the community. I suggest you use the IMMI image mining extension.
    To install IMMI, first download from here http://www.burgsys.com/image-analysis-software.php

    Here is my result for extracting metadata about image info using IMMI 7.0 under RapidMiner 9.2







    <?xml version="1.0" encoding="UTF-8"?><process version="9.2.000">
      <!-- Reads the PNG from a URL with the image extension's Read Image operator, then
           branches the image to a Text Watermark operator (result 1) and to an
           Image Properties As Macro operator (result 2). -->
      <context>
        <input/>
        <output/>
        <macros/>
      </context>
      <operator activated="true" class="process" compatibility="9.2.000" expanded="true" name="Process">
        <parameter key="logverbosity" value="init"/>
        <parameter key="random_seed" value="2001"/>
        <parameter key="send_mail" value="never"/>
        <parameter key="notification_email" value=""/>
        <parameter key="process_duration_for_mail" value="30"/>
        <parameter key="encoding" value="SYSTEM"/>
        <process expanded="true">
          <!-- Provides the image as a file object; resource_type is URL. -->
          <operator activated="true" class="open_file" compatibility="9.2.000" expanded="true" height="68" name="Open File" width="90" x="45" y="238">
            <parameter key="resource_type" value="URL"/>
            <parameter key="url" value="https://1xltkxylmzx3z8gd647akcdvov-wpengine.netdna-ssl.com/wp-content/uploads/2016/06/rapidminer-logo-retina.png"/>
          </operator>
          <!-- Reads the file as a color, 8-bit image without label, mask or ROI. -->
          <operator activated="true" class="image:read_image" compatibility="7.0.000" expanded="true" height="68" name="Read Image" width="90" x="179" y="238">
            <parameter key="set_label" value="none"/>
            <parameter key="open_as" value="color"/>
            <parameter key="bit_depth" value="8bit"/>
            <parameter key="add_mask" value="none"/>
            <parameter key="add_roi" value="false"/>
            <parameter key="default_roi_filename" value="true"/>
            <parameter key="ignore_if_roi_file_does_not_exist" value="false"/>
          </operator>
          <!-- Deactivated and not referenced by any connect element below. -->
          <operator activated="false" class="text:documents_to_data" compatibility="8.1.000" expanded="true" height="68" name="Documents to Data" width="90" x="246" y="34">
            <parameter key="text_attribute" value="text"/>
            <parameter key="add_meta_information" value="false"/>
            <parameter key="datamanagement" value="double_sparse_array"/>
            <parameter key="data_management" value="auto"/>
          </operator>
          <!-- Deactivated and not referenced by any connect element below; it carries the
               same character range (17 to 24) as the Cut operator in the original question. -->
          <operator activated="false" class="cut" compatibility="9.2.000" expanded="true" height="82" name="Cut" width="90" x="380" y="34">
            <parameter key="attribute_filter_type" value="all"/>
            <parameter key="attribute" value=""/>
            <parameter key="attributes" value=""/>
            <parameter key="use_except_expression" value="false"/>
            <parameter key="value_type" value="nominal"/>
            <parameter key="use_value_type_exception" value="false"/>
            <parameter key="except_value_type" value="file_path"/>
            <parameter key="block_type" value="single_value"/>
            <parameter key="use_block_type_exception" value="false"/>
            <parameter key="except_block_type" value="single_value"/>
            <parameter key="invert_selection" value="false"/>
            <parameter key="include_special_attributes" value="false"/>
            <parameter key="first_character_index" value="17"/>
            <parameter key="last_character_index" value="24"/>
          </operator>
          <!-- Fans the image out to two outputs: one for the watermark branch, one for the
               properties branch. -->
          <operator activated="true" class="multiply" compatibility="9.2.000" expanded="true" height="103" name="Multiply" width="90" x="380" y="238"/>
          <!-- Width and height are enabled; name, label and number of segments are disabled.
               NOTE(review): presumably the values land in the macros image_width and
               image_height; confirm against the extension's documentation. -->
          <operator activated="true" class="image:image_properties_as_macro" compatibility="7.0.000" expanded="true" height="68" name="Image Properties As Macro" width="90" x="581" y="391">
            <parameter key="include width" value="true"/>
            <parameter key="width macro" value="image_width"/>
            <parameter key="include height" value="true"/>
            <parameter key="height macro" value="image_height"/>
            <parameter key="include name" value="false"/>
            <parameter key="name macro" value="image_name"/>
            <parameter key="include label" value="false"/>
            <parameter key="label macro" value="image_label"/>
            <parameter key="include number of segments" value="false"/>
            <parameter key="number of segments macro" value="image_segments"/>
          </operator>
          <!-- Applies the text watermark "yyhuangRM" to the image. -->
          <operator activated="true" class="image:text_watermark" compatibility="7.0.000" expanded="true" height="68" name="Text Watermark" width="90" x="581" y="238">
            <parameter key="watermark" value="yyhuangRM"/>
          </operator>
          <!-- Wiring: Open File, Read Image, Multiply; output 1 goes through Text Watermark
               to result 1, output 2 goes through Image Properties As Macro to result 2. -->
          <connect from_op="Open File" from_port="file" to_op="Read Image" to_port="file"/>
          <connect from_op="Read Image" from_port="output" to_op="Multiply" to_port="input"/>
          <connect from_op="Multiply" from_port="output 1" to_op="Text Watermark" to_port="image"/>
          <connect from_op="Multiply" from_port="output 2" to_op="Image Properties As Macro" to_port="image"/>
          <connect from_op="Image Properties As Macro" from_port="image" to_port="result 2"/>
          <connect from_op="Text Watermark" from_port="image" to_port="result 1"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="189"/>
          <portSpacing port="sink_result 2" spacing="0"/>
          <portSpacing port="sink_result 3" spacing="0"/>
        </process>
      </operator>
    </process>
    
    YY
  • yyhuangyyhuang Administrator, Employee, RapidMiner Certified Analyst, RapidMiner Certified Expert, Member Posts: 364 RM Data Scientist
    Bravo Dr @mschmitz, how neat and smart your solution is! 
  • robinrobin Member Posts: 100 Guru
    Thank you @mschmitz! That is a perfect and simple solution. 
Sign In or Register to comment.