<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:clearspace="http://www.jivesoftware.com/xmlns/jive/rss" version="2.0">
  <channel>
    <title>Adobe Community: Message List - Some characters extracted from pdf hasn't corresponding unicode in its font</title>
    <link>https://forums.adobe.com/community/design_development/pdf_language_and_specifications?view=discussions</link>
    <description>Most recent forum messages</description>
    <language>en</language>
    <pubDate>Mon, 10 Feb 2014 11:34:30 GMT</pubDate>
    <generator>Jive Engage 7.0.0.1  (http://jivesoftware.com/products/)</generator>
    <dc:date>2014-02-10T11:34:30Z</dc:date>
    <dc:language>en</dc:language>
    <item>
      <title>Re: Some characters extracted from pdf hasn't corresponding unicode in its font</title>
      <link>https://forums.adobe.com/message/6104774?tstart=0#6104774</link>
      <description>&lt;!-- [DocumentBodyStart:8efc45f5-54b3-4a01-b3f8-e12a3776395d] --&gt;&lt;div class="jive-rendered-content"&gt;&lt;p&gt;No, Acrobat is not free, but you can do the same test with the free Adobe Reader, using Copy/Paste to extract text.&lt;/p&gt;&lt;/div&gt;&lt;!-- [DocumentBodyEnd:8efc45f5-54b3-4a01-b3f8-e12a3776395d] --&gt;&lt;img src='/beacon?t=1415903416775' /&gt;</description>
      <pubDate>Mon, 10 Feb 2014 11:34:30 GMT</pubDate>
      <author>forums_noreply@adobe.com</author>
      <guid>https://forums.adobe.com/message/6104774?tstart=0#6104774</guid>
      <dc:date>2014-02-10T11:34:30Z</dc:date>
      <clearspace:dateToText>9 months 2 days ago</clearspace:dateToText>
      <clearspace:objectType>0</clearspace:objectType>
    </item>
    <item>
      <title>Re: Some characters extracted from pdf hasn't corresponding unicode in its font</title>
      <link>https://forums.adobe.com/message/6104725?tstart=0#6104725</link>
      <description>&lt;!-- [DocumentBodyStart:9c106797-0703-49f8-8bc6-33c55a965a3d] --&gt;&lt;div class="jive-rendered-content"&gt;&lt;p&gt;Never mind, Is it free operation?&lt;/p&gt;&lt;/div&gt;&lt;!-- [DocumentBodyEnd:9c106797-0703-49f8-8bc6-33c55a965a3d] --&gt;</description>
      <pubDate>Mon, 10 Feb 2014 11:24:01 GMT</pubDate>
      <author>forums_noreply@adobe.com</author>
      <guid>https://forums.adobe.com/message/6104725?tstart=0#6104725</guid>
      <dc:date>2014-02-10T11:24:01Z</dc:date>
      <clearspace:dateToText>9 months 2 days ago</clearspace:dateToText>
      <clearspace:replyCount>1</clearspace:replyCount>
      <clearspace:objectType>0</clearspace:objectType>
    </item>
    <item>
      <title>Re: Some characters extracted from pdf hasn't corresponding unicode in its font</title>
      <link>https://forums.adobe.com/message/6104720?tstart=0#6104720</link>
      <description>&lt;!-- [DocumentBodyStart:6053d798-4216-42b5-9bff-c6611b38b05e] --&gt;&lt;div class="jive-rendered-content"&gt;&lt;p&gt;1. Extract text using Acrobat&lt;/p&gt;&lt;p&gt;2. Look at it or analyse it.&lt;/p&gt;&lt;p&gt;Sorry, I know that seems an unhelpful answer but I don't understand what else you can mean.&lt;/p&gt;&lt;/div&gt;&lt;!-- [DocumentBodyEnd:6053d798-4216-42b5-9bff-c6611b38b05e] --&gt;</description>
      <pubDate>Mon, 10 Feb 2014 11:15:59 GMT</pubDate>
      <author>forums_noreply@adobe.com</author>
      <guid>https://forums.adobe.com/message/6104720?tstart=0#6104720</guid>
      <dc:date>2014-02-10T11:15:59Z</dc:date>
      <clearspace:dateToText>9 months 2 days ago</clearspace:dateToText>
      <clearspace:replyCount>2</clearspace:replyCount>
      <clearspace:objectType>0</clearspace:objectType>
    </item>
    <item>
      <title>Re: Some characters extracted from pdf hasn't corresponding unicode in its font</title>
      <link>https://forums.adobe.com/message/6104718?tstart=0#6104718</link>
      <description>&lt;!-- [DocumentBodyStart:5686ebff-f5cb-4a6d-bcdc-e2b3416b1bfb] --&gt;&lt;div class="jive-rendered-content"&gt;&lt;p&gt;How can I test that Acrobat extract text accurately or not?&lt;/p&gt;&lt;/div&gt;&lt;!-- [DocumentBodyEnd:5686ebff-f5cb-4a6d-bcdc-e2b3416b1bfb] --&gt;</description>
      <pubDate>Mon, 10 Feb 2014 11:10:53 GMT</pubDate>
      <author>forums_noreply@adobe.com</author>
      <guid>https://forums.adobe.com/message/6104718?tstart=0#6104718</guid>
      <dc:date>2014-02-10T11:10:53Z</dc:date>
      <clearspace:dateToText>9 months 2 days ago</clearspace:dateToText>
      <clearspace:replyCount>3</clearspace:replyCount>
      <clearspace:objectType>0</clearspace:objectType>
    </item>
    <item>
      <title>Re: Some characters extracted from pdf hasn't corresponding unicode in its font</title>
      <link>https://forums.adobe.com/message/6104587?tstart=0#6104587</link>
      <description>&lt;!-- [DocumentBodyStart:3278761f-6654-4f91-8074-517e2ec6e46e] --&gt;&lt;div class="jive-rendered-content"&gt;&lt;p&gt;You cannot. Probably. Many PDF files do not permit accurate text extraction.&lt;/p&gt;&lt;p style="min-height: 8pt; padding: 0px;"&gt;&amp;nbsp;&lt;/p&gt;&lt;p&gt;A simple test is to see if Acrobat can extract text accurately. Acrobat has 20 years of development in this area, so if it cannot get the text, it is probably the case that you cannot either.&lt;/p&gt;&lt;/div&gt;&lt;!-- [DocumentBodyEnd:3278761f-6654-4f91-8074-517e2ec6e46e] --&gt;</description>
      <pubDate>Mon, 10 Feb 2014 10:23:36 GMT</pubDate>
      <author>forums_noreply@adobe.com</author>
      <guid>https://forums.adobe.com/message/6104587?tstart=0#6104587</guid>
      <dc:date>2014-02-10T10:23:36Z</dc:date>
      <clearspace:dateToText>9 months 2 days ago</clearspace:dateToText>
      <clearspace:replyCount>4</clearspace:replyCount>
      <clearspace:objectType>0</clearspace:objectType>
    </item>
    <item>
      <title>Re: Some characters extracted from pdf hasn't corresponding unicode in its font</title>
      <link>https://forums.adobe.com/message/6104581?tstart=0#6104581</link>
      <description>&lt;!-- [DocumentBodyStart:332cb93d-e925-4d79-a020-8f324e1d5e32] --&gt;&lt;div class="jive-rendered-content"&gt;&lt;p&gt;Ok, now character has no unicode inside pdf or inside font(as shown in image), how can I get this character outside the PDF?&lt;/p&gt;&lt;/div&gt;&lt;!-- [DocumentBodyEnd:332cb93d-e925-4d79-a020-8f324e1d5e32] --&gt;</description>
      <pubDate>Mon, 10 Feb 2014 10:05:51 GMT</pubDate>
      <author>forums_noreply@adobe.com</author>
      <guid>https://forums.adobe.com/message/6104581?tstart=0#6104581</guid>
      <dc:date>2014-02-10T10:05:51Z</dc:date>
      <clearspace:dateToText>9 months 2 days ago</clearspace:dateToText>
      <clearspace:replyCount>5</clearspace:replyCount>
      <clearspace:objectType>0</clearspace:objectType>
    </item>
    <item>
      <title>Re: Some characters extracted from pdf hasn't corresponding unicode in its font</title>
      <link>https://forums.adobe.com/message/6104467?tstart=0#6104467</link>
      <description>&lt;!-- [DocumentBodyStart:90abef79-b299-4b14-8d80-658c5ea1a4a2] --&gt;&lt;div class="jive-rendered-content"&gt;&lt;p&gt;No, Acrobat uses the embedded fonts. You can clearly see this on screen by using unusual fonts.&lt;/p&gt;&lt;p style="min-height: 8pt; padding: 0px;"&gt;&amp;nbsp;&lt;/p&gt;&lt;p&gt;Unicode is not used, and not needed, to show embedded fonts on screen. Either Encoding or CMap are used to directly find characters in the font. The PDF specification has full details.&lt;/p&gt;&lt;p&gt;.&lt;/p&gt;&lt;/div&gt;&lt;!-- [DocumentBodyEnd:90abef79-b299-4b14-8d80-658c5ea1a4a2] --&gt;</description>
      <pubDate>Mon, 10 Feb 2014 09:43:15 GMT</pubDate>
      <author>forums_noreply@adobe.com</author>
      <guid>https://forums.adobe.com/message/6104467?tstart=0#6104467</guid>
      <dc:date>2014-02-10T09:43:15Z</dc:date>
      <clearspace:dateToText>9 months 2 days ago</clearspace:dateToText>
      <clearspace:replyCount>6</clearspace:replyCount>
      <clearspace:objectType>0</clearspace:objectType>
    </item>
    <item>
      <title>Re: Some characters extracted from pdf hasn't corresponding unicode in its font</title>
      <link>https://forums.adobe.com/message/6104485?tstart=0#6104485</link>
      <description>&lt;!-- [DocumentBodyStart:b2f7258d-a23b-45fe-baae-b07ddcaeff17] --&gt;&lt;div class="jive-rendered-content"&gt;&lt;p&gt;Thanks for your reply, you told me that the acrobat do not use unicode to show the text,is this mean that the acrobat ignore the embeded fonts during the show text process?, if this is true so what is the use of the fonts inside the pdf. thanks in advance ,,,,,,,,,,,&lt;/p&gt;&lt;/div&gt;&lt;!-- [DocumentBodyEnd:b2f7258d-a23b-45fe-baae-b07ddcaeff17] --&gt;</description>
      <pubDate>Mon, 10 Feb 2014 09:32:49 GMT</pubDate>
      <author>forums_noreply@adobe.com</author>
      <guid>https://forums.adobe.com/message/6104485?tstart=0#6104485</guid>
      <dc:date>2014-02-10T09:32:49Z</dc:date>
      <clearspace:dateToText>9 months 2 days ago</clearspace:dateToText>
      <clearspace:replyCount>7</clearspace:replyCount>
      <clearspace:objectType>0</clearspace:objectType>
    </item>
    <item>
      <title>Re: Some characters extracted from pdf hasn't corresponding unicode in its font</title>
      <link>https://forums.adobe.com/message/6104396?tstart=0#6104396</link>
      <description>&lt;!-- [DocumentBodyStart:926e6ce3-37bd-45fe-8f90-1c1e6b202edd] --&gt;&lt;div class="jive-rendered-content"&gt;&lt;p&gt;Acrobat does not use Unicode to show the characters in a PDF, so this will always work. Unicode is a layer added during text extraction, and sometimes this will fail, either because there is no mapping defined in the PDF, or because the characters have no Unicode range.&lt;/p&gt;&lt;/div&gt;&lt;!-- [DocumentBodyEnd:926e6ce3-37bd-45fe-8f90-1c1e6b202edd] --&gt;</description>
      <pubDate>Mon, 10 Feb 2014 08:53:59 GMT</pubDate>
      <author>forums_noreply@adobe.com</author>
      <guid>https://forums.adobe.com/message/6104396?tstart=0#6104396</guid>
      <dc:date>2014-02-10T08:53:59Z</dc:date>
      <clearspace:dateToText>9 months 2 days ago</clearspace:dateToText>
      <clearspace:replyCount>8</clearspace:replyCount>
      <clearspace:objectType>0</clearspace:objectType>
    </item>
    <item>
      <title>Some characters extracted from pdf hasn't corresponding unicode in its font</title>
      <link>https://forums.adobe.com/message/6104436?tstart=0#6104436</link>
      <description>&lt;!-- [DocumentBodyStart:7d9865a2-9434-464d-9cf3-eac318fea1c4] --&gt;&lt;div class="jive-rendered-content"&gt;&lt;p&gt;I extracted font form pdf(arabic text) and after I opened this font I found that there are some characters hasn't corresponding unicode although the other has!&lt;/p&gt;&lt;p&gt;I ask about why these characters hasn't corresponding Unicode such as remaining characters? and how PDF read these characters and display it in its file although it has't unicode? because I can't&amp;nbsp; get these characters outside PDF!.&lt;/p&gt;&lt;p style="min-height: 8pt; padding: 0px;"&gt;&amp;nbsp;&lt;/p&gt;&lt;p&gt;This image show part of font I extracted has characters with no unicode(question mark instead).&lt;/p&gt;&lt;p&gt;&lt;a href="https://forums.adobe.com/servlet/JiveServlet/showImage/2-6104436-547663/PartOfFont.png"&gt;&lt;img alt="PartOfFont.png" class="jive-image jive-image-thumbnail" height="45" onclick="" src="https://forums.adobe.com/servlet/JiveServlet/downloadImage/2-6104436-547663/450-45/PartOfFont.png" width="450"/&gt;&lt;/a&gt;&lt;/p&gt;&lt;/div&gt;&lt;!-- [DocumentBodyEnd:7d9865a2-9434-464d-9cf3-eac318fea1c4] --&gt;</description>
      <pubDate>Mon, 10 Feb 2014 08:46:02 GMT</pubDate>
      <author>forums_noreply@adobe.com</author>
      <guid>https://forums.adobe.com/message/6104436?tstart=0#6104436</guid>
      <dc:date>2014-02-10T08:46:02Z</dc:date>
      <clearspace:dateToText>9 months 2 days ago</clearspace:dateToText>
      <clearspace:replyCount>9</clearspace:replyCount>
      <clearspace:objectType>0</clearspace:objectType>
    </item>
  </channel>
</rss>

