// Copyright 2016 PDFium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com

#include "core/fpdfapi/parser/cpdf_data_avail.h"

#include <algorithm>
#include <memory>
#include <utility>

#include "core/fpdfapi/parser/cpdf_array.h"
#include "core/fpdfapi/parser/cpdf_cross_ref_avail.h"
#include "core/fpdfapi/parser/cpdf_dictionary.h"
#include "core/fpdfapi/parser/cpdf_document.h"
#include "core/fpdfapi/parser/cpdf_hint_tables.h"
#include "core/fpdfapi/parser/cpdf_linearized_header.h"
#include "core/fpdfapi/parser/cpdf_name.h"
#include "core/fpdfapi/parser/cpdf_number.h"
#include "core/fpdfapi/parser/cpdf_page_object_avail.h"
#include "core/fpdfapi/parser/cpdf_read_validator.h"
#include "core/fpdfapi/parser/cpdf_reference.h"
#include "core/fpdfapi/parser/cpdf_stream.h"
#include "core/fpdfapi/parser/cpdf_syntax_parser.h"
#include "core/fpdfapi/parser/fpdf_parser_utility.h"
#include "core/fxcrt/autorestorer.h"
#include "core/fxcrt/fx_extension.h"
#include "core/fxcrt/fx_safe_types.h"
#include "core/fxcrt/stl_util.h"
#include "third_party/base/check.h"
#include "third_party/base/containers/contains.h"
#include "third_party/base/notreached.h"
#include "third_party/base/numerics/safe_conversions.h"

namespace {

// static
CPDF_Object* GetResourceObject(CPDF_Dictionary* pDict) {
  constexpr size_t kMaxHierarchyDepth = 64;
  size_t depth = 0;

  CPDF_Dictionary* dictionary_to_check = pDict;
  while (dictionary_to_check) {
    CPDF_Object* result = dictionary_to_check->GetObjectFor("Resources");
    if (result)
      return result;
    CPDF_Object* parent = dictionary_to_check->GetObjectFor("Parent");
    dictionary_to_check = parent ? parent->GetDict() : nullptr;

    if (++depth > kMaxHierarchyDepth) {
      // We have cycle in parents hierarchy.
      return nullptr;
    }
  }
  return nullptr;
}

class HintsScope {
 public:
  HintsScope(RetainPtr<CPDF_ReadValidator> validator,
             CPDF_DataAvail::DownloadHints* hints)
      : validator_(std::move(validator)) {
    DCHECK(validator_);
    validator_->SetDownloadHints(hints);
  }

  ~HintsScope() { validator_->SetDownloadHints(nullptr); }

 private:
  RetainPtr<CPDF_ReadValidator> validator_;
};

}  // namespace

CPDF_DataAvail::FileAvail::~FileAvail() = default;

CPDF_DataAvail::DownloadHints::~DownloadHints() = default;

CPDF_DataAvail::CPDF_DataAvail(
    FileAvail* pFileAvail,
    const RetainPtr<IFX_SeekableReadStream>& pFileRead)
    : m_pFileRead(
          pdfium::MakeRetain<CPDF_ReadValidator>(pFileRead, pFileAvail)),
      m_dwFileLen(m_pFileRead->GetSize()) {}

CPDF_DataAvail::~CPDF_DataAvail() {
  m_pHintTables.reset();
  if (m_pDocument)
    m_pDocument->RemoveObserver(this);
}

void CPDF_DataAvail::OnObservableDestroyed() {
  m_pDocument = nullptr;
  m_pFormAvail.reset();
  m_PagesArray.clear();
  m_PagesObjAvail.clear();
  m_PagesResourcesAvail.clear();
}

CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::IsDocAvail(
    DownloadHints* pHints) {
  if (!m_dwFileLen)
    return kDataError;

  DCHECK(m_SeenPageObjList.empty());
  AutoRestorer<std::set<uint32_t>> seen_objects_restorer(&m_SeenPageObjList);
  const HintsScope hints_scope(GetValidator(), pHints);
  while (!m_bDocAvail) {
    if (!CheckDocStatus())
      return kDataNotAvailable;
  }

  return kDataAvailable;
}

bool CPDF_DataAvail::CheckDocStatus() {
  switch (m_internalStatus) {
    case InternalStatus::kHeader:
      return CheckHeader();
    case InternalStatus::kFirstPage:
      return CheckFirstPage();
    case InternalStatus::kHintTable:
      return CheckHintTables();
    case InternalStatus::kLoadAllCrossRef:
      return CheckAndLoadAllXref();
    case InternalStatus::kLoadAllFile:
      return LoadAllFile();
    case InternalStatus::kRoot:
      return CheckRoot();
    case InternalStatus::kInfo:
      return CheckInfo();
    case InternalStatus::kPageTree:
      if (m_bTotalLoadPageTree)
        return CheckPages();
      return LoadDocPages();
    case InternalStatus::kPage:
      if (m_bTotalLoadPageTree)
        return CheckPage();
      m_internalStatus = InternalStatus::kPageLaterLoad;
      return true;
    case InternalStatus::kError:
      return LoadAllFile();
    case InternalStatus::kPageLaterLoad:
      m_internalStatus = InternalStatus::kPage;
      [[fallthrough]];
    default:
      m_bDocAvail = true;
      return true;
  }
}

bool CPDF_DataAvail::CheckPageStatus() {
  switch (m_internalStatus) {
    case InternalStatus::kPageTree:
      return CheckPages();
    case InternalStatus::kPage:
      return CheckPage();
    case InternalStatus::kError:
      return LoadAllFile();
    default:
      m_bPagesTreeLoad = true;
      m_bPagesLoad = true;
      return true;
  }
}

bool CPDF_DataAvail::LoadAllFile() {
  if (GetValidator()->CheckWholeFileAndRequestIfUnavailable()) {
    m_internalStatus = InternalStatus::kDone;
    return true;
  }
  return false;
}

bool CPDF_DataAvail::CheckAndLoadAllXref() {
  if (!m_pCrossRefAvail) {
    CPDF_ReadValidator::ScopedSession read_session(GetValidator());
    const FX_FILESIZE last_xref_offset = m_parser.ParseStartXRef();
    if (GetValidator()->has_read_problems())
      return false;

    if (last_xref_offset <= 0) {
      m_internalStatus = InternalStatus::kError;
      return false;
    }

    m_pCrossRefAvail = std::make_unique<CPDF_CrossRefAvail>(GetSyntaxParser(),
                                                            last_xref_offset);
  }

  switch (m_pCrossRefAvail->CheckAvail()) {
    case kDataAvailable:
      break;
    case kDataNotAvailable:
      return false;
    case kDataError:
      m_internalStatus = InternalStatus::kError;
      return false;
    default:
      NOTREACHED();
      return false;
  }

  if (!m_parser.LoadAllCrossRefV4(m_pCrossRefAvail->last_crossref_offset()) &&
      !m_parser.LoadAllCrossRefV5(m_pCrossRefAvail->last_crossref_offset())) {
    m_internalStatus = InternalStatus::kLoadAllFile;
    return false;
  }

  m_internalStatus = InternalStatus::kRoot;
  return true;
}

RetainPtr<CPDF_Object> CPDF_DataAvail::GetObject(uint32_t objnum,
                                                 bool* pExistInFile) {
  *pExistInFile = false;
  CPDF_Parser* pParser = m_pDocument ? m_pDocument->GetParser() : &m_parser;
  if (!pParser)
    return nullptr;

  CPDF_ReadValidator::ScopedSession read_session(GetValidator());
  RetainPtr<CPDF_Object> pRet = pParser->ParseIndirectObject(objnum);
  if (!pRet)
    return nullptr;

  *pExistInFile = true;
  if (GetValidator()->has_read_problems())
    return nullptr;

  return pRet;
}

bool CPDF_DataAvail::CheckInfo() {
  const uint32_t dwInfoObjNum = m_parser.GetInfoObjNum();
  if (dwInfoObjNum == CPDF_Object::kInvalidObjNum) {
    m_internalStatus = InternalStatus::kPageTree;
    return true;
  }

  CPDF_ReadValidator::ScopedSession read_session(GetValidator());
  m_parser.ParseIndirectObject(dwInfoObjNum);
  if (GetValidator()->has_read_problems())
    return false;

  m_internalStatus = InternalStatus::kPageTree;
  return true;
}

bool CPDF_DataAvail::CheckRoot() {
  const uint32_t dwRootObjNum = m_parser.GetRootObjNum();
  if (dwRootObjNum == CPDF_Object::kInvalidObjNum) {
    m_internalStatus = InternalStatus::kError;
    return true;
  }

  CPDF_ReadValidator::ScopedSession read_session(GetValidator());
  m_pRoot = ToDictionary(m_parser.ParseIndirectObject(dwRootObjNum));
  if (GetValidator()->has_read_problems())
    return false;

  const CPDF_Reference* pRef =
      ToReference(m_pRoot ? m_pRoot->GetObjectFor("Pages") : nullptr);
  if (!pRef) {
    m_internalStatus = InternalStatus::kError;
    return false;
  }

  m_PagesObjNum = pRef->GetRefObjNum();
  m_internalStatus = InternalStatus::kInfo;
  return true;
}

bool CPDF_DataAvail::PreparePageItem() {
  const CPDF_Dictionary* pRoot = m_pDocument->GetRoot();
  const CPDF_Reference* pRef =
      ToReference(pRoot ? pRoot->GetObjectFor("Pages") : nullptr);
  if (!pRef) {
    m_internalStatus = InternalStatus::kError;
    return false;
  }

  m_PagesObjNum = pRef->GetRefObjNum();
  m_internalStatus = InternalStatus::kPageTree;
  return true;
}

bool CPDF_DataAvail::IsFirstCheck(uint32_t dwPage) {
  return m_pageMapCheckState.insert(dwPage).second;
}

void CPDF_DataAvail::ResetFirstCheck(uint32_t dwPage) {
  m_pageMapCheckState.erase(dwPage);
}

bool CPDF_DataAvail::CheckPage() {
  std::vector<uint32_t> UnavailObjList;
  for (uint32_t dwPageObjNum : m_PageObjList) {
    bool bExists = false;
    RetainPtr<CPDF_Object> pObj = GetObject(dwPageObjNum, &bExists);
    if (!pObj) {
      if (bExists)
        UnavailObjList.push_back(dwPageObjNum);
      continue;
    }

    switch (pObj->GetType()) {
      case CPDF_Object::kArray: {
        CPDF_ArrayLocker locker(pObj->AsArray());
        for (const auto& pArrayObj : locker) {
          const CPDF_Reference* pRef = ToReference(pArrayObj.Get());
          if (pRef)
            UnavailObjList.push_back(pRef->GetRefObjNum());
        }
        break;
      }
      case CPDF_Object::kDictionary:
        if (pObj->GetDict()->GetNameFor("Type") == "Pages")
          m_PagesArray.push_back(std::move(pObj));
        break;
      default:
        break;
    }
  }
  m_PageObjList.clear();
  if (!UnavailObjList.empty()) {
    m_PageObjList = std::move(UnavailObjList);
    return false;
  }
  size_t iPages = m_PagesArray.size();
  for (size_t i = 0; i < iPages; ++i) {
    RetainPtr<CPDF_Object> pPages = std::move(m_PagesArray[i]);
    if (pPages && !GetPageKids(pPages.Get())) {
      m_PagesArray.clear();
      m_internalStatus = InternalStatus::kError;
      return false;
    }
  }
  m_PagesArray.clear();
  if (m_PageObjList.empty())
    m_internalStatus = InternalStatus::kDone;

  return true;
}

bool CPDF_DataAvail::GetPageKids(CPDF_Object* pPages) {
  const CPDF_Dictionary* pDict = pPages->GetDict();
  const CPDF_Object* pKids = pDict ? pDict->GetObjectFor("Kids") : nullptr;
  if (!pKids)
    return true;

  std::vector<uint32_t> object_numbers;
  switch (pKids->GetType()) {
    case CPDF_Object::kReference:
      object_numbers.push_back(pKids->AsReference()->GetRefObjNum());
      break;
    case CPDF_Object::kArray: {
      CPDF_ArrayLocker locker(pKids->AsArray());
      for (const auto& pArrayObj : locker) {
        const CPDF_Reference* pRef = ToReference(pArrayObj.Get());
        if (pRef)
          object_numbers.push_back(pRef->GetRefObjNum());
      }
      break;
    }
    default:
      m_internalStatus = InternalStatus::kError;
      return false;
  }

  for (uint32_t num : object_numbers) {
    bool inserted = m_SeenPageObjList.insert(num).second;
    if (inserted)
      m_PageObjList.push_back(num);
  }
  return true;
}

bool CPDF_DataAvail::CheckPages() {
  bool bExists = false;
  RetainPtr<CPDF_Object> pPages = GetObject(m_PagesObjNum, &bExists);
  if (!bExists) {
    m_internalStatus = InternalStatus::kLoadAllFile;
    return true;
  }

  if (!pPages) {
    if (m_internalStatus == InternalStatus::kError) {
      m_internalStatus = InternalStatus::kLoadAllFile;
      return true;
    }
    return false;
  }

  if (!GetPageKids(pPages.Get())) {
    m_internalStatus = InternalStatus::kError;
    return false;
  }

  m_internalStatus = InternalStatus::kPage;
  return true;
}

bool CPDF_DataAvail::CheckHeader() {
  switch (CheckHeaderAndLinearized()) {
    case kDataAvailable:
      m_internalStatus = m_pLinearized ? InternalStatus::kFirstPage
                                       : InternalStatus::kLoadAllCrossRef;
      return true;
    case kDataNotAvailable:
      return false;
    case kDataError:
      m_internalStatus = InternalStatus::kError;
      return true;
    default:
      NOTREACHED();
      return false;
  }
}

bool CPDF_DataAvail::CheckFirstPage() {
  if (!m_pLinearized->GetFirstPageEndOffset() ||
      !m_pLinearized->GetFileSize() ||
      !m_pLinearized->GetMainXRefTableFirstEntryOffset()) {
    m_internalStatus = InternalStatus::kError;
    return false;
  }

  uint32_t dwEnd = m_pLinearized->GetFirstPageEndOffset();
  dwEnd += 512;
  if ((FX_FILESIZE)dwEnd > m_dwFileLen)
    dwEnd = (uint32_t)m_dwFileLen;

  const FX_FILESIZE start_pos = m_dwFileLen > 1024 ? 1024 : m_dwFileLen;
  const size_t data_size = dwEnd > 1024 ? static_cast<size_t>(dwEnd - 1024) : 0;
  if (!GetValidator()->CheckDataRangeAndRequestIfUnavailable(start_pos,
                                                             data_size))
    return false;

  m_internalStatus = InternalStatus::kHintTable;
  return true;
}

bool CPDF_DataAvail::CheckHintTables() {
  CPDF_ReadValidator::ScopedSession read_session(GetValidator());
  m_pHintTables =
      CPDF_HintTables::Parse(GetSyntaxParser(), m_pLinearized.get());

  if (GetValidator()->read_error()) {
    m_internalStatus = InternalStatus::kError;
    return true;
  }
  if (GetValidator()->has_unavailable_data())
    return false;

  m_internalStatus = InternalStatus::kDone;
  return true;
}

RetainPtr<CPDF_Object> CPDF_DataAvail::ParseIndirectObjectAt(
    FX_FILESIZE pos,
    uint32_t objnum,
    CPDF_IndirectObjectHolder* pObjList) const {
  const FX_FILESIZE SavedPos = GetSyntaxParser()->GetPos();
  GetSyntaxParser()->SetPos(pos);
  RetainPtr<CPDF_Object> result = GetSyntaxParser()->GetIndirectObject(
      pObjList, CPDF_SyntaxParser::ParseType::kLoose);
  GetSyntaxParser()->SetPos(SavedPos);
  return (result && (!objnum || result->GetObjNum() == objnum))
             ? std::move(result)
             : nullptr;
}

CPDF_DataAvail::DocLinearizationStatus CPDF_DataAvail::IsLinearizedPDF() {
  switch (CheckHeaderAndLinearized()) {
    case kDataAvailable:
      return m_pLinearized ? kLinearized : kNotLinearized;
    case kDataNotAvailable:
      return kLinearizationUnknown;
    case kDataError:
      return kNotLinearized;
    default:
      NOTREACHED();
      return kLinearizationUnknown;
  }
}

CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::CheckHeaderAndLinearized() {
  if (m_bHeaderAvail)
    return kDataAvailable;

  CPDF_ReadValidator::ScopedSession read_session(GetValidator());
  const absl::optional<FX_FILESIZE> header_offset =
      GetHeaderOffset(GetValidator());
  if (GetValidator()->has_read_problems())
    return kDataNotAvailable;

  if (!header_offset.has_value())
    return kDataError;

  m_parser.m_pSyntax = std::make_unique<CPDF_SyntaxParser>(
      GetValidator(), header_offset.value());
  m_pLinearized = m_parser.ParseLinearizedHeader();
  if (GetValidator()->has_read_problems())
    return kDataNotAvailable;

  m_bHeaderAvail = true;
  return kDataAvailable;
}

bool CPDF_DataAvail::CheckPage(uint32_t dwPage) {
  while (true) {
    switch (m_internalStatus) {
      case InternalStatus::kPageTree:
        if (!LoadDocPages())
          return false;
        break;
      case InternalStatus::kPage:
        if (!LoadDocPage(dwPage))
          return false;
        break;
      case InternalStatus::kError:
        return LoadAllFile();
      default:
        m_bPagesTreeLoad = true;
        m_bPagesLoad = true;
        m_bCurPageDictLoadOK = true;
        m_internalStatus = InternalStatus::kPage;
        return true;
    }
  }
}

bool CPDF_DataAvail::CheckArrayPageNode(uint32_t dwPageNo,
                                        PageNode* pPageNode) {
  bool bExists = false;
  RetainPtr<CPDF_Object> pPages = GetObject(dwPageNo, &bExists);
  if (!bExists) {
    m_internalStatus = InternalStatus::kError;
    return false;
  }

  if (!pPages)
    return false;

  CPDF_Array* pArray = pPages->AsArray();
  if (!pArray) {
    m_internalStatus = InternalStatus::kError;
    return false;
  }

  pPageNode->m_type = PageNode::Type::kPages;
  for (size_t i = 0; i < pArray->size(); ++i) {
    CPDF_Reference* pKid = ToReference(pArray->GetObjectAt(i));
    if (!pKid)
      continue;

    auto pNode = std::make_unique<PageNode>();
    pNode->m_dwPageNo = pKid->GetRefObjNum();
    pPageNode->m_ChildNodes.push_back(std::move(pNode));
  }
  return true;
}

bool CPDF_DataAvail::CheckUnknownPageNode(uint32_t dwPageNo,
                                          PageNode* pPageNode) {
  bool bExists = false;
  RetainPtr<CPDF_Object> pPage = GetObject(dwPageNo, &bExists);
  if (!bExists) {
    m_internalStatus = InternalStatus::kError;
    return false;
  }

  if (!pPage)
    return false;

  if (pPage->IsArray()) {
    pPageNode->m_dwPageNo = dwPageNo;
    pPageNode->m_type = PageNode::Type::kArray;
    return true;
  }

  if (!pPage->IsDictionary()) {
    m_internalStatus = InternalStatus::kError;
    return false;
  }

  pPageNode->m_dwPageNo = dwPageNo;
  CPDF_Dictionary* pDict = pPage->GetDict();
  const ByteString type = pDict->GetNameFor("Type");
  if (type == "Page") {
    pPageNode->m_type = PageNode::Type::kPage;
    return true;
  }

  if (type != "Pages") {
    m_internalStatus = InternalStatus::kError;
    return false;
  }

  pPageNode->m_type = PageNode::Type::kPages;
  CPDF_Object* pKids = pDict->GetObjectFor("Kids");
  if (!pKids) {
    m_internalStatus = InternalStatus::kPage;
    return true;
  }

  switch (pKids->GetType()) {
    case CPDF_Object::kReference: {
      CPDF_Reference* pKid = pKids->AsReference();
      auto pNode = std::make_unique<PageNode>();
      pNode->m_dwPageNo = pKid->GetRefObjNum();
      pPageNode->m_ChildNodes.push_back(std::move(pNode));
      break;
    }
    case CPDF_Object::kArray: {
      CPDF_Array* pKidsArray = pKids->AsArray();
      for (size_t i = 0; i < pKidsArray->size(); ++i) {
        CPDF_Reference* pKid = ToReference(pKidsArray->GetObjectAt(i));
        if (!pKid)
          continue;

        auto pNode = std::make_unique<PageNode>();
        pNode->m_dwPageNo = pKid->GetRefObjNum();
        pPageNode->m_ChildNodes.push_back(std::move(pNode));
      }
      break;
    }
    default:
      break;
  }
  return true;
}

bool CPDF_DataAvail::CheckPageNode(const CPDF_DataAvail::PageNode& pageNode,
                                   int32_t iPage,
                                   int32_t& iCount,
                                   int level) {
  if (level >= kMaxPageRecursionDepth)
    return false;

  int32_t iSize = fxcrt::CollectionSize<int32_t>(pageNode.m_ChildNodes);
  if (iSize <= 0 || iPage >= iSize) {
    m_internalStatus = InternalStatus::kError;
    return false;
  }
  for (int32_t i = 0; i < iSize; ++i) {
    PageNode* pNode = pageNode.m_ChildNodes[i].get();
    if (!pNode)
      continue;

    if (pNode->m_type == PageNode::Type::kUnknown) {
      // Updates the type for the unknown page node.
      if (!CheckUnknownPageNode(pNode->m_dwPageNo, pNode))
        return false;
    }
    if (pNode->m_type == PageNode::Type::kArray) {
      // Updates a more specific type for the array page node.
      if (!CheckArrayPageNode(pNode->m_dwPageNo, pNode))
        return false;
    }
    switch (pNode->m_type) {
      case PageNode::Type::kPage:
        iCount++;
        if (iPage == iCount && m_pDocument)
          m_pDocument->SetPageObjNum(iPage, pNode->m_dwPageNo);
        break;
      case PageNode::Type::kPages:
        if (!CheckPageNode(*pNode, iPage, iCount, level + 1))
          return false;
        break;
      case PageNode::Type::kUnknown:
      case PageNode::Type::kArray:
        // Already converted above, error if we get here.
        return false;
    }
    if (iPage == iCount) {
      m_internalStatus = InternalStatus::kDone;
      return true;
    }
  }
  return true;
}

bool CPDF_DataAvail::LoadDocPage(uint32_t dwPage) {
  int iPage = pdfium::base::checked_cast<int>(dwPage);
  if (m_pDocument->GetPageCount() <= iPage ||
      m_pDocument->IsPageLoaded(iPage)) {
    m_internalStatus = InternalStatus::kDone;
    return true;
  }
  if (m_PageNode.m_type == PageNode::Type::kPage) {
    m_internalStatus =
        iPage == 0 ? InternalStatus::kDone : InternalStatus::kError;
    return true;
  }
  int32_t iCount = -1;
  return CheckPageNode(m_PageNode, iPage, iCount, 0);
}

bool CPDF_DataAvail::CheckPageCount() {
  bool bExists = false;
  RetainPtr<CPDF_Object> pPages = GetObject(m_PagesObjNum, &bExists);
  if (!bExists) {
    m_internalStatus = InternalStatus::kError;
    return false;
  }
  if (!pPages)
    return false;

  CPDF_Dictionary* pPagesDict = pPages->GetDict();
  if (!pPagesDict) {
    m_internalStatus = InternalStatus::kError;
    return false;
  }
  if (!pPagesDict->KeyExist("Kids"))
    return true;

  return pPagesDict->GetIntegerFor("Count") > 0;
}

bool CPDF_DataAvail::LoadDocPages() {
  if (!CheckUnknownPageNode(m_PagesObjNum, &m_PageNode))
    return false;

  if (CheckPageCount()) {
    m_internalStatus = InternalStatus::kPage;
    return true;
  }

  m_bTotalLoadPageTree = true;
  return false;
}

bool CPDF_DataAvail::LoadPages() {
  while (!m_bPagesTreeLoad) {
    if (!CheckPageStatus())
      return false;
  }

  if (m_bPagesLoad)
    return true;

  m_pDocument->LoadPages();
  return false;
}

CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::CheckLinearizedData() {
  if (m_bLinearedDataOK)
    return kDataAvailable;
  DCHECK(m_pLinearized);
  if (!m_pLinearized->GetMainXRefTableFirstEntryOffset() || !m_pDocument ||
      !m_pDocument->GetParser() || !m_pDocument->GetParser()->GetTrailer()) {
    return kDataError;
  }

  if (!m_bMainXRefLoadTried) {
    const FX_SAFE_FILESIZE prev =
        m_pDocument->GetParser()->GetTrailer()->GetIntegerFor("Prev");
    const FX_FILESIZE main_xref_offset = prev.ValueOrDefault(-1);
    if (main_xref_offset < 0)
      return kDataError;

    if (main_xref_offset == 0)
      return kDataAvailable;

    FX_SAFE_SIZE_T data_size = m_dwFileLen;
    data_size -= main_xref_offset;
    if (!data_size.IsValid())
      return kDataError;

    if (!GetValidator()->CheckDataRangeAndRequestIfUnavailable(
            main_xref_offset, data_size.ValueOrDie()))
      return kDataNotAvailable;

    CPDF_Parser::Error eRet =
        m_pDocument->GetParser()->LoadLinearizedMainXRefTable();
    m_bMainXRefLoadTried = true;
    if (eRet != CPDF_Parser::SUCCESS)
      return kDataError;

    if (!PreparePageItem())
      return kDataNotAvailable;

    m_bMainXRefLoadedOK = true;
    m_bLinearedDataOK = true;
  }

  return m_bLinearedDataOK ? kDataAvailable : kDataNotAvailable;
}

CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::IsPageAvail(
    uint32_t dwPage,
    DownloadHints* pHints) {
  if (!m_pDocument)
    return kDataError;

  const int iPage = pdfium::base::checked_cast<int>(dwPage);
  if (iPage >= m_pDocument->GetPageCount()) {
    // This is XFA page.
    return kDataAvailable;
  }

  if (IsFirstCheck(dwPage)) {
    m_bCurPageDictLoadOK = false;
  }

  if (pdfium::Contains(m_pagesLoadState, dwPage))
    return kDataAvailable;

  const HintsScope hints_scope(GetValidator(), pHints);
  if (m_pLinearized) {
    if (dwPage == m_pLinearized->GetFirstPageNo()) {
      auto* pPageDict = m_pDocument->GetPageDictionary(iPage);
      if (!pPageDict)
        return kDataError;

      auto page_num_obj = std::make_pair(
          dwPage, std::make_unique<CPDF_PageObjectAvail>(
                      GetValidator(), m_pDocument.Get(), pPageDict));

      CPDF_PageObjectAvail* page_obj_avail =
          m_PagesObjAvail.insert(std::move(page_num_obj)).first->second.get();
      // TODO(art-snake): Check resources.
      return page_obj_avail->CheckAvail();
    }

    DocAvailStatus nResult = CheckLinearizedData();
    if (nResult != kDataAvailable)
      return nResult;

    if (m_pHintTables) {
      nResult = m_pHintTables->CheckPage(dwPage);
      if (nResult != kDataAvailable)
        return nResult;
      if (GetPageDictionary(dwPage)) {
        m_pagesLoadState.insert(dwPage);
        return kDataAvailable;
      }
    }

    if (!m_bMainXRefLoadedOK) {
      if (!LoadAllFile())
        return kDataNotAvailable;
      m_pDocument->GetParser()->RebuildCrossRef();
      ResetFirstCheck(dwPage);
      return kDataAvailable;
    }
    if (m_bTotalLoadPageTree) {
      if (!LoadPages())
        return kDataNotAvailable;
    } else {
      if (!m_bCurPageDictLoadOK && !CheckPage(dwPage))
        return kDataNotAvailable;
    }
  } else {
    if (!m_bTotalLoadPageTree && !m_bCurPageDictLoadOK && !CheckPage(dwPage)) {
      return kDataNotAvailable;
    }
  }

  if (CheckAcroForm() == kFormNotAvailable)
    return kDataNotAvailable;

  auto* pPageDict = m_pDocument->GetPageDictionary(iPage);
  if (!pPageDict)
    return kDataError;

  {
    auto page_num_obj = std::make_pair(
        dwPage, std::make_unique<CPDF_PageObjectAvail>(
                    GetValidator(), m_pDocument.Get(), pPageDict));
    CPDF_PageObjectAvail* page_obj_avail =
        m_PagesObjAvail.insert(std::move(page_num_obj)).first->second.get();
    const DocAvailStatus status = page_obj_avail->CheckAvail();
    if (status != kDataAvailable)
      return status;
  }

  const DocAvailStatus resources_status = CheckResources(pPageDict);
  if (resources_status != kDataAvailable)
    return resources_status;

  m_bCurPageDictLoadOK = false;
  ResetFirstCheck(dwPage);
  m_pagesLoadState.insert(dwPage);
  return kDataAvailable;
}

CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::CheckResources(
    CPDF_Dictionary* page) {
  DCHECK(page);
  CPDF_ReadValidator::ScopedSession read_session(GetValidator());
  CPDF_Object* resources = GetResourceObject(page);
  if (GetValidator()->has_read_problems())
    return kDataNotAvailable;

  if (!resources)
    return kDataAvailable;

  CPDF_PageObjectAvail* resource_avail =
      m_PagesResourcesAvail
          .insert(std::make_pair(
              resources, std::make_unique<CPDF_PageObjectAvail>(
                             GetValidator(), m_pDocument.Get(), resources)))
          .first->second.get();
  return resource_avail->CheckAvail();
}

RetainPtr<CPDF_ReadValidator> CPDF_DataAvail::GetValidator() const {
  return m_pFileRead;
}

CPDF_SyntaxParser* CPDF_DataAvail::GetSyntaxParser() const {
  return m_pDocument ? m_pDocument->GetParser()->m_pSyntax.get()
                     : m_parser.m_pSyntax.get();
}

int CPDF_DataAvail::GetPageCount() const {
  if (m_pLinearized)
    return m_pLinearized->GetPageCount();
  return m_pDocument ? m_pDocument->GetPageCount() : 0;
}

CPDF_Dictionary* CPDF_DataAvail::GetPageDictionary(int index) const {
  if (!m_pDocument || index < 0 || index >= GetPageCount())
    return nullptr;
  CPDF_Dictionary* page = m_pDocument->GetPageDictionary(index);
  if (page)
    return page;
  if (!m_pLinearized || !m_pHintTables)
    return nullptr;

  if (index == static_cast<int>(m_pLinearized->GetFirstPageNo()))
    return nullptr;
  FX_FILESIZE szPageStartPos = 0;
  FX_FILESIZE szPageLength = 0;
  uint32_t dwObjNum = 0;
  const bool bPagePosGot = m_pHintTables->GetPagePos(index, &szPageStartPos,
                                                     &szPageLength, &dwObjNum);
  if (!bPagePosGot || !dwObjNum)
    return nullptr;
  // We should say to the document, which object is the page.
  m_pDocument->SetPageObjNum(index, dwObjNum);
  // Page object already can be parsed in document.
  if (!m_pDocument->GetIndirectObject(dwObjNum)) {
    m_pDocument->ReplaceIndirectObjectIfHigherGeneration(
        dwObjNum,
        ParseIndirectObjectAt(szPageStartPos, dwObjNum, m_pDocument.Get()));
  }
  if (!ValidatePage(index))
    return nullptr;
  return m_pDocument->GetPageDictionary(index);
}

CPDF_DataAvail::DocFormStatus CPDF_DataAvail::IsFormAvail(
    DownloadHints* pHints) {
  const HintsScope hints_scope(GetValidator(), pHints);
  return CheckAcroForm();
}

CPDF_DataAvail::DocFormStatus CPDF_DataAvail::CheckAcroForm() {
  if (!m_pDocument)
    return kFormAvailable;

  if (m_pLinearized) {
    DocAvailStatus nDocStatus = CheckLinearizedData();
    if (nDocStatus == kDataError)
      return kFormError;
    if (nDocStatus == kDataNotAvailable)
      return kFormNotAvailable;
  }

  if (!m_pFormAvail) {
    CPDF_Dictionary* pRoot = m_pDocument->GetRoot();
    if (!pRoot)
      return kFormAvailable;

    CPDF_Object* pAcroForm = pRoot->GetObjectFor("AcroForm");
    if (!pAcroForm)
      return kFormNotExist;

    m_pFormAvail = std::make_unique<CPDF_PageObjectAvail>(
        GetValidator(), m_pDocument.Get(), pAcroForm);
  }
  switch (m_pFormAvail->CheckAvail()) {
    case kDataError:
      return kFormError;
    case kDataNotAvailable:
      return kFormNotAvailable;
    case kDataAvailable:
      return kFormAvailable;
    default:
      NOTREACHED();
  }
  return kFormError;
}

bool CPDF_DataAvail::ValidatePage(uint32_t dwPage) const {
  int iPage = pdfium::base::checked_cast<int>(dwPage);
  auto* pPageDict = m_pDocument->GetPageDictionary(iPage);
  if (!pPageDict)
    return false;
  CPDF_PageObjectAvail obj_avail(GetValidator(), m_pDocument.Get(), pPageDict);
  return obj_avail.CheckAvail() == kDataAvailable;
}

std::pair<CPDF_Parser::Error, std::unique_ptr<CPDF_Document>>
CPDF_DataAvail::ParseDocument(
    std::unique_ptr<CPDF_Document::RenderDataIface> pRenderData,
    std::unique_ptr<CPDF_Document::PageDataIface> pPageData,
    const ByteString& password) {
  if (m_pDocument) {
    // We already returned parsed document.
    return std::make_pair(CPDF_Parser::HANDLER_ERROR, nullptr);
  }
  auto document = std::make_unique<CPDF_Document>(std::move(pRenderData),
                                                  std::move(pPageData));
  document->AddObserver(this);

  CPDF_ReadValidator::ScopedSession read_session(GetValidator());
  CPDF_Parser::Error error =
      document->LoadLinearizedDoc(GetValidator(), password);

  // Additional check, that all ok.
  if (GetValidator()->has_read_problems()) {
    NOTREACHED();
    return std::make_pair(CPDF_Parser::HANDLER_ERROR, nullptr);
  }

  if (error != CPDF_Parser::SUCCESS)
    return std::make_pair(error, nullptr);

  m_pDocument = document.get();
  return std::make_pair(CPDF_Parser::SUCCESS, std::move(document));
}

CPDF_DataAvail::PageNode::PageNode() = default;

CPDF_DataAvail::PageNode::~PageNode() = default;
