{
  "_id": "6a10553cacfb0bcc41ca2476",
  "Package": "ORscraper",
  "Type": "Package",
  "Title": "Extract Information from Clinical Reports from 'Oncomine\nReporter' and NCBI 'ClinVar'",
  "Version": "0.1.1",
  "Authors@R": "c(\nperson(\"Samuel\", \"González\", email = \"samugonz0204@gmail.com\", role = c(\"aut\", \"cre\"), comment = c(ORCID = \"0009-0007-9531-9821\")),\nperson(\"Antonio Jesus\", \"Canepa\", role = \"ctb\", comment = c(ORCID = \"0000-0002-0608-2743\")),\nperson(\"Patricia\", \"Saiz\", role = \"ctb\", comment = c(ORCID = \"0000-0001-7106-5192\")),\nperson(\"María\", \"González\", role = \"ctb\", comment = c(ORCID = \"0009-0000-1887-4644\"))\n)",
  "Description": "Clinical reports generated by 'Oncomine Reporter' software\ncontain critical data in unstructured PDF format, making manual\nextraction time-consuming and error-prone. 'ORscraper' provides\na coherent suite of functions to automate this process,\nallowing researchers to parse reports, identify key biomarkers,\nextract genetic variant tables, and filter results. It also\nintegrates with the NCBI 'ClinVar' API\n<https://www.ncbi.nlm.nih.gov/clinvar/> to enrich extracted\ndata.",
  "License": "MIT + file LICENSE",
  "Encoding": "UTF-8",
  "RoxygenNote": "7.3.3",
  "SystemRequirements": "poppler-cpp (>= 0.73)",
  "Config/testthat/edition": "3",
  "VignetteBuilder": "knitr",
  "URL": "https://github.com/SamuelGonzalez0204/ORscraper",
  "BugReports": "https://github.com/SamuelGonzalez0204/ORscraper/issues",
  "Language": "en-US",
  "Config/pak/sysreqs": "libicu-dev libjpeg-dev libxml2-dev libssl-dev\nlibpoppler-cpp-dev poppler-data",
  "Repository": "https://samuelgonzalez0204.r-universe.dev",
  "Date/Publication": "2026-03-10 20:37:03 UTC",
  "RemoteUrl": "https://github.com/samuelgonzalez0204/orscraper",
  "RemoteRef": "HEAD",
  "RemoteSha": "30ef489dbaa16a2884dd1231c8a43beb41bb8595",
  "NeedsCompilation": "no",
  "Packaged": {
    "Date": "2026-05-09 08:14:49 UTC",
    "User": "root"
  },
  "Author": "Samuel González [aut, cre] (ORCID:\n<https://orcid.org/0009-0007-9531-9821>),\nAntonio Jesus Canepa [ctb] (ORCID:\n<https://orcid.org/0000-0002-0608-2743>),\nPatricia Saiz [ctb] (ORCID: <https://orcid.org/0000-0001-7106-5192>),\nMaría González [ctb] (ORCID: <https://orcid.org/0009-0000-1887-4644>)",
  "Maintainer": "Samuel González <samugonz0204@gmail.com>",
  "MD5sum": "75b8fe4db01be4c823b2a6f8ef0dbfd6",
  "_user": "samuelgonzalez0204",
  "_type": "src",
  "_file": "ORscraper_0.1.1.tar.gz",
  "_fileid": "0fae88e3bb00c492074e0644b136fbda478d943fdb7770ffe76e1b6c3ccf8b56",
  "_filesize": 618136,
  "_sha256": "0fae88e3bb00c492074e0644b136fbda478d943fdb7770ffe76e1b6c3ccf8b56",
  "_created": "2026-05-09T08:14:49.000Z",
  "_published": "2026-05-22T13:08:12.063Z",
  "_distro": "noble",
  "_jobs": [
    {
      "job": 77385691491,
      "time": 137,
      "config": "linux-devel-x86_64",
      "r": "4.7.0",
      "check": "NOTE",
      "artifact": "6894253536"
    },
    {
      "job": 77385691696,
      "time": 128,
      "config": "linux-release-x86_64",
      "r": "4.6.0",
      "check": "NOTE",
      "artifact": "6894253225"
    },
    {
      "job": 77385691736,
      "time": 144,
      "config": "macos-oldrel-arm64",
      "r": "4.5.3",
      "check": "NOTE",
      "artifact": "6894251749"
    },
    {
      "job": 77385691063,
      "time": 121,
      "config": "macos-release-arm64",
      "r": "4.6.0",
      "check": "NOTE",
      "artifact": "6894250301"
    },
    {
      "job": 77385690577,
      "time": 220,
      "config": "source",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "6894241010"
    },
    {
      "job": 77385690757,
      "time": 120,
      "config": "wasm-release",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7161243370"
    },
    {
      "job": 77385691730,
      "time": 89,
      "config": "windows-devel",
      "r": "4.7.0",
      "check": "NOTE",
      "artifact": "6894249371"
    },
    {
      "job": 77385691763,
      "time": 82,
      "config": "windows-oldrel",
      "r": "4.5.3",
      "check": "NOTE",
      "artifact": "6894248661"
    },
    {
      "job": 77385691459,
      "time": 86,
      "config": "windows-release",
      "r": "4.6.0",
      "check": "NOTE",
      "artifact": "6894248931"
    }
  ],
  "_buildurl": "https://github.com/r-universe/samuelgonzalez0204/actions/runs/25596191019",
  "_status": "success",
  "_host": "GitHub-Actions",
  "_upstream": "https://github.com/samuelgonzalez0204/orscraper",
  "_commit": {
    "id": "30ef489dbaa16a2884dd1231c8a43beb41bb8595",
    "author": "SamuelGonzalez0204 <sammugonz0204@gmail.com>",
    "committer": "SamuelGonzalez0204 <sammugonz0204@gmail.com>",
    "message": "cran-comments\n",
    "time": 1773175023
  },
  "_maintainer": {
    "name": "Samuel González",
    "email": "samugonz0204@gmail.com",
    "login": "samuelgonzalez0204",
    "uuid": 154916568
  },
  "_registered": true,
  "_dependencies": [
    {
      "package": "R",
      "version": ">= 4.0.0",
      "role": "Depends"
    },
    {
      "package": "pdftools",
      "role": "Imports"
    },
    {
      "package": "stringr",
      "role": "Imports"
    },
    {
      "package": "readxl",
      "role": "Imports"
    },
    {
      "package": "rentrez",
      "role": "Imports"
    },
    {
      "package": "testthat",
      "version": ">= 3.0.0",
      "role": "Suggests"
    },
    {
      "package": "rmarkdown",
      "role": "Suggests"
    },
    {
      "package": "knitr",
      "role": "Suggests"
    },
    {
      "package": "mockery",
      "role": "Suggests"
    },
    {
      "package": "spelling",
      "role": "Suggests"
    }
  ],
  "_owner": "samuelgonzalez0204",
  "_selfowned": true,
  "_usedby": 0,
  "_updates": [
    {
      "week": "2026-02",
      "n": 5
    },
    {
      "week": "2026-03",
      "n": 1
    },
    {
      "week": "2026-10",
      "n": 2
    },
    {
      "week": "2026-11",
      "n": 3
    }
  ],
  "_tags": [
    {
      "name": "CRAN",
      "date": "2026-01-17"
    }
  ],
  "_stars": 0,
  "_contributors": [
    {
      "user": "samuelgonzalez0204",
      "count": 3,
      "uuid": 154916568
    }
  ],
  "_userbio": {
    "uuid": 154916568,
    "type": "user",
    "name": "SamuelGonzalez0204"
  },
  "_downloads": {
    "count": 474,
    "source": "https://cranlogs.r-pkg.org/downloads/total/last-month/ORscraper"
  },
  "_devurl": "https://github.com/samuelgonzalez0204/orscraper",
  "_searchresults": 0,
  "_rbuild": "4.6.0",
  "_assets": [
    "extra/citation.cff",
    "extra/citation.html",
    "extra/citation.json",
    "extra/citation.txt",
    "extra/contents.json",
    "extra/NEWS.html",
    "extra/NEWS.txt",
    "extra/ORscraper.html",
    "extra/readme.html",
    "extra/readme.md",
    "manual.pdf"
  ],
  "_homeurl": "https://github.com/samuelgonzalez0204/orscraper",
  "_realowner": "samuelgonzalez0204",
  "_cranurl": true,
  "_releases": [
    {
      "version": "0.1.0",
      "date": "2026-01-16"
    },
    {
      "version": "0.1.1",
      "date": "2026-03-11"
    }
  ],
  "_exports": [
    "classify_biopsy",
    "extract_chip_id",
    "extract_fusions",
    "extract_intermediate_values",
    "extract_values_from_tables",
    "extract_values_start_end",
    "filter_pathogenic_only",
    "read_pdf_content",
    "read_pdf_files",
    "search_ncbi_clinvar"
  ],
  "_help": [
    {
      "page": "classify_biopsy",
      "title": "Determine the type of biopsy from identifiers",
      "topics": [
        "classify_biopsy"
      ]
    },
    {
      "page": "extract_chip_id",
      "title": "Extract numeric identifiers from file names",
      "topics": [
        "extract_chip_id"
      ]
    },
    {
      "page": "extract_fusions",
      "title": "Extract fusion variants from text",
      "topics": [
        "extract_fusions"
      ]
    },
    {
      "page": "extract_intermediate_values",
      "title": "Extract intermediate values from text lines",
      "topics": [
        "extract_intermediate_values"
      ]
    },
    {
      "page": "extract_values_from_tables",
      "title": "Extract values from tables within text",
      "topics": [
        "extract_values_from_tables"
      ]
    },
    {
      "page": "extract_values_start_end",
      "title": "Extract values from start or end patterns",
      "topics": [
        "extract_values_start_end"
      ]
    },
    {
      "page": "extract_variable",
      "title": "Extract variable value from text lines",
      "topics": [
        "extract_variable"
      ]
    },
    {
      "page": "filter_pathogenic_only",
      "title": "Filter for pathogenic results only",
      "topics": [
        "filter_pathogenic_only"
      ]
    },
    {
      "page": "narrow_text",
      "title": "Extract a subset of text based on start and end patterns",
      "topics": [
        "narrow_text"
      ]
    },
    {
      "page": "read_pdf_content",
      "title": "Read content from a PDF file",
      "topics": [
        "read_pdf_content"
      ]
    },
    {
      "page": "read_pdf_files",
      "title": "Read all PDF files in a directory",
      "topics": [
        "read_pdf_files"
      ]
    },
    {
      "page": "search_ncbi_clinvar",
      "title": "Search for pathogenicity information in NCBI ClinVar",
      "topics": [
        "search_ncbi_clinvar"
      ]
    },
    {
      "page": "search_value",
      "title": "Search for a specific value in text lines",
      "topics": [
        "search_value"
      ]
    }
  ],
  "_readme": "https://github.com/samuelgonzalez0204/orscraper/raw/HEAD/README.md",
  "_rundeps": [
    "askpass",
    "cellranger",
    "cli",
    "cpp11",
    "crayon",
    "curl",
    "glue",
    "hms",
    "httr",
    "jsonlite",
    "lifecycle",
    "magrittr",
    "mime",
    "openssl",
    "pdftools",
    "pillar",
    "pkgconfig",
    "prettyunits",
    "progress",
    "qpdf",
    "R6",
    "Rcpp",
    "readxl",
    "rematch",
    "rentrez",
    "rlang",
    "stringi",
    "stringr",
    "sys",
    "tibble",
    "utf8",
    "vctrs",
    "XML"
  ],
  "_vignettes": [
    {
      "source": "ORscraper.Rmd",
      "filename": "ORscraper.html",
      "title": "ORscraper",
      "author": "Samuel González",
      "engine": "knitr::rmarkdown",
      "headings": [
        "Introduction",
        "Installation",
        "Data Structure and Input Format",
        "Input File Format",
        "Processing Multiple Files",
        "Recommended File Path Structure",
        "Recommended Naming Convention",
        "Language Compatibility",
        "Example Input File",
        "Core functions",
        "Example Workflow",
        "Step 1: Load and Process PDF Files",
        "Step 2: Extract Text from PDFs",
        "Step 3: Extract Key Information from Text",
        "Step 4: Extract Additional Information",
        "Step 5: Extract Genetic Mutation Data",
        "Step 6: Identify Gene Fusions",
        "Step 7: Search for Pathogenicity Information",
        "Step 8: Filter Pathogenic Mutations",
        "Step 9: Classify Biopsies",
        "Step 10: Extract Chip Identifier",
        "Additional Features",
        "Shiny App",
        "Conclusion"
      ],
      "created": "2026-01-11 20:27:06",
      "modified": "2026-01-11 20:27:06",
      "commits": 1
    }
  ],
  "_score": 4.301029995663981,
  "_indexed": true,
  "_nocasepkg": "orscraper",
  "_universes": [
    "samuelgonzalez0204"
  ],
  "_binaries": [
    {
      "r": "4.7.0",
      "os": "linux",
      "version": "0.1.1",
      "date": "2026-05-09T08:17:06.000Z",
      "distro": "noble",
      "commit": "30ef489dbaa16a2884dd1231c8a43beb41bb8595",
      "fileid": "04dbdad5f73cb8adc9bae4888a25f79d659c743cc5175a3ae88b630123c9b39c",
      "status": "success",
      "check": "NOTE",
      "buildurl": "https://github.com/r-universe/samuelgonzalez0204/actions/runs/25596191019"
    },
    {
      "r": "4.6.0",
      "os": "linux",
      "version": "0.1.1",
      "date": "2026-05-09T08:17:05.000Z",
      "distro": "noble",
      "commit": "30ef489dbaa16a2884dd1231c8a43beb41bb8595",
      "fileid": "4e76ec96498fd8af156eea4f51516bbf978ac44f6d2fcb9e2c1ffd694610d026",
      "status": "success",
      "check": "NOTE",
      "buildurl": "https://github.com/r-universe/samuelgonzalez0204/actions/runs/25596191019"
    },
    {
      "r": "4.5.3",
      "os": "mac",
      "version": "0.1.1",
      "date": "2026-05-09T08:16:53.000Z",
      "commit": "30ef489dbaa16a2884dd1231c8a43beb41bb8595",
      "fileid": "0cbff3686f39c037b3ed31624295136168b1237101860907e370d0fd95199d6e",
      "status": "success",
      "check": "NOTE",
      "buildurl": "https://github.com/r-universe/samuelgonzalez0204/actions/runs/25596191019"
    },
    {
      "r": "4.6.0",
      "os": "mac",
      "version": "0.1.1",
      "date": "2026-05-09T08:16:39.000Z",
      "commit": "30ef489dbaa16a2884dd1231c8a43beb41bb8595",
      "fileid": "74c18b5de3c2b5ecdedcb7bfdc3274627beb91dcda5b41411b419ee0f9c458a1",
      "status": "success",
      "check": "NOTE",
      "buildurl": "https://github.com/r-universe/samuelgonzalez0204/actions/runs/25596191019"
    },
    {
      "r": "4.7.0",
      "os": "win",
      "version": "0.1.1",
      "date": "2026-05-09T08:16:14.000Z",
      "commit": "30ef489dbaa16a2884dd1231c8a43beb41bb8595",
      "fileid": "ce338a583160e1b90c5dc0170311510944aecd5c2d3902b6ff0d56fb8846af50",
      "status": "success",
      "check": "NOTE",
      "buildurl": "https://github.com/r-universe/samuelgonzalez0204/actions/runs/25596191019"
    },
    {
      "r": "4.5.3",
      "os": "win",
      "version": "0.1.1",
      "date": "2026-05-09T08:16:06.000Z",
      "commit": "30ef489dbaa16a2884dd1231c8a43beb41bb8595",
      "fileid": "d18f2a38ad280ecd2d948c78afa3304c97979449553623396495a5d948a1fee7",
      "status": "success",
      "check": "NOTE",
      "buildurl": "https://github.com/r-universe/samuelgonzalez0204/actions/runs/25596191019"
    },
    {
      "r": "4.6.0",
      "os": "win",
      "version": "0.1.1",
      "date": "2026-05-09T08:16:08.000Z",
      "commit": "30ef489dbaa16a2884dd1231c8a43beb41bb8595",
      "fileid": "9b07b68910d6a8643556f15a23c088c7d6f17ebe1ccd62289be190d2f90bf0b5",
      "status": "success",
      "check": "NOTE",
      "buildurl": "https://github.com/r-universe/samuelgonzalez0204/actions/runs/25596191019"
    },
    {
      "r": "4.6.0",
      "os": "wasm",
      "version": "0.1.1",
      "date": "2026-05-22T13:07:45.000Z",
      "commit": "30ef489dbaa16a2884dd1231c8a43beb41bb8595",
      "fileid": "4a9044145faf5857aea2377ff2d061a105246300970ad8d733e2045d914c0bb6",
      "status": "success",
      "buildurl": "https://github.com/r-universe/samuelgonzalez0204/actions/runs/25596191019"
    }
  ]
}