diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000..bb34b59
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,9 @@
+.git
+.github
+.Rproj.user
+.Rhistory
+.RData
+.Ruserdata
+*.Rcheck
+*.tar.gz
+docs/superpowers
diff --git a/.github/workflows/test-fast.yaml b/.github/workflows/test-fast.yaml
new file mode 100644
index 0000000..7576be3
--- /dev/null
+++ b/.github/workflows/test-fast.yaml
@@ -0,0 +1,39 @@
+name: test-fast
+
+on:
+  push:
+    branches: [main, master, develop]
+  pull_request:
+    branches: [main, master, develop]
+
+permissions:
+  contents: read
+
+jobs:
+  test-fast:
+    runs-on: ubuntu-latest
+    timeout-minutes: 30 # fail a hung run early instead of inheriting the 360-minute default
+    steps:
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+
+      - uses: r-lib/actions/setup-r@6f6e5bc62fba3a704f74e7ad7ef7676c5c6a2590 # v2
+        with:
+          use-public-rspm: true
+
+      - uses: r-lib/actions/setup-r-dependencies@6f6e5bc62fba3a704f74e7ad7ef7676c5c6a2590 # v2
+        with:
+          extra-packages: any::testthat
+          needs: check # also install packages listed under Config/Needs/check
+
+      - name: Install kaefa package for fast tests
+        run: R CMD INSTALL .
+
+      - name: Run fast productization tests
+        run: |
+          Rscript - <<'RSCRIPT'
+          reporter <- testthat::StopReporter$new() # stop() on failure so the step exits non-zero
+          testthat::test_file("tests/testthat/test-benchmark-manifest.R",
+                              reporter = reporter)
+          testthat::test_file("tests/testthat/test-shiny-product-surface.R",
+                              reporter = reporter)
+          RSCRIPT
diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md
index dd31bce..6ddd063 100644
--- a/ARCHITECTURE.md
+++ b/ARCHITECTURE.md
@@ -11,6 +11,23 @@ It provides:
- optional remote worker initialization (`aefaInit`),
- an interactive Shiny UI (`launchAEFA`).
+## Productization Boundaries
+
+The repository remains a monorepo until the product surface has independent
+release or deployment needs. Use these boundaries when planning sale-readiness
+work:
+
+- `kaefa-core`: the R/statistical engine boundary around `aefa()`,
+ `engineAEFA()`, model selection, item-fit evaluation, theta-prior utilities,
+ and benchmark evidence.
+- `kaefa-studio`: the buyer-facing UI boundary around `launchAEFA()` and
+ `inst/shiny-app/app.R`.
+- `kaefa-runner`: the future deployment and execution boundary for container,
+ hosted, remote, or scheduled analysis workflows.
+
+Do not introduce a git submodule unless a downstream buyer or deployment model
+explicitly requires vendored source integration.
+
## Repository Layout
- `R/kaefa.R`: public orchestration entry points and exported runtime behavior.
diff --git a/DESCRIPTION b/DESCRIPTION
index 3ad2079..e7ed53d 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -31,6 +31,7 @@ LazyData: true
Repository: CRAN
Suggests: covr,
testthat,
+ pkgload,
knitr,
rmarkdown
VignetteBuilder: knitr
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..24eef29
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,28 @@
+FROM r-base:4.4.2
+
+ENV R_REPOS=https://cloud.r-project.org
+
+WORKDIR /opt/kaefa
+
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends \
+        cmake \
+        libcurl4-openssl-dev \
+        libssl-dev \
+        libxml2-dev \
+        libuv1-dev \
+        pandoc \
+        xz-utils \
+    && rm -rf /var/lib/apt/lists/*
+# Copy DESCRIPTION on its own so the dependency layer below is rebuilt only when it changes.
+COPY DESCRIPTION /opt/kaefa/DESCRIPTION
+# Install Depends/Imports (minus R and base packages); abort the build if any is still missing.
+RUN Rscript -e 'desc <- read.dcf("DESCRIPTION"); fields <- intersect(c("Depends", "Imports"), colnames(desc)); deps <- unlist(strsplit(desc[1, fields], ",", fixed = TRUE), use.names = FALSE); deps <- trimws(sub("[(].*$", "", deps)); deps <- deps[!is.na(deps) & nzchar(deps)]; deps <- setdiff(unique(deps), c("R", rownames(installed.packages(priority = "base")))); if (length(deps) > 0) install.packages(deps, repos = Sys.getenv("R_REPOS")); absent <- setdiff(deps, rownames(installed.packages())); if (length(absent) > 0) stop("failed to install dependencies: ", paste(absent, collapse = ", "))'
+
+COPY . /opt/kaefa
+
+RUN R CMD INSTALL .
+
+EXPOSE 3838
+
+CMD ["Rscript", "-e", "kaefa::launchAEFA(host = '0.0.0.0', port = 3838)"]
diff --git a/LICENSE b/LICENSE
index 69fc4ef..f288702 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1 +1,674 @@
-NA
+ GNU GENERAL PUBLIC LICENSE
+ Version 3, 29 June 2007
+
+ Copyright (C) 2007 Free Software Foundation, Inc.
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The GNU General Public License is a free, copyleft license for
+software and other kinds of works.
+
+ The licenses for most software and other practical works are designed
+to take away your freedom to share and change the works. By contrast,
+the GNU General Public License is intended to guarantee your freedom to
+share and change all versions of a program--to make sure it remains free
+software for all its users. We, the Free Software Foundation, use the
+GNU General Public License for most of our software; it applies also to
+any other work released this way by its authors. You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+them if you wish), that you receive source code or can get it if you
+want it, that you can change the software or use pieces of it in new
+free programs, and that you know you can do these things.
+
+ To protect your rights, we need to prevent others from denying you
+these rights or asking you to surrender the rights. Therefore, you have
+certain responsibilities if you distribute copies of the software, or if
+you modify it: responsibilities to respect the freedom of others.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must pass on to the recipients the same
+freedoms that you received. You must make sure that they, too, receive
+or can get the source code. And you must show them these terms so they
+know their rights.
+
+ Developers that use the GNU GPL protect your rights with two steps:
+(1) assert copyright on the software, and (2) offer you this License
+giving you legal permission to copy, distribute and/or modify it.
+
+ For the developers' and authors' protection, the GPL clearly explains
+that there is no warranty for this free software. For both users' and
+authors' sake, the GPL requires that modified versions be marked as
+changed, so that their problems will not be attributed erroneously to
+authors of previous versions.
+
+ Some devices are designed to deny users access to install or run
+modified versions of the software inside them, although the manufacturer
+can do so. This is fundamentally incompatible with the aim of
+protecting users' freedom to change the software. The systematic
+pattern of such abuse occurs in the area of products for individuals to
+use, which is precisely where it is most unacceptable. Therefore, we
+have designed this version of the GPL to prohibit the practice for those
+products. If such problems arise substantially in other domains, we
+stand ready to extend this provision to those domains in future versions
+of the GPL, as needed to protect the freedom of users.
+
+ Finally, every program is threatened constantly by software patents.
+States should not allow patents to restrict development and use of
+software on general-purpose computers, but in those that do, we wish to
+avoid the special danger that patents applied to a free program could
+make it effectively proprietary. To prevent this, the GPL assures that
+patents cannot be used to render the program non-free.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ TERMS AND CONDITIONS
+
+ 0. Definitions.
+
+ "This License" refers to version 3 of the GNU General Public License.
+
+ "Copyright" also means copyright-like laws that apply to other kinds of
+works, such as semiconductor masks.
+
+ "The Program" refers to any copyrightable work licensed under this
+License. Each licensee is addressed as "you". "Licensees" and
+"recipients" may be individuals or organizations.
+
+ To "modify" a work means to copy from or adapt all or part of the work
+in a fashion requiring copyright permission, other than the making of an
+exact copy. The resulting work is called a "modified version" of the
+earlier work or a work "based on" the earlier work.
+
+ A "covered work" means either the unmodified Program or a work based
+on the Program.
+
+ To "propagate" a work means to do anything with it that, without
+permission, would make you directly or secondarily liable for
+infringement under applicable copyright law, except executing it on a
+computer or modifying a private copy. Propagation includes copying,
+distribution (with or without modification), making available to the
+public, and in some countries other activities as well.
+
+ To "convey" a work means any kind of propagation that enables other
+parties to make or receive copies. Mere interaction with a user through
+a computer network, with no transfer of a copy, is not conveying.
+
+ An interactive user interface displays "Appropriate Legal Notices"
+to the extent that it includes a convenient and prominently visible
+feature that (1) displays an appropriate copyright notice, and (2)
+tells the user that there is no warranty for the work (except to the
+extent that warranties are provided), that licensees may convey the
+work under this License, and how to view a copy of this License. If
+the interface presents a list of user commands or options, such as a
+menu, a prominent item in the list meets this criterion.
+
+ 1. Source Code.
+
+ The "source code" for a work means the preferred form of the work
+for making modifications to it. "Object code" means any non-source
+form of a work.
+
+ A "Standard Interface" means an interface that either is an official
+standard defined by a recognized standards body, or, in the case of
+interfaces specified for a particular programming language, one that
+is widely used among developers working in that language.
+
+ The "System Libraries" of an executable work include anything, other
+than the work as a whole, that (a) is included in the normal form of
+packaging a Major Component, but which is not part of that Major
+Component, and (b) serves only to enable use of the work with that
+Major Component, or to implement a Standard Interface for which an
+implementation is available to the public in source code form. A
+"Major Component", in this context, means a major essential component
+(kernel, window system, and so on) of the specific operating system
+(if any) on which the executable work runs, or a compiler used to
+produce the work, or an object code interpreter used to run it.
+
+ The "Corresponding Source" for a work in object code form means all
+the source code needed to generate, install, and (for an executable
+work) run the object code and to modify the work, including scripts to
+control those activities. However, it does not include the work's
+System Libraries, or general-purpose tools or generally available free
+programs which are used unmodified in performing those activities but
+which are not part of the work. For example, Corresponding Source
+includes interface definition files associated with source files for
+the work, and the source code for shared libraries and dynamically
+linked subprograms that the work is specifically designed to require,
+such as by intimate data communication or control flow between those
+subprograms and other parts of the work.
+
+ The Corresponding Source need not include anything that users
+can regenerate automatically from other parts of the Corresponding
+Source.
+
+ The Corresponding Source for a work in source code form is that
+same work.
+
+ 2. Basic Permissions.
+
+ All rights granted under this License are granted for the term of
+copyright on the Program, and are irrevocable provided the stated
+conditions are met. This License explicitly affirms your unlimited
+permission to run the unmodified Program. The output from running a
+covered work is covered by this License only if the output, given its
+content, constitutes a covered work. This License acknowledges your
+rights of fair use or other equivalent, as provided by copyright law.
+
+ You may make, run and propagate covered works that you do not
+convey, without conditions so long as your license otherwise remains
+in force. You may convey covered works to others for the sole purpose
+of having them make modifications exclusively for you, or provide you
+with facilities for running those works, provided that you comply with
+the terms of this License in conveying all material for which you do
+not control copyright. Those thus making or running the covered works
+for you must do so exclusively on your behalf, under your direction
+and control, on terms that prohibit them from making any copies of
+your copyrighted material outside their relationship with you.
+
+ Conveying under any other circumstances is permitted solely under
+the conditions stated below. Sublicensing is not allowed; section 10
+makes it unnecessary.
+
+ 3. Protecting Users' Legal Rights From Anti-Circumvention Law.
+
+ No covered work shall be deemed part of an effective technological
+measure under any applicable law fulfilling obligations under article
+11 of the WIPO copyright treaty adopted on 20 December 1996, or
+similar laws prohibiting or restricting circumvention of such
+measures.
+
+ When you convey a covered work, you waive any legal power to forbid
+circumvention of technological measures to the extent such circumvention
+is effected by exercising rights under this License with respect to
+the covered work, and you disclaim any intention to limit operation or
+modification of the work as a means of enforcing, against the work's
+users, your or third parties' legal rights to forbid circumvention of
+technological measures.
+
+ 4. Conveying Verbatim Copies.
+
+ You may convey verbatim copies of the Program's source code as you
+receive it, in any medium, provided that you conspicuously and
+appropriately publish on each copy an appropriate copyright notice;
+keep intact all notices stating that this License and any
+non-permissive terms added in accord with section 7 apply to the code;
+keep intact all notices of the absence of any warranty; and give all
+recipients a copy of this License along with the Program.
+
+ You may charge any price or no price for each copy that you convey,
+and you may offer support or warranty protection for a fee.
+
+ 5. Conveying Modified Source Versions.
+
+ You may convey a work based on the Program, or the modifications to
+produce it from the Program, in the form of source code under the
+terms of section 4, provided that you also meet all of these conditions:
+
+ a) The work must carry prominent notices stating that you modified
+ it, and giving a relevant date.
+
+ b) The work must carry prominent notices stating that it is
+ released under this License and any conditions added under section
+ 7. This requirement modifies the requirement in section 4 to
+ "keep intact all notices".
+
+ c) You must license the entire work, as a whole, under this
+ License to anyone who comes into possession of a copy. This
+ License will therefore apply, along with any applicable section 7
+ additional terms, to the whole of the work, and all its parts,
+ regardless of how they are packaged. This License gives no
+ permission to license the work in any other way, but it does not
+ invalidate such permission if you have separately received it.
+
+ d) If the work has interactive user interfaces, each must display
+ Appropriate Legal Notices; however, if the Program has interactive
+ interfaces that do not display Appropriate Legal Notices, your
+ work need not make them do so.
+
+ A compilation of a covered work with other separate and independent
+works, which are not by their nature extensions of the covered work,
+and which are not combined with it such as to form a larger program,
+in or on a volume of a storage or distribution medium, is called an
+"aggregate" if the compilation and its resulting copyright are not
+used to limit the access or legal rights of the compilation's users
+beyond what the individual works permit. Inclusion of a covered work
+in an aggregate does not cause this License to apply to the other
+parts of the aggregate.
+
+ 6. Conveying Non-Source Forms.
+
+ You may convey a covered work in object code form under the terms
+of sections 4 and 5, provided that you also convey the
+machine-readable Corresponding Source under the terms of this License,
+in one of these ways:
+
+ a) Convey the object code in, or embodied in, a physical product
+ (including a physical distribution medium), accompanied by the
+ Corresponding Source fixed on a durable physical medium
+ customarily used for software interchange.
+
+ b) Convey the object code in, or embodied in, a physical product
+ (including a physical distribution medium), accompanied by a
+ written offer, valid for at least three years and valid for as
+ long as you offer spare parts or customer support for that product
+ model, to give anyone who possesses the object code either (1) a
+ copy of the Corresponding Source for all the software in the
+ product that is covered by this License, on a durable physical
+ medium customarily used for software interchange, for a price no
+ more than your reasonable cost of physically performing this
+ conveying of source, or (2) access to copy the
+ Corresponding Source from a network server at no charge.
+
+ c) Convey individual copies of the object code with a copy of the
+ written offer to provide the Corresponding Source. This
+ alternative is allowed only occasionally and noncommercially, and
+ only if you received the object code with such an offer, in accord
+ with subsection 6b.
+
+ d) Convey the object code by offering access from a designated
+ place (gratis or for a charge), and offer equivalent access to the
+ Corresponding Source in the same way through the same place at no
+ further charge. You need not require recipients to copy the
+ Corresponding Source along with the object code. If the place to
+ copy the object code is a network server, the Corresponding Source
+ may be on a different server (operated by you or a third party)
+ that supports equivalent copying facilities, provided you maintain
+ clear directions next to the object code saying where to find the
+ Corresponding Source. Regardless of what server hosts the
+ Corresponding Source, you remain obligated to ensure that it is
+ available for as long as needed to satisfy these requirements.
+
+ e) Convey the object code using peer-to-peer transmission, provided
+ you inform other peers where the object code and Corresponding
+ Source of the work are being offered to the general public at no
+ charge under subsection 6d.
+
+ A separable portion of the object code, whose source code is excluded
+from the Corresponding Source as a System Library, need not be
+included in conveying the object code work.
+
+ A "User Product" is either (1) a "consumer product", which means any
+tangible personal property which is normally used for personal, family,
+or household purposes, or (2) anything designed or sold for incorporation
+into a dwelling. In determining whether a product is a consumer product,
+doubtful cases shall be resolved in favor of coverage. For a particular
+product received by a particular user, "normally used" refers to a
+typical or common use of that class of product, regardless of the status
+of the particular user or of the way in which the particular user
+actually uses, or expects or is expected to use, the product. A product
+is a consumer product regardless of whether the product has substantial
+commercial, industrial or non-consumer uses, unless such uses represent
+the only significant mode of use of the product.
+
+ "Installation Information" for a User Product means any methods,
+procedures, authorization keys, or other information required to install
+and execute modified versions of a covered work in that User Product from
+a modified version of its Corresponding Source. The information must
+suffice to ensure that the continued functioning of the modified object
+code is in no case prevented or interfered with solely because
+modification has been made.
+
+ If you convey an object code work under this section in, or with, or
+specifically for use in, a User Product, and the conveying occurs as
+part of a transaction in which the right of possession and use of the
+User Product is transferred to the recipient in perpetuity or for a
+fixed term (regardless of how the transaction is characterized), the
+Corresponding Source conveyed under this section must be accompanied
+by the Installation Information. But this requirement does not apply
+if neither you nor any third party retains the ability to install
+modified object code on the User Product (for example, the work has
+been installed in ROM).
+
+ The requirement to provide Installation Information does not include a
+requirement to continue to provide support service, warranty, or updates
+for a work that has been modified or installed by the recipient, or for
+the User Product in which it has been modified or installed. Access to a
+network may be denied when the modification itself materially and
+adversely affects the operation of the network or violates the rules and
+protocols for communication across the network.
+
+ Corresponding Source conveyed, and Installation Information provided,
+in accord with this section must be in a format that is publicly
+documented (and with an implementation available to the public in
+source code form), and must require no special password or key for
+unpacking, reading or copying.
+
+ 7. Additional Terms.
+
+ "Additional permissions" are terms that supplement the terms of this
+License by making exceptions from one or more of its conditions.
+Additional permissions that are applicable to the entire Program shall
+be treated as though they were included in this License, to the extent
+that they are valid under applicable law. If additional permissions
+apply only to part of the Program, that part may be used separately
+under those permissions, but the entire Program remains governed by
+this License without regard to the additional permissions.
+
+ When you convey a copy of a covered work, you may at your option
+remove any additional permissions from that copy, or from any part of
+it. (Additional permissions may be written to require their own
+removal in certain cases when you modify the work.) You may place
+additional permissions on material, added by you to a covered work,
+for which you have or can give appropriate copyright permission.
+
+ Notwithstanding any other provision of this License, for material you
+add to a covered work, you may (if authorized by the copyright holders of
+that material) supplement the terms of this License with terms:
+
+ a) Disclaiming warranty or limiting liability differently from the
+ terms of sections 15 and 16 of this License; or
+
+ b) Requiring preservation of specified reasonable legal notices or
+ author attributions in that material or in the Appropriate Legal
+ Notices displayed by works containing it; or
+
+ c) Prohibiting misrepresentation of the origin of that material, or
+ requiring that modified versions of such material be marked in
+ reasonable ways as different from the original version; or
+
+ d) Limiting the use for publicity purposes of names of licensors or
+ authors of the material; or
+
+ e) Declining to grant rights under trademark law for use of some
+ trade names, trademarks, or service marks; or
+
+ f) Requiring indemnification of licensors and authors of that
+ material by anyone who conveys the material (or modified versions of
+ it) with contractual assumptions of liability to the recipient, for
+ any liability that these contractual assumptions directly impose on
+ those licensors and authors.
+
+ All other non-permissive additional terms are considered "further
+restrictions" within the meaning of section 10. If the Program as you
+received it, or any part of it, contains a notice stating that it is
+governed by this License along with a term that is a further
+restriction, you may remove that term. If a license document contains
+a further restriction but permits relicensing or conveying under this
+License, you may add to a covered work material governed by the terms
+of that license document, provided that the further restriction does
+not survive such relicensing or conveying.
+
+ If you add terms to a covered work in accord with this section, you
+must place, in the relevant source files, a statement of the
+additional terms that apply to those files, or a notice indicating
+where to find the applicable terms.
+
+ Additional terms, permissive or non-permissive, may be stated in the
+form of a separately written license, or stated as exceptions;
+the above requirements apply either way.
+
+ 8. Termination.
+
+ You may not propagate or modify a covered work except as expressly
+provided under this License. Any attempt otherwise to propagate or
+modify it is void, and will automatically terminate your rights under
+this License (including any patent licenses granted under the third
+paragraph of section 11).
+
+ However, if you cease all violation of this License, then your
+license from a particular copyright holder is reinstated (a)
+provisionally, unless and until the copyright holder explicitly and
+finally terminates your license, and (b) permanently, if the copyright
+holder fails to notify you of the violation by some reasonable means
+prior to 60 days after the cessation.
+
+ Moreover, your license from a particular copyright holder is
+reinstated permanently if the copyright holder notifies you of the
+violation by some reasonable means, this is the first time you have
+received notice of violation of this License (for any work) from that
+copyright holder, and you cure the violation prior to 30 days after
+your receipt of the notice.
+
+ Termination of your rights under this section does not terminate the
+licenses of parties who have received copies or rights from you under
+this License. If your rights have been terminated and not permanently
+reinstated, you do not qualify to receive new licenses for the same
+material under section 10.
+
+ 9. Acceptance Not Required for Having Copies.
+
+ You are not required to accept this License in order to receive or
+run a copy of the Program. Ancillary propagation of a covered work
+occurring solely as a consequence of using peer-to-peer transmission
+to receive a copy likewise does not require acceptance. However,
+nothing other than this License grants you permission to propagate or
+modify any covered work. These actions infringe copyright if you do
+not accept this License. Therefore, by modifying or propagating a
+covered work, you indicate your acceptance of this License to do so.
+
+ 10. Automatic Licensing of Downstream Recipients.
+
+ Each time you convey a covered work, the recipient automatically
+receives a license from the original licensors, to run, modify and
+propagate that work, subject to this License. You are not responsible
+for enforcing compliance by third parties with this License.
+
+ An "entity transaction" is a transaction transferring control of an
+organization, or substantially all assets of one, or subdividing an
+organization, or merging organizations. If propagation of a covered
+work results from an entity transaction, each party to that
+transaction who receives a copy of the work also receives whatever
+licenses to the work the party's predecessor in interest had or could
+give under the previous paragraph, plus a right to possession of the
+Corresponding Source of the work from the predecessor in interest, if
+the predecessor has it or can get it with reasonable efforts.
+
+ You may not impose any further restrictions on the exercise of the
+rights granted or affirmed under this License. For example, you may
+not impose a license fee, royalty, or other charge for exercise of
+rights granted under this License, and you may not initiate litigation
+(including a cross-claim or counterclaim in a lawsuit) alleging that
+any patent claim is infringed by making, using, selling, offering for
+sale, or importing the Program or any portion of it.
+
+ 11. Patents.
+
+ A "contributor" is a copyright holder who authorizes use under this
+License of the Program or a work on which the Program is based. The
+work thus licensed is called the contributor's "contributor version".
+
+ A contributor's "essential patent claims" are all patent claims
+owned or controlled by the contributor, whether already acquired or
+hereafter acquired, that would be infringed by some manner, permitted
+by this License, of making, using, or selling its contributor version,
+but do not include claims that would be infringed only as a
+consequence of further modification of the contributor version. For
+purposes of this definition, "control" includes the right to grant
+patent sublicenses in a manner consistent with the requirements of
+this License.
+
+ Each contributor grants you a non-exclusive, worldwide, royalty-free
+patent license under the contributor's essential patent claims, to
+make, use, sell, offer for sale, import and otherwise run, modify and
+propagate the contents of its contributor version.
+
+ In the following three paragraphs, a "patent license" is any express
+agreement or commitment, however denominated, not to enforce a patent
+(such as an express permission to practice a patent or covenant not to
+sue for patent infringement). To "grant" such a patent license to a
+party means to make such an agreement or commitment not to enforce a
+patent against the party.
+
+ If you convey a covered work, knowingly relying on a patent license,
+and the Corresponding Source of the work is not available for anyone
+to copy, free of charge and under the terms of this License, through a
+publicly available network server or other readily accessible means,
+then you must either (1) cause the Corresponding Source to be so
+available, or (2) arrange to deprive yourself of the benefit of the
+patent license for this particular work, or (3) arrange, in a manner
+consistent with the requirements of this License, to extend the patent
+license to downstream recipients. "Knowingly relying" means you have
+actual knowledge that, but for the patent license, your conveying the
+covered work in a country, or your recipient's use of the covered work
+in a country, would infringe one or more identifiable patents in that
+country that you have reason to believe are valid.
+
+ If, pursuant to or in connection with a single transaction or
+arrangement, you convey, or propagate by procuring conveyance of, a
+covered work, and grant a patent license to some of the parties
+receiving the covered work authorizing them to use, propagate, modify
+or convey a specific copy of the covered work, then the patent license
+you grant is automatically extended to all recipients of the covered
+work and works based on it.
+
+ A patent license is "discriminatory" if it does not include within
+the scope of its coverage, prohibits the exercise of, or is
+conditioned on the non-exercise of one or more of the rights that are
+specifically granted under this License. You may not convey a covered
+work if you are a party to an arrangement with a third party that is
+in the business of distributing software, under which you make payment
+to the third party based on the extent of your activity of conveying
+the work, and under which the third party grants, to any of the
+parties who would receive the covered work from you, a discriminatory
+patent license (a) in connection with copies of the covered work
+conveyed by you (or copies made from those copies), or (b) primarily
+for and in connection with specific products or compilations that
+contain the covered work, unless you entered into that arrangement,
+or that patent license was granted, prior to 28 March 2007.
+
+ Nothing in this License shall be construed as excluding or limiting
+any implied license or other defenses to infringement that may
+otherwise be available to you under applicable patent law.
+
+ 12. No Surrender of Others' Freedom.
+
+ If conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot convey a
+covered work so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you may
+not convey it at all. For example, if you agree to terms that obligate you
+to collect a royalty for further conveying from those to whom you convey
+the Program, the only way you could satisfy both those terms and this
+License would be to refrain entirely from conveying the Program.
+
+ 13. Use with the GNU Affero General Public License.
+
+ Notwithstanding any other provision of this License, you have
+permission to link or combine any covered work with a work licensed
+under version 3 of the GNU Affero General Public License into a single
+combined work, and to convey the resulting work. The terms of this
+License will continue to apply to the part which is the covered work,
+but the special requirements of the GNU Affero General Public License,
+section 13, concerning interaction through a network will apply to the
+combination as such.
+
+ 14. Revised Versions of this License.
+
+ The Free Software Foundation may publish revised and/or new versions of
+the GNU General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+ Each version is given a distinguishing version number. If the
+Program specifies that a certain numbered version of the GNU General
+Public License "or any later version" applies to it, you have the
+option of following the terms and conditions either of that numbered
+version or of any later version published by the Free Software
+Foundation. If the Program does not specify a version number of the
+GNU General Public License, you may choose any version ever published
+by the Free Software Foundation.
+
+ If the Program specifies that a proxy can decide which future
+versions of the GNU General Public License can be used, that proxy's
+public statement of acceptance of a version permanently authorizes you
+to choose that version for the Program.
+
+ Later license versions may give you additional or different
+permissions. However, no additional obligations are imposed on any
+author or copyright holder as a result of your choosing to follow a
+later version.
+
+ 15. Disclaimer of Warranty.
+
+ THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
+APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
+HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
+OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
+IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
+ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+ 16. Limitation of Liability.
+
+ IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
+THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
+GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
+USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
+DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
+PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
+EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGES.
+
+ 17. Interpretation of Sections 15 and 16.
+
+ If the disclaimer of warranty and limitation of liability provided
+above cannot be given local legal effect according to their terms,
+reviewing courts shall apply local law that most closely approximates
+an absolute waiver of all civil liability in connection with the
+Program, unless a warranty or assumption of liability accompanies a
+copy of the Program in return for a fee.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+state the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the program's name and a brief idea of what it does.>
+ Copyright (C) <year> <name of author>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+Also add information on how to contact you by electronic and paper mail.
+
+ If the program does terminal interaction, make it output a short
+notice like this when it starts in an interactive mode:
+
+ <program> Copyright (C) <year> <name of author>
+ This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License. Of course, your program's commands
+might be different; for a GUI interface, you would use an "about box".
+
+ You should also get your employer (if you work as a programmer) or school,
+if any, to sign a "copyright disclaimer" for the program, if necessary.
+For more information on this, and how to apply and follow the GNU GPL, see
+<https://www.gnu.org/licenses/>.
+
+ The GNU General Public License does not permit incorporating your program
+into proprietary programs. If your program is a subroutine library, you
+may consider it more useful to permit linking proprietary applications with
+the library. If this is what you want to do, use the GNU Lesser General
+Public License instead of this License. But first, please read
+<https://www.gnu.org/licenses/why-not-lgpl.html>.
diff --git a/TRD.md b/TRD.md
index a74ae43..0767c89 100644
--- a/TRD.md
+++ b/TRD.md
@@ -75,6 +75,7 @@ The package exposes a programmatic API and an optional Shiny interface.
manager with regular rotation and least-privilege access. See
[Security and Privacy](#security-and-privacy).
Example usage with `aefaInit()`:
+
```r
# vector aligned with RemoteClusters
aefaInit(
@@ -120,6 +121,26 @@ The package exposes a programmatic API and an optional Shiny interface.
- R CMD check on Windows, macOS, and Linux in CI.
- Unit tests in `tests/` for core logic and regressions.
- Example workflows in README and vignettes for smoke validation.
+- Fast PR tests should validate metadata, documentation, and lightweight
+ behavior without requiring heavy model-fitting runs.
+- Release benchmark tests should validate selected datasets from
+ `inst/benchmarks/manifest.csv` before tagged releases or buyer-facing
+ diligence packages.
+- Private benchmark tests should run outside public CI when datasets are
+ restricted, with only reviewed aggregate evidence committed back to docs.
+
+## Productization Boundaries
+
+- `kaefa-core`: R package APIs, statistical engine, benchmark evidence, and
+ reproducible result objects.
+- `kaefa-studio`: bundled Shiny UI, report export, user-facing validation, and
+ future hosted product workflow.
+- `kaefa-runner`: future container, ShinyProxy, remote execution, monitoring,
+ and scheduled benchmark operations.
+
+Keep these as documented boundaries before splitting repositories. Split only
+when release cadence, runtime dependencies, or buyer deployment requirements
+make a separate package or repository materially simpler.
## Security and Privacy
diff --git a/deploy/shinyproxy/application.yml.example b/deploy/shinyproxy/application.yml.example
new file mode 100644
index 0000000..e116cb6
--- /dev/null
+++ b/deploy/shinyproxy/application.yml.example
@@ -0,0 +1,20 @@
+proxy:
+ title: kaefa Studio
+ port: 8080
+ authentication: simple
+ users:
+ - name: analyst
+ password: ${KAEFA_SHINYPROXY_ANALYST_PASSWORD}
+ groups: kaefa-users
+ specs:
+ - id: kaefa-studio
+ display-name: kaefa Studio
+ description: Automated exploratory factor analysis workspace
+ container-image: kaefa-studio:local
+ container-network: kaefa-net
+ access-groups: [kaefa-users]
+ container-volumes: []
+ container-memory-request: 4g
+ container-memory-limit: 8g
+ container-cpu-request: 2
+ container-cpu-limit: 4
diff --git a/docs/business/2b-krw-commercial-model.md b/docs/business/2b-krw-commercial-model.md
new file mode 100644
index 0000000..5cbbae7
--- /dev/null
+++ b/docs/business/2b-krw-commercial-model.md
@@ -0,0 +1,76 @@
+# 2B KRW Commercial Model
+
+## Purpose
+
+This model defines what must become true for `kaefa` to support a 2B KRW
+acquisition case, equivalent to a 20억 KRW sale target. It is not a present-day
+valuation claim.
+
+## Valuation Threshold
+
+| Exit multiple | ARR needed for 2B KRW value |
+| --- | ---: |
+| 3x ARR | 667M KRW |
+| 4x ARR | 500M KRW |
+| 5x ARR | 400M KRW |
+| Strategic IP sale | Lower ARR possible with stronger evidence |
+
+The practical target is 400M-700M KRW ARR unless the buyer is acquiring
+strategic statistical capability rather than recurring revenue.
+
+## Pricing Paths
+
+### Annual Institution License
+
+Sell annual access to `kaefa-studio`, validation reports, and support for
+university labs, assessment vendors, and research teams.
+
+Evidence needed:
+
+- signed annual contracts,
+- named user or site scope,
+- support SLA,
+- renewal terms,
+- usage logs that do not expose private respondent data.
+
+### Hosted Assessment Analytics Workspace
+
+Sell a managed deployment where the buyer or customer uploads datasets and
+receives standardized reports.
+
+Evidence needed:
+
+- deployment architecture,
+- privacy and retention policy,
+- runtime cost per analysis,
+- uptime and monitoring proof,
+- reproducibility bundle export.
+
+### Productized Validation Services
+
+Sell expert-assisted validation projects where the repeatable software output is
+explicitly separated from consulting labor.
+
+Evidence needed:
+
+- standard statement of work,
+- fixed report template,
+- before/after analyst time saved,
+- conversion path from service project to subscription.
+
+## Minimum Sale-Readiness Metrics
+
+- 3-5 paid pilots completed with written acceptance.
+- Median time-to-report measured on at least three benchmark classes.
+- At least one repeatable deployment path.
+- License/IP posture reviewed.
+- Fast PR tests and release benchmark tests documented.
+- Known failure classes return structured explanations.
+
+## Buyer Story
+
+The strongest buyer story is not "an R package exists." It is:
+
+`kaefa` turns complex IRT/EFA model search into a repeatable report workflow,
+with benchmark evidence, deployable UI, and supportable operations for
+assessment teams that cannot afford manual model exploration for every dataset.
diff --git a/docs/business/pilot-scorecard.md b/docs/business/pilot-scorecard.md
new file mode 100644
index 0000000..b30d94e
--- /dev/null
+++ b/docs/business/pilot-scorecard.md
@@ -0,0 +1,55 @@
+# Pilot Scorecard
+
+## Purpose
+
+Every paid or strategic pilot should produce comparable evidence for product
+fit, technical reliability, and acquisition readiness.
+
+## Scorecard Fields
+
+| Field | Scoring Guidance |
+| --- | --- |
+| Pilot owner | Named accountable owner |
+| Customer segment | University, vendor, HR, education, or consulting |
+| Dataset complexity | Rows, items, response type, missingness, covariates |
+| Successful report generation | Yes, partial, or no |
+| Time to report | Wall-clock time from upload or API call to export |
+| Analyst intervention | None, light, heavy, or impossible |
+| Failure explainability | Structured reason and next action, or raw error |
+| Security constraints | Local, hosted, private runner, or restricted |
+| Willingness to pay | Annual license, service fee, support fee, or none |
+| Renewal path | Clear, possible, unclear, or no |
+| Reference value | Public reference, private reference, or internal only |
+
+## Acceptance Levels
+
+### Green
+
+- Report generated successfully.
+- Result was understandable to the pilot owner.
+- No private data handling concern remains unresolved.
+- Customer expresses a paid renewal or expansion path.
+
+### Yellow
+
+- Report generated with analyst intervention.
+- Runtime or UX issues are tolerable but must be fixed.
+- Customer value exists, but packaging or pricing is unclear.
+
+### Red
+
+- Model fails without a structured explanation.
+- Dataset cannot be used under the current privacy model.
+- Customer does not see a paid path after the pilot.
+
+## Evidence To Save
+
+- signed pilot scope or email approval,
+- dataset metadata without private respondent data,
+- selected options,
+- runtime measurement,
+- exported report,
+- customer feedback,
+- renewal or expansion decision.
+
+Do not commit private datasets, credentials, or customer-identifying raw data.
diff --git a/docs/diligence/data-room-index.md b/docs/diligence/data-room-index.md
new file mode 100644
index 0000000..1162fee
--- /dev/null
+++ b/docs/diligence/data-room-index.md
@@ -0,0 +1,87 @@
+# Data Room Index
+
+Use this index when packaging `kaefa` for a buyer, pilot sponsor, or strategic
+partner. It points to the current evidence and separates completed artifacts
+from decisions that still need owner, legal, or customer input.
+
+## Product And Architecture
+
+- Product scope:
+ `docs/product/kaefa-studio-requirements.md`.
+ Status: ready for pilot review.
+- Internal boundaries:
+ `ARCHITECTURE.md`, `TRD.md`.
+ Status: monorepo boundaries documented.
+- Repository split decision:
+ `docs/product/kaefa-studio-requirements.md`.
+ Status: defer until auth, tenancy, independent deployment, non-R frontend,
+ or buyer requirement.
+
+## Commercial Evidence
+
+- Acquisition case:
+ `docs/business/2b-krw-commercial-model.md`.
+ Status: target model documented.
+- Pilot scoring:
+ `docs/business/pilot-scorecard.md`.
+ Status: scorecard ready.
+- Missing revenue proof:
+ `docs/business/2b-krw-commercial-model.md`.
+ Status: paid pilots and ARR evidence needed.
+
+## Validation And Quality
+
+- Fast PR tests:
+ `.github/workflows/test-fast.yaml`, `tests/FAST_TESTS.md`.
+ Status: fast productization gate added.
+- Benchmark evidence:
+ `inst/benchmarks/manifest.csv`,
+ `docs/validation/benchmark-protocol.md`.
+ Status: manifest and protocol ready.
+- Release checks:
+ `docs/diligence/release-diligence-checklist.md`.
+ Status: checklist ready.
+
+## Deployment And Operations
+
+- Studio runtime:
+ `Dockerfile`, `docs/operations/deployment.md`.
+ Status: local container smoke tested.
+- Hosted deployment example:
+ `deploy/shinyproxy/application.yml.example`.
+ Status: evaluation example ready.
+- Production hardening:
+ `docs/operations/deployment.md`.
+ Status: auth, HTTPS, secrets, resource sizing, and measured runtime budgets
+ remain.
+
+## Security Privacy And IP
+
+- Uploaded data handling:
+ `docs/product/kaefa-studio-requirements.md`,
+ `docs/operations/deployment.md`.
+ Status: no persistence unless user exports.
+- License posture:
+ `LICENSE`, `DESCRIPTION`, `docs/diligence/license-and-ip.md`.
+ Status: GPL posture documented.
+- Legal decisions:
+ `docs/diligence/license-and-ip.md`.
+ Status: owner/legal confirmation required before proprietary claims.
+
+## Design And Roadmap
+
+- Productization roadmap:
+ `docs/superpowers/plans/2026-07-02-kaefa-2b-krw-sale-readiness.md`.
+ Status: execution checklist current.
+- Visual roadmap:
+ FigJam roadmap (board link not recorded here; add the URL before sharing).
+ Status: created without Figma Code Connect.
+
+## Missing Before Sale Claim
+
+- Owner/legal signoff on copyright, GPL dependency implications, and buyer
+ deliverables.
+- At least three (target: 3-5) paid or strategic pilots scored with the pilot scorecard.
+- Benchmark runtime and accuracy evidence for agreed dataset classes.
+- Production authentication, HTTPS, secret management, monitoring, and support
+ posture if the sale includes hosted operation.
diff --git a/docs/diligence/license-and-ip.md b/docs/diligence/license-and-ip.md
new file mode 100644
index 0000000..8e9cb5b
--- /dev/null
+++ b/docs/diligence/license-and-ip.md
@@ -0,0 +1,65 @@
+# License And IP Diligence
+
+## Current Package License
+
+`DESCRIPTION` currently declares:
+
+```text
+License: GPL-3 + file LICENSE
+```
+
+The repository `LICENSE` file now begins:
+
+```text
+GNU GENERAL PUBLIC LICENSE
+Version 3, 29 June 2007
+```
+
+The prior placeholder `LICENSE` file has been replaced with the GPL-3 text
+distributed with the local R runtime. This resolves the repository-file
+mismatch, but it does not resolve ownership, contributor-rights, or commercial
+redistribution decisions.
+
+## Dependency Constraints
+
+The containerized Studio runtime confirmed these dependency license facts:
+
+- `mirt`: GPL (>= 3)
+- `future`: LGPL (>= 2.1)
+- `shiny`: MIT + file LICENSE
+- `DT`: MIT + file LICENSE
+- `fitdistrplus`: GPL (>= 2)
+
+## Working Commercial Posture
+
+Until owner and legal review explicitly approve another structure, treat the
+sellable product as:
+
+- GPL-compatible statistical core,
+- paid implementation and support services,
+- optional hosted deployment operated by the seller or buyer,
+- commercial reporting templates and benchmark evidence packaged around the
+ core.
+
+Do not claim a proprietary source-code sale, dual license, or open-core split
+until copyright ownership and GPL dependency implications are reviewed.
+
+## Required Owner And Legal Checks
+
+- Confirm copyright ownership for all substantial contributions.
+- Confirm whether every contributor assigned rights or contributed under terms
+ compatible with the intended sale.
+- Confirm whether GPL dependencies constrain proprietary redistribution or
+ embedding.
+- Decide whether the buyer receives source code, hosted service rights,
+ support/maintenance rights, or a mixed package.
+- Confirm whether `License: GPL-3 + file LICENSE` should remain as-is or be
+ simplified for CRAN/package convention while preserving the repository GPL-3
+ notice.
+
+## Recommended Next Decision
+
+Keep the current GPL-compatible R package as `kaefa-core` unless legal review
+authorizes a dual-license model. Build sale value through benchmark evidence,
+enterprise deployment, support, reporting, and paid pilots before attempting a
+repository split.
diff --git a/docs/diligence/release-diligence-checklist.md b/docs/diligence/release-diligence-checklist.md
new file mode 100644
index 0000000..16f3533
--- /dev/null
+++ b/docs/diligence/release-diligence-checklist.md
@@ -0,0 +1,56 @@
+# Release Diligence Checklist
+
+Use this checklist before presenting `kaefa` as a buyer-facing product,
+enterprise pilot, or 2B KRW acquisition candidate.
+
+## Product Scope
+
+- Data room index points to current evidence and unresolved decisions.
+- Current public APIs are listed and intentionally preserved.
+- `kaefa-core`, `kaefa-studio`, and `kaefa-runner` boundaries are documented.
+- Known out-of-scope features are named.
+- User-facing workflows have success and failure states.
+
+## Install And Deployment Proof
+
+- R package installation works from the target branch.
+- Shiny launch path works with documented dependencies.
+- Hosted or container deployment path is tested when offered to buyers.
+- Runtime dependency versions are recorded.
+
+## Benchmark Evidence
+
+- `inst/benchmarks/manifest.csv` passes schema validation.
+- Each benchmark dataset has source and license context.
+- Expected factor-count ranges are reviewed.
+- Runtime measurements name hardware or CI runner class.
+- Heavy benchmarks are separated from fast PR tests.
+
+## Security And Privacy
+
+- No secrets, SSH keys, or customer data are committed.
+- Uploaded Shiny data is not persisted unless the user exports it.
+- Remote execution setup documents SSH key handling.
+- Dependency review and security scans are tracked.
+
+## License And IP
+
+- `DESCRIPTION` and `LICENSE` agree.
+- Dependency license implications are documented.
+- Copyright ownership and contributor rights are reviewed.
+- Commercial posture is explicit: GPL-only, dual-license, hosted service, or
+ open-core.
+
+## Revenue And Pilots
+
+- Pricing path is selected.
+- Paid pilots are scored with a consistent scorecard.
+- ARR needed for 2B KRW valuation is stated.
+- Renewal, expansion, or strategic buyer path is documented.
+
+## Known Limitations
+
+- Missing dependencies are listed.
+- Unsupported data shapes are listed.
+- Non-convergence behavior is documented.
+- Legal or owner decisions are marked separately from engineering blockers.
diff --git a/docs/operations/deployment.md b/docs/operations/deployment.md
new file mode 100644
index 0000000..369cdb5
--- /dev/null
+++ b/docs/operations/deployment.md
@@ -0,0 +1,54 @@
+# Deployment
+
+## Local Container Smoke Test
+
+Build the local image:
+
+```bash
+podman build -t kaefa-studio:local .
+```
+
+Run the Shiny app on port 3838:
+
+```bash
+podman run --rm -p 3838:3838 kaefa-studio:local
+```
+
+Then open:
+
+```text
+http://localhost:3838
+```
+
+Docker can be used instead of Podman with the same `build` and `run`
+arguments.
+
+## ShinyProxy Example
+
+`deploy/shinyproxy/application.yml.example` provides a minimal ShinyProxy
+configuration for an internal evaluation deployment.
+It relies on the Dockerfile `CMD` for the app startup command.
+
+Before using it outside a local pilot:
+
+- set `KAEFA_SHINYPROXY_ANALYST_PASSWORD` from the deployment secret manager,
+- set memory and CPU limits from measured benchmark runs,
+- decide whether uploaded data must stay local-only,
+- add HTTPS and organization authentication.
+
+## Runtime Notes
+
+The container installs the current source package and runs:
+
+```r
+kaefa::launchAEFA(host = "0.0.0.0", port = 3838)
+```
+
+The app must not persist uploaded datasets unless a future feature explicitly
+adds reviewed storage behavior.
+
+## Current Limits
+
+- The image is intended for evaluation, not hardened production.
+- Runtime cost and memory requirements still need benchmark measurements.
+- Authentication in the ShinyProxy example is a placeholder.
diff --git a/docs/product/kaefa-studio-requirements.md b/docs/product/kaefa-studio-requirements.md
new file mode 100644
index 0000000..13c99bc
--- /dev/null
+++ b/docs/product/kaefa-studio-requirements.md
@@ -0,0 +1,78 @@
+# kaefa Studio Requirements
+
+## Purpose
+
+`kaefa-studio` is the buyer-facing product surface for researchers, assessment
+teams, and consultants who need automated exploratory factor analysis without
+writing R code. The current implementation is the bundled Shiny app in
+`inst/shiny-app/app.R`; this document defines the minimum product behavior
+before any separate app repository or submodule is justified.
+
+## Primary Workflow
+
+1. Upload a numeric CSV response dataset.
+2. Validate row count, item count, column types, and factor range.
+3. Choose minimum factors, maximum factors, rotation, and model-selection
+ criteria.
+4. Run `kaefa::aefa()` with visible progress and clear runtime expectations.
+5. Inspect model summary, item fit, factor loadings, and fit indices.
+6. Export an RDS result object.
+7. Export a human-readable report.
+8. Export a reproducibility bundle in a future release.
+
+## Required Failure States
+
+### Non-Numeric Columns
+
+Show a blocking error that names the invalid columns and explains that item
+response data must be numeric before factor analysis can run.
+
+### Factor Count Exceeds Item Count
+
+Block analysis before calling `aefa()` and tell the user the maximum allowed
+factor count for the uploaded dataset.
+
+### Missing Runtime Dependencies
+
+If `shiny`, `DT`, or package dependencies are unavailable, the app launch path
+must fail before user upload with an install-focused message.
+
+### Long-Running Model
+
+After analysis starts, keep an obvious running state visible. If a timeout is
+introduced later, return a structured timeout result instead of discarding
+partial diagnostic context.
+
+### Model Non-Convergence
+
+Return a structured failure reason, the last attempted model context when safe,
+and suggested next actions such as reducing maximum factors, checking low
+variance items, or switching to a smaller pilot dataset.
+
+## Buyer-Facing Requirements
+
+- The first screen must make the product task clear: upload assessment data,
+ configure automated factor search, and export a report.
+- Reports must explain what was selected, why it was selected, and what items
+ were removed or flagged.
+- The app must avoid storing uploaded data unless the user explicitly exports a
+ result.
+- Every exported report should include package version, run timestamp, selected
+ options, and data shape.
+- Default settings should be safe for a first pilot dataset, not optimized for
+ maximum search breadth.
+
+## Split Criteria
+
+Do not split `kaefa-studio` into a separate repository yet.
+
+Split it only when at least one of these is true:
+
+- it needs authentication or tenant isolation,
+- it needs hosted deployment independent of the R package release,
+- it adds non-R frontend build tooling,
+- it needs independent issue tracking and release notes,
+- a buyer specifically requires a separate deployable source package.
+
+Prefer a normal repository or package split over a git submodule. Use a
+submodule only when a buyer explicitly requires vendored source integration.
diff --git a/docs/superpowers/plans/2026-07-02-kaefa-2b-krw-sale-readiness.md b/docs/superpowers/plans/2026-07-02-kaefa-2b-krw-sale-readiness.md
new file mode 100644
index 0000000..a83eb64
--- /dev/null
+++ b/docs/superpowers/plans/2026-07-02-kaefa-2b-krw-sale-readiness.md
@@ -0,0 +1,566 @@
+# kaefa 2B KRW Sale Readiness Plan
+
+> **For agentic workers:** REQUIRED SUB-SKILL: Use
+> superpowers:subagent-driven-development (recommended) or
+> superpowers:executing-plans to implement this plan task-by-task. Steps use
+> checkbox (`- [ ]`) syntax for tracking.
+
+**Goal:** Turn `kaefa` from a research-grade R package plus bundled Shiny app
+into a diligence-ready product candidate that can support a 2B KRW acquisition
+case. In Korean valuation terms this is the 20억 KRW target from the request,
+not a 20B KRW target.
+
+**Architecture:** Keep one repository for the next execution phase, but enforce
+clear package boundaries: `kaefa-core` as the R/statistical engine boundary,
+`kaefa-studio` as the product UI boundary, and `kaefa-runner` as the future
+execution/deployment boundary. Do not introduce a submodule until the UI or
+runner has an independent release cadence, dependency stack, and buyer-facing
+deployment target.
+
+**Tech Stack:** R package, `mirt`, `future`, `testthat`, Shiny, DT, GitHub
+Actions, FigJam/Figma diagrams without Figma Code Connect.
+
+## Global Constraints
+
+- Preserve current public R APIs unless a task explicitly changes an interface:
+ `aefa()`, `engineAEFA()`, `aefaInit()`, `aefaResults()`,
+ `launchAEFA()`, `fitThetaPrior()`, `testThetaPriorCalibration()`, and
+ `applyThetaPrior()`.
+- Use `README.Rmd` as the source for `README.md`; regenerate both together.
+- Keep secrets, SSH keys, customer data, and assessment data out of git.
+- Treat review approval as non-blocking for this roadmap. Treat broken CI,
+ failing reproducible tests, missing legal/license authority, or unavailable
+ runtime dependencies as real blockers.
+- Do not use Figma Code Connect for this work.
+
+---
+
+## Evidence Snapshot
+
+- Repository: `ContextualWisdomLab/kaefa`, public, default branch `develop`.
+- Current head audited: `334a5484c5e1d0c35e3f8e575f0dc6eb29c39da0`.
+- Live open PRs: Dependabot updates `#55`, `#57`, `#58`, `#59`, `#60`; all
+ blocked by review requirement, not by a proven technical failure.
+- Live open issues relevant to productization: GPU integration `#49`, model
+ comparison via `nonnest2` `#48`, parameter calibration with benchmark datasets
+ `#46`, and full lint/refactor `#45`.
+- Product surface today: R package API, bundled Shiny app in
+ `inst/shiny-app/app.R`, documentation in `PRD.md`, `TRD.md`,
+ `ARCHITECTURE.md`, README/vignettes, and a broad test suite.
+- Local validation today: R 4.6.0 is available; `mirt` and `testthat` are
+ installed; `shiny`, `DT`, and `fitdistrplus` are not installed locally.
+- CI gap today: `.github/workflows/R-CMD-check.yaml` runs
+ `check-r-package` with `args: 'c("--no-manual", "--no-tests")'`, so the large
+ test suite is not currently a required package-check gate.
+- License risk today: `DESCRIPTION` declares `GPL-3 + file LICENSE`, but
+ `LICENSE` currently contains only `NA`. Dependency license review also matters
+ because `mirt` is GPL (>= 3).
+- Figma output: FigJam roadmap created; the share link is not recorded in
+ this document.
+
+## Product Decision
+
+The correct first move is not a submodule split. The current repository is still
+small enough that a submodule would add coordination overhead without solving
+the buyer-facing risks: validation evidence, deployability, supportability,
+license clarity, and revenue proof.
+
+Use this staged boundary instead:
+
+1. **Now:** keep a monorepo and document internal boundaries.
+2. **After validation gates pass:** move reusable statistical orchestration into
+ a narrower `kaefa-core` R-package boundary inside `R/`.
+3. **After a hosted or enterprise UI exists:** split `kaefa-studio` into a
+ separate app repo or package if it needs independent deployment, auth,
+ monitoring, or non-R frontend dependencies.
+4. **After remote execution has real operational semantics:** create
+ `kaefa-runner` as a separate service or deployment package, not a submodule,
+ unless a buyer specifically requires source-vendored integration.
+
+## 2B KRW Valuation Frame
+
+This plan treats 2B KRW as an acquisition value target, not as a claim that the
+current repository is worth 2B KRW today.
+
+The practical target is one of:
+
+- **Revenue-multiple path:** 400M-700M KRW ARR with credible retention and gross
+ margin, assuming roughly 3x-5x ARR valuation multiples for a niche private
+ software asset.
+- **Strategic-IP path:** fewer customers, but strong validation evidence,
+ defensible benchmark results, and a buyer that needs automated IRT/EFA
+ capability inside a broader assessment platform.
+- **Services-to-product path:** 3-5 paid pilots converted into reusable product
+ workflows, with implementation services explicitly separated from repeatable
+ software revenue.
+
+The revenue-multiple path is the cleanest buyer story. The strategic-IP path is
+the fallback if the market is specialized but the algorithmic validation is
+strong.
+
+## KPI Framework
+
+Primary sale-readiness KPIs:
+
+- **Activated analysis:** percent of new datasets that produce an exported report
+ without maintainer intervention.
+- **Time to report:** median wall-clock time from CSV upload or R API call to a
+ buyer-readable result package.
+- **Benchmark agreement:** percent of benchmark datasets where selected factor
+ count/model choice matches a reviewed expected result or documented acceptable
+ range.
+- **Model failure explainability:** percent of failed analyses that return a
+ structured reason and next action instead of silent `NULL` or raw R errors.
+- **Paid pilot conversion:** paid pilots signed, completed, and converted to
+ recurring license or annual support.
+
+Guardrails:
+
+- No customer data leaves the user-controlled runtime without explicit
+ configuration.
+- No uploaded Shiny dataset is persisted beyond the session unless the user
+ explicitly exports it.
+- Dependency and license posture must be explainable to a buyer.
+- CI must prove the fast test suite on every PR and the heavier benchmark suite
+ on schedule or release.
+
+## Task 1: License And Diligence Baseline
+
+**Files:**
+
+- Modify: `LICENSE`
+- Modify: `DESCRIPTION`
+- Create: `docs/diligence/license-and-ip.md`
+
+**Interfaces:**
+
+- Consumes: current `DESCRIPTION` license field and dependency list.
+- Produces: a buyer-readable license/IP note and a corrected repository license
+ posture.
+
+- [x] **Step 1: Decide sale posture**
+
+ Record whether the sellable asset remains GPL-only, becomes dual-licensed, or
+ separates open-source core from commercial UI/support. If no legal authority
+ is available, mark this task blocked on owner/legal input instead of guessing.
+
+- [x] **Step 2: Fix the license file**
+
+ If GPL-3 remains the current posture, replace `LICENSE` with the full GPL-3
+ text or the correct CRAN-compatible license file expected by R packaging.
+
+- [x] **Step 3: Document dependency license implications**
+
+ Create `docs/diligence/license-and-ip.md` with:
+
+ ```md
+ # License And IP Diligence
+
+ ## Current package license
+
+ kaefa declares `GPL-3 + file LICENSE` in `DESCRIPTION`.
+
+ ## Dependency constraints
+
+ - `mirt`: GPL (>= 3)
+ - `future`: LGPL (>= 2.1)
+ - `shiny`: required for UI runtime
+ - `DT`: required for UI tables
+ - `fitdistrplus`: required for theta-prior utilities
+
+ ## Sale-readiness decision
+
+ The current working posture is GPL core plus commercial services and hosted
+ deployment, until owner/legal review explicitly approves a dual-license or
+ open-core sale structure.
+
+ ## Required owner/legal checks
+
+ - Confirm copyright ownership for all substantial contributions.
+ - Confirm whether GPL dependencies constrain proprietary distribution.
+ - Confirm buyer deliverables: source sale, hosted service, or support contract.
+ ```
+
+- [x] **Step 4: Verify**
+
+ Run:
+
+ ```bash
+ Rscript -e 'read.dcf("DESCRIPTION")[1,"License"]'
+ ```
+
+ Expected: prints the selected license field without parsing errors.
+
+## Task 2: Fast, Required CI Test Gate
+
+**Files:**
+
+- Preserve: `.github/workflows/R-CMD-check.yaml`
+- Create: `.github/workflows/test-fast.yaml`
+- Create: `tests/FAST_TESTS.md`
+
+**Interfaces:**
+
+- Consumes: existing `tests/testthat/*.R` suite.
+- Produces: a PR-required fast test gate that does not depend on full benchmark
+ runtime.
+
+- [x] **Step 1: Keep R CMD check but stop pretending it runs tests**
+
+ Leave `R-CMD-check` as package-install validation if runtime is too heavy, but
+ add a separate workflow named `test-fast` that runs explicit targeted tests.
+
+- [x] **Step 2: Add fast workflow**
+
+ Create `.github/workflows/test-fast.yaml`:
+
+
+
+ ```yaml
+ name: test-fast
+
+ on:
+ pull_request:
+ branches: [main, master, develop]
+ push:
+ branches: [main, master, develop]
+
+ permissions:
+ contents: read
+
+ jobs:
+ test-fast:
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+ - uses: r-lib/actions/setup-r@6f6e5bc62fba3a704f74e7ad7ef7676c5c6a2590 # v2
+ with:
+ use-public-rspm: true
+ - uses: r-lib/actions/setup-r-dependencies@6f6e5bc62fba3a704f74e7ad7ef7676c5c6a2590 # v2
+ with:
+ extra-packages: any::testthat
+ needs: check
+ - name: Run fast productization tests
+ run: |
+ Rscript - <<'RSCRIPT'
+ reporter <- testthat::StopReporter$new()
+ testthat::test_file("tests/testthat/test-benchmark-manifest.R",
+ reporter = reporter)
+ testthat::test_file("tests/testthat/test-shiny-product-surface.R",
+ reporter = reporter)
+ RSCRIPT
+ ```
+
+
+
+- [x] **Step 3: Document heavy suite policy**
+
+ Create `tests/FAST_TESTS.md` to state which tests are fast PR gates and
+ which tests are scheduled/release benchmark gates.
+
+- [x] **Step 4: Verify**
+
+ Run:
+
+ ```bash
+ npx -y markdownlint-cli2@0.20.0 tests/FAST_TESTS.md
+ ```
+
+ Expected: no markdownlint errors.
+
+## Task 3: Benchmark Corpus And Validation Harness
+
+**Files:**
+
+- Create: `inst/benchmarks/README.md`
+- Create: `inst/benchmarks/manifest.csv`
+- Create: `tests/testthat/test-benchmark-manifest.R`
+- Create: `docs/validation/benchmark-protocol.md`
+
+**Interfaces:**
+
+- Consumes: current `aefa()` and `engineAEFA()` APIs.
+- Produces: a repeatable evidence base for buyer diligence.
+
+- [x] **Step 1: Define benchmark manifest**
+
+ Create `inst/benchmarks/manifest.csv` with columns:
+
+ ```csv
+ dataset_id,source,license,rows,items,response_type,expected_factor_min,expected_factor_max,expected_runtime_seconds,notes
+ science,mirt::Science,GPL-3-or-later,392,4,mixed,1,2,120,Smoke
+ ```
+
+- [x] **Step 2: Add manifest test**
+
+ Create `tests/testthat/test-benchmark-manifest.R`:
+
+ ```r
+ test_that("benchmark manifest has required columns", {
+ manifest <- read.csv(test_path("../../inst/benchmarks/manifest.csv"))
+ expect_true(all(c(
+ "dataset_id", "source", "license", "rows", "items", "response_type",
+ "expected_factor_min", "expected_factor_max",
+ "expected_runtime_seconds", "notes"
+ ) %in% names(manifest)))
+ expect_equal(anyDuplicated(manifest$dataset_id), 0L)
+ })
+ ```
+
+- [x] **Step 3: Write protocol**
+
+ `docs/validation/benchmark-protocol.md` must define:
+
+ - dataset admission criteria,
+ - expected-result review process,
+ - runtime measurement method,
+ - acceptable failure categories,
+ - release-signoff requirements.
+
+- [x] **Step 4: Verify**
+
+ Run:
+
+ ```bash
+ Rscript -e 'testthat::test_file("tests/testthat/test-benchmark-manifest.R")'
+ ```
+
+ Expected: one passing test file.
+
+## Task 4: Product UI Boundary
+
+**Files:**
+
+- Modify: `inst/shiny-app/app.R`
+- Create: `docs/product/kaefa-studio-requirements.md`
+
+**Interfaces:**
+
+- Consumes: existing Shiny app and `launchAEFA()`.
+- Produces: a clear `kaefa-studio` product surface without splitting repos yet.
+
+- [x] **Step 1: Document buyer-facing UI workflow**
+
+ Create `docs/product/kaefa-studio-requirements.md` with the minimum workflow:
+ upload numeric CSV, validate items, choose factor range, run analysis, inspect
+ progress, export report, export reproducibility bundle.
+
+- [x] **Step 2: Add failure-state requirements**
+
+ Specify copy and behavior for:
+
+ - non-numeric columns,
+ - factor count greater than item count,
+ - missing `shiny` or `DT`,
+ - long-running model timeout,
+ - model convergence failure.
+
+- [x] **Step 3: Defer repo split**
+
+ Add a section named `Split Criteria`:
+
+ - split when UI needs auth, tenancy, hosted deployment, or non-R frontend
+ build tooling;
+ - do not use a submodule unless a downstream buyer requires vendored source.
+
+- [x] **Step 4: Verify**
+
+ Run:
+
+ ```bash
+ npx -y markdownlint-cli2@0.20.0 docs/product/kaefa-studio-requirements.md
+ ```
+
+ Expected: no markdownlint errors.
+
+## Task 5: Runner And Deployment Package
+
+**Files:**
+
+- Create: `Dockerfile`
+- Create: `deploy/shinyproxy/application.yml.example`
+- Create: `docs/operations/deployment.md`
+
+**Interfaces:**
+
+- Consumes: `launchAEFA()` and `inst/shiny-app/app.R`.
+- Produces: an enterprise-evaluable runtime path.
+
+- [x] **Step 1: Package the app**
+
+ Add a Dockerfile that installs package dependencies and runs the Shiny app.
+
+- [x] **Step 2: Add ShinyProxy example**
+
+ Add a ShinyProxy `application.yml.example` that documents image name, app
+ port, authentication placeholder, resource limits, and no bundled secrets.
+
+- [x] **Step 3: Document local smoke test**
+
+ `docs/operations/deployment.md` must include:
+
+ ```bash
+ podman build -t kaefa-studio:local .
+ podman run --rm -p 3838:3838 kaefa-studio:local
+ ```
+
+ Docker can use the same arguments where Docker is the available runtime.
+
+- [x] **Step 4: Verify**
+
+ Run:
+
+ ```bash
+ podman build -t kaefa-studio:local .
+ ```
+
+ Expected: image builds without missing R package dependencies.
+
+## Task 6: Revenue And Pilot Evidence
+
+**Files:**
+
+- Create: `docs/business/2b-krw-commercial-model.md`
+- Create: `docs/business/pilot-scorecard.md`
+
+**Interfaces:**
+
+- Consumes: KPI framework in this plan.
+- Produces: buyer-readable revenue proof targets and pilot acceptance criteria.
+
+- [x] **Step 1: Write commercial model**
+
+ Create `docs/business/2b-krw-commercial-model.md` with three pricing paths:
+
+ - annual institution license,
+ - hosted assessment analytics workspace,
+ - validation/reporting services attached to product subscription.
+
+- [x] **Step 2: Set 2B KRW thresholds**
+
+ Include this table:
+
+ ```md
+ | Exit multiple | ARR needed for 2B KRW value |
+ | --- | ---: |
+ | 3x ARR | 667M KRW |
+ | 4x ARR | 500M KRW |
+ | 5x ARR | 400M KRW |
+ | Strategic IP sale | Lower ARR possible with stronger benchmark evidence |
+ ```
+
+- [x] **Step 3: Write pilot scorecard**
+
+ `docs/business/pilot-scorecard.md` must score each pilot on:
+
+ - dataset complexity,
+ - successful report generation,
+ - time-to-report,
+ - analyst intervention required,
+ - willingness to pay,
+ - renewal or expansion path,
+ - security/privacy constraints.
+
+- [x] **Step 4: Verify**
+
+ Run:
+
+ ```bash
+ npx -y markdownlint-cli2@0.20.0 docs/business/2b-krw-commercial-model.md docs/business/pilot-scorecard.md
+ ```
+
+ Expected: no markdownlint errors.
+
+## Task 7: Release Diligence Bundle
+
+**Files:**
+
+- Create: `docs/diligence/release-diligence-checklist.md`
+- Modify: `ARCHITECTURE.md`
+- Modify: `TRD.md`
+
+**Interfaces:**
+
+- Consumes: all prior tasks.
+- Produces: a concise buyer due-diligence package.
+
+- [x] **Step 1: Add checklist**
+
+ Create `docs/diligence/release-diligence-checklist.md` with sections:
+
+ - product scope,
+ - install and deployment proof,
+ - benchmark evidence,
+ - security and privacy posture,
+ - license and IP posture,
+ - revenue and pilots,
+ - known limitations.
+
+- [x] **Step 2: Update architecture**
+
+ Update `ARCHITECTURE.md` with the internal boundaries:
+ `kaefa-core`, `kaefa-studio`, and `kaefa-runner`.
+
+- [x] **Step 3: Update TRD**
+
+ Update `TRD.md` with the CI/test-gate distinction:
+ fast PR gate, scheduled benchmark gate, release diligence gate.
+
+- [x] **Step 4: Verify**
+
+ Run:
+
+ ```bash
+ npx -y markdownlint-cli2@0.20.0 ARCHITECTURE.md TRD.md docs/diligence/release-diligence-checklist.md
+ ```
+
+ Expected: no markdownlint errors.
+
+## Execution Order
+
+1. Complete Task 1 first. License/IP ambiguity can invalidate the commercial
+ story.
+2. Complete Task 2 second. A buyer will discount a statistical package whose
+ tests are present but not enforced.
+3. Complete Task 3 third. Benchmark evidence is the core technical moat.
+4. Complete Task 4 fourth. The UI needs product-grade failure states before a
+ non-R buyer can evaluate it.
+5. Complete Task 5 fifth. Hosted deployment is optional for research use, but
+ mandatory for enterprise evaluation.
+6. Complete Task 6 sixth. The valuation target needs revenue proof or a clear
+ strategic-IP rationale.
+7. Complete Task 7 last. The diligence bundle is a packaging pass over evidence,
+ not a substitute for evidence.
+
+## Current Blockers
+
+- The containerized Studio runtime now builds and returns `HTTP/1.1 200 OK`
+ locally; direct host-R Shiny runs still depend on the host installing
+ `shiny`, `DT`, and `fitdistrplus`.
+- CodeGraph tools were not available for this execution environment's R source
+ analysis, so R implementation checks used native file reads.
+- Legal/license posture still needs owner/legal confirmation before changing
+ away from GPL or claiming proprietary sale rights.
+
+## References
+
+- FigJam roadmap: link not recorded in this document.
+
+- Current market context checked on 2026-07-02:
+ - [Spherical Insights psychometric tests market] reported the global market
+ at USD 9.47B in 2023 and forecast USD 30.12B by 2033.
+ - [Market Research Future online testing software market] reported the market
+ at USD 5.279B in 2024 and forecast USD 14.87B by 2035.
+ - [Coherent Market Insights assessment services market] estimated assessment
+ services at USD 13.238B in 2026 and forecast USD 32.721B in 2033.
+ - [Aventis Advisors SaaS valuation multiples] reported a March 2026 median
+ SaaS EV/revenue multiple of 3.4x.
+ - [SaaS Capital valuation multiples] reported 2025 private SaaS valuation
+ estimates around 4.8x-5.3x current run-rate annualized revenue.
+ - `mirt` remains the critical statistical dependency and is GPL (>= 3).
+
+[Spherical Insights psychometric tests market]: https://www.sphericalinsights.com/reports/psychometric-tests-market
+[Market Research Future online testing software market]: https://www.marketresearchfuture.com/reports/online-testing-software-market-39186
+[Coherent Market Insights assessment services market]: https://www.coherentmarketinsights.com/market-insight/assessment-services-market-5935
+[Aventis Advisors SaaS valuation multiples]: https://aventis-advisors.com/saas-valuation-multiples/
+[SaaS Capital valuation multiples]: https://www.saas-capital.com/blog-posts/saas-valuation-multiples-understanding-the-new-normal/
diff --git a/docs/validation/benchmark-protocol.md b/docs/validation/benchmark-protocol.md
new file mode 100644
index 0000000..a200a3d
--- /dev/null
+++ b/docs/validation/benchmark-protocol.md
@@ -0,0 +1,76 @@
+# Benchmark Protocol
+
+## Purpose
+
+The benchmark corpus turns `kaefa` validation from anecdotal examples into a
+repeatable buyer-diligence artifact. Each benchmark should answer one question:
+does `kaefa` produce an expected, explainable result within an acceptable
+runtime budget for a known class of assessment data?
+
+## Dataset Admission Criteria
+
+A dataset can enter the benchmark corpus when all of these are true:
+
+- its source and license are documented,
+- it can be used in CI or a controlled release-validation environment,
+- its row count, item count, and response type are recorded,
+- an expected factor-count range is reviewed,
+- its runtime budget is realistic for the selected validation tier.
+
+Do not commit customer, private, regulated, or otherwise restricted assessment
+data to the repository.
+
+## Validation Tiers
+
+- **Manifest tier:** validates benchmark metadata shape and basic ranges on
+ every PR.
+- **Smoke tier:** runs fast installed-package or example-data checks on PRs when
+ dependencies are available.
+- **Release tier:** runs heavier model-fitting checks before tagged releases or
+ buyer-facing diligence packages.
+- **Private tier:** runs restricted datasets outside public CI, with only
+ reviewed aggregate evidence committed back to documentation.
+
+## Expected Result Review
+
+For each dataset, record the expected factor-count range instead of a single
+point estimate unless a reviewed source makes the exact value defensible. A
+reviewed range is acceptable when it comes from one of:
+
+- published dataset documentation,
+- a reviewed statistical analysis note,
+- repeated historical `kaefa` runs with stable output,
+- expert review by the package owner or a psychometrics reviewer.
+
+## Runtime Measurement
+
+Record runtime as wall-clock seconds from the start of the `aefa()` call to the
+available result object. Runtime budgets should name the hardware or CI runner
+class used for measurement when the benchmark graduates from manifest tier.
+
+## Acceptable Failure Categories
+
+Failures should be classified before release signoff:
+
+- missing dependency,
+- invalid dataset metadata,
+- model non-convergence,
+- runtime budget exceeded,
+- unsupported response type,
+- expected-result mismatch,
+- unstructured error or silent `NULL`.
+
+The last category is listed only so it can be classified. Unstructured errors
+and silent `NULL` results are product defects for sale-readiness purposes, even
+when the underlying statistical model cannot be fit.
+
+## Release Signoff
+
+A release or buyer-facing diligence package should include:
+
+- manifest validation results,
+- smoke benchmark results,
+- release-tier benchmark results when available,
+- known benchmark exclusions,
+- hardware/runtime notes,
+- reviewer and date for any expected-result changes.
diff --git a/inst/benchmarks/README.md b/inst/benchmarks/README.md
new file mode 100644
index 0000000..db333a5
--- /dev/null
+++ b/inst/benchmarks/README.md
@@ -0,0 +1,18 @@
+# kaefa Benchmark Corpus
+
+This directory records datasets used to validate `kaefa` behavior across
+releases. The first corpus version is intentionally small: it defines the
+manifest contract before adding heavier runtime benchmarks.
+
+Each benchmark dataset must have an entry in `manifest.csv` with:
+
+- source and license context,
+- dataset shape,
+- response type,
+- expected factor-count range,
+- expected runtime budget,
+- notes about why the dataset belongs in the corpus.
+
+Do not commit private, restricted, or customer-owned assessment data here.
+Private validation datasets should live in a separately controlled storage
+location and be referenced only by reviewed metadata.
diff --git a/inst/benchmarks/manifest.csv b/inst/benchmarks/manifest.csv
new file mode 100644
index 0000000..d8e7629
--- /dev/null
+++ b/inst/benchmarks/manifest.csv
@@ -0,0 +1,2 @@
+dataset_id,source,license,rows,items,response_type,expected_factor_min,expected_factor_max,expected_runtime_seconds,notes
+science,mirt::Science,GPL-3-or-later,392,4,mixed,1,2,120,mirt package example data; smoke benchmark for dependency and manifest validation
diff --git a/inst/shiny-app/app.R b/inst/shiny-app/app.R
index bfc669b..26872a9 100644
--- a/inst/shiny-app/app.R
+++ b/inst/shiny-app/app.R
@@ -177,6 +177,7 @@ server <- function(input, output, session) {
values <- reactiveValues(
data = NULL,
results = NULL,
+ runOptions = NULL,
analysisComplete = FALSE
)
@@ -216,6 +217,7 @@ server <- function(input, output, session) {
values$data <- NULL
values$analysisComplete <- FALSE
values$results <- NULL
+ values$runOptions <- NULL
return()
}
@@ -226,15 +228,25 @@ server <- function(input, output, session) {
values$data <- read.csv(input$dataFile$datapath,
header = input$hasHeader,
stringsAsFactors = FALSE)
- if (!all(sapply(values$data, is.numeric))) {
+ numeric_columns <- sapply(values$data, is.numeric)
+ if (!all(numeric_columns)) {
+ invalid_columns <- names(values$data)[!numeric_columns]
+ invalid_preview <- paste(head(invalid_columns, 8), collapse = ", ")
+ if (length(invalid_columns) > 8) {
+ invalid_preview <- paste0(invalid_preview, ", ...")
+ }
showNotification(
- "All columns must be numeric for factor analysis. Please check your CSV file.",
+ paste0(
+ "All item columns must be numeric for factor analysis. ",
+ "Non-numeric columns: ", invalid_preview, "."
+ ),
type = "error",
duration = 10
)
values$data <- NULL
values$analysisComplete <- FALSE
values$results <- NULL
+ values$runOptions <- NULL
return()
}
} else if (ext == "rds") {
@@ -246,6 +258,7 @@ server <- function(input, output, session) {
values$data <- NULL
values$analysisComplete <- FALSE
values$results <- NULL
+ values$runOptions <- NULL
return()
} else {
showNotification(paste("Unsupported file type:", ext),
@@ -253,11 +266,13 @@ server <- function(input, output, session) {
values$data <- NULL
values$analysisComplete <- FALSE
values$results <- NULL
+ values$runOptions <- NULL
return()
}
values$analysisComplete <- FALSE
values$results <- NULL
+ values$runOptions <- NULL
showNotification("Data loaded successfully!", type = "message")
@@ -331,6 +346,18 @@ server <- function(input, output, session) {
duration = NULL, id = "analysisProgress", type = "message")
tryCatch({
+ values$runOptions <- list(
+ packageVersion = as.character(utils::packageVersion("kaefa")),
+ startedAt = format(Sys.time(), "%Y-%m-%dT%H:%M:%SZ", tz = "UTC"),
+ rows = nrow(values$data),
+ items = ncol(values$data),
+ minFactors = input$minFactors,
+ maxFactors = input$maxFactors,
+ rotation = input$rotation,
+ modelSelection = input$modelSelection,
+ saveHistory = input$saveHistory
+ )
+
# Run aefa analysis
values$results <- kaefa::aefa(
data = values$data,
@@ -482,9 +509,38 @@ server <- function(input, output, session) {
sink(file)
on.exit(sink(), add = TRUE)
+ run_options <- values$runOptions
+ option_value <- function(name, fallback) {
+ if (!is.null(run_options[[name]])) {
+ return(run_options[[name]])
+ }
+ fallback
+ }
+
cat("kaefa: Automated Exploratory Factor Analysis\n")
cat("=============================================\n\n")
cat("Report generated:", format(Sys.time(), "%Y-%m-%d %H:%M:%S"), "\n\n")
+ cat("Run Metadata:\n")
+ cat("-------------\n")
+ cat("kaefa package version:",
+ option_value("packageVersion",
+ as.character(utils::packageVersion("kaefa"))), "\n")
+ cat("Analysis started:", option_value("startedAt", "not recorded"), "\n")
+ cat("Data shape:",
+ option_value("rows", nrow(values$data)),
+ "rows x",
+ option_value("items", ncol(values$data)),
+ "items\n")
+ cat("Selected options:\n")
+ cat("- Minimum factors:",
+ option_value("minFactors", input$minFactors), "\n")
+ cat("- Maximum factors:",
+ option_value("maxFactors", input$maxFactors), "\n")
+ cat("- Rotation:", option_value("rotation", input$rotation), "\n")
+ cat("- Model selection:",
+ option_value("modelSelection", input$modelSelection), "\n")
+ cat("- Save model history:",
+ option_value("saveHistory", input$saveHistory), "\n\n")
if (inherits(values$results, "aefa")) {
cat("Analysis Summary:\n")
diff --git a/tests/FAST_TESTS.md b/tests/FAST_TESTS.md
new file mode 100644
index 0000000..856cb3b
--- /dev/null
+++ b/tests/FAST_TESTS.md
@@ -0,0 +1,22 @@
+# Fast Test Gate
+
+`test-fast` is the lightweight PR gate for metadata and productization checks
+that do not require expensive model fitting.
+
+It currently runs:
+
+```r
+reporter <- testthat::StopReporter$new()
+testthat::test_file("tests/testthat/test-benchmark-manifest.R",
+ reporter = reporter)
+testthat::test_file("tests/testthat/test-shiny-product-surface.R",
+ reporter = reporter)
+```
+
+`R-CMD-check` remains the package installation and multi-OS compatibility gate.
+The current workflow uses `--no-tests`, so it should not be treated as proof
+that the full statistical test suite ran.
+
+Release and buyer-diligence runs should add heavier benchmark tests from
+`inst/benchmarks/manifest.csv` once expected results and runtime budgets are
+reviewed.
diff --git a/tests/testthat/helper-test-data.R b/tests/testthat/helper-test-data.R
index 04f072a..b64cb03 100644
--- a/tests/testthat/helper-test-data.R
+++ b/tests/testthat/helper-test-data.R
@@ -1,4 +1,33 @@
# Shared helpers for test data generation.
+.find_kaefa_root <- function() {
+  # Locate the package root: the first probed directory that contains a
+  # DESCRIPTION file, returned as a normalized path. Errors when none does.
+  search_dirs <- unique(c(
+    getwd(),
+    testthat::test_path("../.."),
+    testthat::test_path("..")
+  ))
+  has_description <- file.exists(file.path(search_dirs, "DESCRIPTION"))
+
+  if (!any(has_description)) {
+    stop("Cannot locate kaefa package root for test bootstrap.", call. = FALSE)
+  }
+  normalizePath(search_dirs[which(has_description)[1]], mustWork = TRUE)
+}
+
+.ensure_kaefa_namespace <- function() {
+  # Use the installed kaefa when present; otherwise load the source tree with
+  # pkgload, skipping the calling test when pkgload is not available either.
+  kaefa_installed <- requireNamespace("kaefa", quietly = TRUE)
+  if (!kaefa_installed) {
+    if (!requireNamespace("pkgload", quietly = TRUE)) {
+      testthat::skip("kaefa is not installed and pkgload is unavailable")
+    }
+    pkgload::load_all(.find_kaefa_root(), export_all = FALSE, quiet = TRUE)
+  }
+  invisible(TRUE)
+}
+
create_test_data <- function(n_items = 10, n_obs = 100) {
set.seed(123)
data <- data.frame(matrix(
@@ -40,6 +69,7 @@ create_binary_test_data <- function(n_items = 10, n_obs = 100) {
aefa <- function(...) {
.skip_expensive_ci_calls("aefa")
+ .ensure_kaefa_namespace()
defaults <- list(
NCYCLES = 120,
BURNIN = 40,
@@ -57,6 +87,7 @@ efa <- aefa
engineAEFA <- function(...) {
.skip_expensive_ci_calls("engineAEFA")
+ .ensure_kaefa_namespace()
defaults <- list(
NCYCLES = 120,
BURNIN = 40,
diff --git a/tests/testthat/test-benchmark-manifest.R b/tests/testthat/test-benchmark-manifest.R
new file mode 100644
index 0000000..b95e92e
--- /dev/null
+++ b/tests/testthat/test-benchmark-manifest.R
@@ -0,0 +1,51 @@
+benchmark_manifest_path <- function() {
+  # Prefer the manifest found through the installed kaefa package; the
+  # nzchar() guard covers system.file() returning an empty string.
+  from_package <- system.file("benchmarks", "manifest.csv", package = "kaefa")
+  if (nzchar(from_package) && file.exists(from_package)) {
+    return(from_package)
+  }
+
+  # Otherwise probe the source tree from the current directory and from two
+  # levels above it; the result is NA when neither candidate exists.
+  relative_manifest <- file.path("inst", "benchmarks", "manifest.csv")
+  source_candidates <- c(
+    relative_manifest,
+    file.path("..", "..", relative_manifest)
+  )
+  Filter(file.exists, source_candidates)[1]
+}
+
+test_that("benchmark manifest has required columns", {
+  # Validates the manifest contract: required columns, at least one dataset,
+  # unique ids, and internally consistent numeric ranges.
+  manifest_path <- benchmark_manifest_path()
+
+  # A manifest that cannot be located is reported as a failure, not a skip.
+  if (is.na(manifest_path) || !nzchar(manifest_path)) {
+    testthat::fail("benchmark manifest not found")
+    return(invisible(NULL))
+  }
+
+  manifest <- read.csv(manifest_path, stringsAsFactors = FALSE)
+  required_columns <- c(
+    "dataset_id", "source", "license", "rows", "items", "response_type",
+    "expected_factor_min", "expected_factor_max",
+    "expected_runtime_seconds", "notes"
+  )
+
+  expect_true(all(required_columns %in% names(manifest)))
+  # all() over zero rows is vacuously TRUE, so without this guard a
+  # header-only manifest would satisfy every range check below.
+  expect_gt(nrow(manifest), 0)
+
+  expect_equal(anyDuplicated(manifest$dataset_id), 0L)
+  expect_true(all(nzchar(manifest$dataset_id)))
+  expect_true(all(manifest$rows > 0))
+  expect_true(all(manifest$items > 0))
+  expect_true(all(manifest$expected_factor_min >= 1))
+  expect_true(all(manifest$expected_factor_min <= manifest$items))
+  expect_true(all(manifest$expected_factor_max >= manifest$expected_factor_min))
+  expect_true(all(manifest$expected_factor_max <= manifest$items))
+  expect_true(all(manifest$expected_runtime_seconds > 0))
+})
diff --git a/tests/testthat/test-shiny-product-surface.R b/tests/testthat/test-shiny-product-surface.R
new file mode 100644
index 0000000..59e21e1
--- /dev/null
+++ b/tests/testthat/test-shiny-product-surface.R
@@ -0,0 +1,33 @@
+shiny_app_path <- function() {
+  # Probe for the app source from the current directory and from two levels
+  # above it; skip the calling test when neither location has the file.
+  app_relative <- file.path("inst", "shiny-app", "app.R")
+  candidates <- c(app_relative, file.path("..", "..", app_relative))
+  located <- Filter(file.exists, candidates)
+
+  if (length(located) == 0L || !nzchar(located[1])) {
+    testthat::skip("Shiny app source file not found")
+  }
+
+  located[1]
+}
+
+test_that("Shiny upload validation identifies invalid columns", {
+  # Static source check: looks for the invalid-column markers in the app file.
+  source_lines <- readLines(shiny_app_path(), warn = FALSE)
+  mentions <- function(text) any(grepl(text, source_lines, fixed = TRUE))
+
+  expect_true(mentions("invalid_columns"))
+  expect_true(mentions("Non-numeric columns"))
+})
+
+test_that("Shiny report includes reproducibility metadata", {
+  # Static source check: each report-metadata marker must appear in the app.
+  source_lines <- readLines(shiny_app_path(), warn = FALSE)
+  markers <- c(
+    "runOptions", "kaefa package version", "Data shape", "Selected options"
+  )
+  for (marker in markers) {
+    expect_true(any(grepl(marker, source_lines, fixed = TRUE)))
+  }
+})