<?xml version="1.0"?>
<!DOCTYPE article
PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.4 20190208//EN"
       "JATS-journalpublishing1.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="research-article" dtd-version="1.4" xml:lang="en">
 <front>
  <journal-meta>
   <journal-id journal-id-type="publisher-id">Scientific and analytical journal «Vestnik Saint-Petersburg university of State fire service of EMERCOM of Russia»</journal-id>
   <journal-title-group>
    <journal-title xml:lang="en">Scientific and analytical journal «Vestnik Saint-Petersburg university of State fire service of EMERCOM of Russia»</journal-title>
    <trans-title-group xml:lang="ru">
     <trans-title>Научно-аналитический журнал «Вестник Санкт-Петербургского университета ГПС МЧС России»</trans-title>
    </trans-title-group>
   </journal-title-group>
   <issn publication-format="online">2218-130X</issn>
  </journal-meta>
  <article-meta>
   <article-id pub-id-type="publisher-id">120408</article-id>
   <article-id pub-id-type="doi">10.61260/2218-130X-2026-1-30-42</article-id>
   <article-categories>
    <subj-group subj-group-type="toc-heading" xml:lang="ru">
     <subject>ИНФОРМАТИКА, ВЫЧИСЛИТЕЛЬНАЯ ТЕХНИКА И УПРАВЛЕНИЕ</subject>
    </subj-group>
    <subj-group subj-group-type="toc-heading" xml:lang="en">
     <subject>INFORMATICS, COMPUTER ENGINEERING AND CONTROL</subject>
    </subj-group>
    <subj-group>
     <subject>ИНФОРМАТИКА, ВЫЧИСЛИТЕЛЬНАЯ ТЕХНИКА И УПРАВЛЕНИЕ</subject>
    </subj-group>
   </article-categories>
   <title-group>
    <article-title xml:lang="en">ALGORITHM FOR SUPPORTING INDIVIDUAL KNOWLEDGE TESTING BASED ON A GENERATIVE ARTIFICIAL INTELLIGENCE SYSTEM</article-title>
    <trans-title-group xml:lang="ru">
     <trans-title>АЛГОРИТМ ПОДДЕРЖКИ ИНДИВИДУАЛЬНОГО ТЕСТИРОВАНИЯ ЗНАНИЙ НА ОСНОВЕ СИСТЕМ ГЕНЕРАТИВНОГО ИСКУССТВЕННОГО ИНТЕЛЛЕКТА</trans-title>
    </trans-title-group>
   </title-group>
   <contrib-group content-type="authors">
    <contrib contrib-type="author">
     <name-alternatives>
      <name xml:lang="ru">
       <surname>Коцюба</surname>
       <given-names>Игорь Юрьевич</given-names>
      </name>
      <name xml:lang="en">
       <surname>Kotsyuba</surname>
       <given-names>Igor Yurievich</given-names>
      </name>
     </name-alternatives>
     <email>ikotciuba@itmo.ru</email>
     <bio xml:lang="ru">
       <p>кандидат технических наук</p>
     </bio>
     <bio xml:lang="en">
       <p>candidate of technical sciences</p>
     </bio>
     <xref ref-type="aff" rid="aff-1"/>
    </contrib>
    <contrib contrib-type="author">
     <name-alternatives>
      <name xml:lang="ru">
       <surname>Лайок</surname>
       <given-names>Олег Владимирович</given-names>
      </name>
      <name xml:lang="en">
       <surname>Layok</surname>
       <given-names>Oleg Vladimirovich</given-names>
      </name>
     </name-alternatives>
     <email>laolvl@mail.ru</email>
     <xref ref-type="aff" rid="aff-2"/>
    </contrib>
    <contrib contrib-type="author">
     <name-alternatives>
      <name xml:lang="ru">
       <surname>Валдайцева</surname>
       <given-names>Мария Викторовна</given-names>
      </name>
      <name xml:lang="en">
       <surname>Valdayceva</surname>
       <given-names>Mariya Viktorovna</given-names>
      </name>
     </name-alternatives>
     <email>mvvaldaitceva@itmo.ru</email>
     <bio xml:lang="ru">
       <p>кандидат технических наук</p>
     </bio>
     <bio xml:lang="en">
       <p>candidate of technical sciences</p>
     </bio>
     <xref ref-type="aff" rid="aff-3"/>
    </contrib>
   </contrib-group>
   <aff-alternatives id="aff-1">
    <aff>
     <institution xml:lang="ru">Санкт-Петербургский национальный исследовательский университет информационных технологий, механики и оптики</institution>
    </aff>
    <aff>
     <institution xml:lang="en">Saint-Petersburg National Research University of Information Technologies, Mechanics and Optics</institution>
    </aff>
   </aff-alternatives>
   <aff-alternatives id="aff-2">
    <aff>
     <institution xml:lang="ru">Санкт-Петербургский национальный исследовательский университет информационных технологий, механики и оптики</institution>
     <city>Санкт-Петербург</city>
     <country>Россия</country>
    </aff>
    <aff>
     <institution xml:lang="en">ITMO university</institution>
     <city>Saint-Petersburg</city>
     <country>Russian Federation</country>
    </aff>
   </aff-alternatives>
   <aff-alternatives id="aff-3">
    <aff>
     <institution xml:lang="ru">Санкт-Петербургский национальный исследовательский университет информационных технологий, механики и оптики</institution>
     <city>Санкт-Петербург</city>
     <country>Россия</country>
    </aff>
    <aff>
     <institution xml:lang="en">ITMO university</institution>
     <city>Saint-Petersburg</city>
     <country>Russian Federation</country>
    </aff>
   </aff-alternatives>
   <pub-date publication-format="print" date-type="pub" iso-8601-date="2026-04-11T00:00:00+03:00">
    <day>11</day>
    <month>04</month>
    <year>2026</year>
   </pub-date>
   <pub-date publication-format="electronic" date-type="pub" iso-8601-date="2026-04-11T00:00:00+03:00">
    <day>11</day>
    <month>04</month>
    <year>2026</year>
   </pub-date>
   <volume>2026</volume>
   <issue>1</issue>
   <fpage>30</fpage>
   <lpage>42</lpage>
   <history>
    <date date-type="received" iso-8601-date="2026-01-12T00:00:00+03:00">
     <day>12</day>
     <month>01</month>
     <year>2026</year>
    </date>
    <date date-type="accepted" iso-8601-date="2026-03-25T00:00:00+03:00">
     <day>25</day>
     <month>03</month>
     <year>2026</year>
    </date>
   </history>
   <self-uri xlink:href="https://journals.igps.ru/en/nauka/article/120408/view">https://journals.igps.ru/en/nauka/article/120408/view</self-uri>
   <abstract xml:lang="ru">
    <p>Рассмотрен алгоритм автоматической генерации тематических тестов на примере тестов по английскому языку с использованием метода контрфактного анализа для повышения их качества на базе мобильного приложения. В ходе детального анализа предметной области языкового тестирования были выстроены четкие требования к будущему сервису, классифицированы ключевые форматы контроля знаний с описанием типовых упражнений и уровней сложности, на которых они применяются, что помогло собрать целостную картину навыков, требующих автоматизированной проверки. Выделены сложные точки существующих тестов: двусмысленные формулировки, множественность корректных ответов, трудоёмкий подбор. Разработан и апробирован комплексный подход к оценке эффективности промптов для генерации грамматических тестов на базе больших языковых моделей. В качестве ядра предложен контрфактный алгоритм, позволяющий выявлять латентные признаки, реально влияющие на выбор грамматических структур модели, точечно модифицировать промпт и оценивать изменения по трём взаимодополняющим метрикам. Применение алгоритма показало, что добавление явных указаний на самые значимые скрытые признаки повышает восприимчивость модели к ключевым факторам задания. Дальнейшая переоценка качества по разработанным метрикам и независимая экспертная проверка подтвердили статистически значимый прирост (p &lt; 0,01) как в грамматическом соответствии, так и в соответствии структуре заданий: средняя оценка повысилась с 0,91 до 0,95. Таким образом, контрфактный анализ действительно является эффективным инструментом тонкой настройки промптов; предложенный улучшенный промпт обеспечивает более надёжную генерацию тестовых материалов, соответствующих образовательным стандартам, и закладывает основу для масштабирования алгоритма на другие типы заданий и языковые навыки.</p>
   </abstract>
   <trans-abstract xml:lang="en">
    <p>The paper presents an algorithm for the automatic generation of thematic tests, illustrated with English language tests, that applies the counterfactual analysis method to improve their quality within a mobile application. A detailed analysis of the language testing domain yielded clear requirements for the future service. Key knowledge assessment formats were classified, along with typical exercises and the difficulty levels at which they are used, which helped to build a comprehensive picture of the skills requiring automated assessment. The weak points of existing tests are highlighted: ambiguous wording, multiple correct answers, and labor-intensive item selection. The paper develops and validates a comprehensive approach to assessing the effectiveness of prompts for generating grammar tests with large language models. At its core is a counterfactual algorithm that identifies the latent features actually influencing the model's choice of grammatical structures, selectively modifies the prompt, and evaluates the changes using three complementary metrics. Applying the algorithm showed that adding explicit indications of the most significant hidden features increases the model's sensitivity to the key factors of the task. Subsequent re-evaluation against the developed metrics and an independent expert review confirmed a statistically significant improvement (p &lt; 0.01) in both grammatical compliance and compliance with the task structure: the average score increased from 0.91 to 0.95. Thus, counterfactual analysis is indeed an effective tool for fine-tuning prompts; the proposed improved prompt provides more reliable generation of test materials that meet educational standards and lays the foundation for scaling the algorithm to other task types and language skills.</p>
   </trans-abstract>
   <kwd-group xml:lang="ru">
    <kwd>качество образования</kwd>
    <kwd>искусственный интеллект</kwd>
    <kwd>Large Language Models</kwd>
    <kwd>промпт</kwd>
    <kwd>контрфактный анализ</kwd>
    <kwd>латентные признаки</kwd>
    <kwd>грамматический тест</kwd>
    <kwd>контрфактный алгоритм</kwd>
    <kwd>восприимчивость модели</kwd>
    <kwd>генерация тестов</kwd>
   </kwd-group>
   <kwd-group xml:lang="en">
    <kwd>quality of education</kwd>
    <kwd>artificial intelligence</kwd>
    <kwd>Large Language Models</kwd>
    <kwd>prompt</kwd>
    <kwd>counterfactual analysis</kwd>
    <kwd>latent features</kwd>
    <kwd>grammar test</kwd>
    <kwd>counterfactual algorithm</kwd>
    <kwd>model sensitivity</kwd>
    <kwd>test generation</kwd>
   </kwd-group>
  </article-meta>
 </front>
 <body>
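  <sec>
   <title>Editorial sketch: counterfactual prompt refinement</title>
   <p>As a minimal illustration of the procedure summarized in the abstract, the Python sketch below probes one candidate latent feature of the prompt at a time, measures its counterfactual effect on aggregate test quality, and folds the most influential features back into the prompt. The model call, the quality metric, the feature list, and all identifiers are assumed stand-ins for this sketch rather than the authors' implementation; the numeric constants merely echo the abstract's 0.91/0.95 scale and are not real data.</p>
   <code language="python"><![CDATA[
import random
from statistics import mean

rng = random.Random(0)  # reproducible randomness for this stub

# Candidate latent prompt features to probe (illustrative assumptions).
FEATURES = {
    "cefr_level": "Target CEFR level: B2.",
    "distractors": "Each item must have exactly three plausible distractors.",
    "one_structure": "Test exactly one grammatical structure per item.",
}

BASE_PROMPT = ("Generate a 10-item multiple-choice English grammar test "
               "on Past Simple versus Present Perfect.")

def generate_test(prompt: str) -> str:
    """Stand-in for the LLM call; a real system would query the model here."""
    return f"<test #{rng.randrange(10**6)} from a {len(prompt)}-char prompt>"

def quality(test: str, prompt: str) -> float:
    """Stand-in for the aggregate of the three complementary metrics
    (grammatical compliance, task-structure compliance, expert score).
    The constants only mimic the abstract's scale; they are not real data."""
    bonus = 0.02 * sum(text in prompt for text in FEATURES.values())
    return min(1.0, rng.gauss(0.91 + bonus, 0.01))

def mean_quality(prompt: str, n: int = 30) -> float:
    """Average quality over n simulated generations from one prompt."""
    return mean(quality(generate_test(prompt), prompt) for _ in range(n))

def feature_effect(feature_text: str) -> float:
    """Counterfactual probe: change in mean quality when exactly one feature
    is made explicit while the rest of the prompt is held fixed."""
    return (mean_quality(BASE_PROMPT + " " + feature_text)
            - mean_quality(BASE_PROMPT))

# Rank features by counterfactual effect and keep the helpful ones.
effects = sorted(((feature_effect(text), name, text)
                  for name, text in FEATURES.items()), reverse=True)
improved_prompt = BASE_PROMPT + " " + " ".join(
    text for effect, _, text in effects if effect > 0)

print(f"baseline quality: {mean_quality(BASE_PROMPT):.3f}")
print(f"improved quality: {mean_quality(improved_prompt):.3f}")
]]></code>
   <p>In this toy run the improved prompt scores higher only because the stubbed metric rewards explicit features; in the paper's setting the same loop is driven by the real model and the three metrics plus expert review.</p>
  </sec>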
 </body>
 <back>
  <ref-list>
   <ref id="B1">
    <label>1.</label>
    <citation-alternatives>
     <mixed-citation xml:lang="ru">Systematic literature review on opportunities, challenges, and future research recommendations of artificial intelligence in education / T.K.F. Chiu [et al.] // Computers and Education: Artificial Intelligence. 2023. Vol. 4. P. 100118. DOI: 10.1016/j.caeai.2022.100070</mixed-citation>
     <mixed-citation xml:lang="en">Systematic literature review on opportunities, challenges, and future research recommendations of artificial intelligence in education / T.K.F. Chiu [et al.] // Computers and Education: Artificial Intelligence. 2023. Vol. 4. P. 100118. DOI: 10.1016/j.caeai.2022.100070</mixed-citation>
    </citation-alternatives>
   </ref>
   <ref id="B2">
    <label>2.</label>
    <citation-alternatives>
     <mixed-citation xml:lang="ru">Kalyan K.S., Rajasekharan A., Sangeetha S. AMMUS: A Survey of Transformer-based Pretrained Models in Natural Language Processing // arXiv preprint. 2021. DOI: 10.48550/arXiv.2108.05542</mixed-citation>
     <mixed-citation xml:lang="en">Kalyan K.S., Rajasekharan A., Sangeetha S. AMMUS: A Survey of Transformer-based Pretrained Models in Natural Language Processing // arXiv preprint. 2021. DOI: 10.48550/arXiv.2108.05542</mixed-citation>
    </citation-alternatives>
   </ref>
   <ref id="B3">
    <label>3.</label>
    <citation-alternatives>
     <mixed-citation xml:lang="ru">Training language models to follow instructions with human feedback / L. Ouyang [et al.] // arXiv preprint. 2022. DOI:10.48550/arXiv.2203.02155</mixed-citation>
     <mixed-citation xml:lang="en">Training language models to follow instructions with human feedback / L. Ouyang [et al.] // arXiv preprint. 2022. DOI:10.48550/arXiv.2203.02155</mixed-citation>
    </citation-alternatives>
   </ref>
   <ref id="B4">
    <label>4.</label>
    <citation-alternatives>
     <mixed-citation xml:lang="ru">Language Models are Few-Shot Learners / T.B. Brown [et al.] // arXiv preprint. 2020. DOI: 10.48550/arXiv:2005.14165</mixed-citation>
     <mixed-citation xml:lang="en">Language Models are Few-Shot Learners / T.B. Brown [et al.] // arXiv preprint. 2020. DOI: 10.48550/arXiv:2005.14165</mixed-citation>
    </citation-alternatives>
   </ref>
   <ref id="B5">
    <label>5.</label>
    <citation-alternatives>
     <mixed-citation xml:lang="ru">GPT-3 family: Diverse applications of a large language model / T.B. Brown [et al.] // arXiv preprint. 2021. DOI: 10.48550/arXiv:2105.14208</mixed-citation>
     <mixed-citation xml:lang="en">GPT-3 family: Diverse applications of a large language model / T.B. Brown [et al.] // arXiv preprint. 2021. DOI: 10.48550/arXiv:2105.14208</mixed-citation>
    </citation-alternatives>
   </ref>
   <ref id="B6">
    <label>6.</label>
    <citation-alternatives>
     <mixed-citation xml:lang="ru">Text-davinci: A large language model for diverse and creative text generation / A. Radford [et al.] // arXiv preprint. 2022. DOI: 10.48550/arXiv:2201.12136</mixed-citation>
     <mixed-citation xml:lang="en">Text-davinci: A large language model for diverse and creative text generation / A. Radford [et al.] // arXiv preprint. 2022. DOI: 10.48550/arXiv:2201.12136</mixed-citation>
    </citation-alternatives>
   </ref>
   <ref id="B7">
    <label>7.</label>
    <citation-alternatives>
     <mixed-citation xml:lang="ru">ChatGPT for Good? On Opportunities and Challenges of Large Language Models for Education / E. Kasneci [et al.] // arXiv preprint. 2023. DOI: 10.48550/arXiv:2304.11208</mixed-citation>
     <mixed-citation xml:lang="en">ChatGPT for Good? On Opportunities and Challenges of Large Language Models for Education / E. Kasneci [et al.] // arXiv preprint. 2023. DOI: 10.48550/arXiv:2304.11208</mixed-citation>
    </citation-alternatives>
   </ref>
   <ref id="B8">
    <label>8.</label>
    <citation-alternatives>
     <mixed-citation xml:lang="ru">Adapting Large Language Models for Education: Foundational Capabilities, Potentials, and Challenges / Q. Li [et al.] // arXiv preprint. 2023. DOI: 10.48550/arXiv:2401.08664</mixed-citation>
     <mixed-citation xml:lang="en">Adapting Large Language Models for Education: Foundational Capabilities, Potentials, and Challenges / Q. Li [et al.] // arXiv preprint. 2023. DOI: 10.48550/arXiv:2401.08664</mixed-citation>
    </citation-alternatives>
   </ref>
   <ref id="B9">
    <label>9.</label>
    <citation-alternatives>
     <mixed-citation xml:lang="ru">Practical and Ethical Challenges of Large Language Models in Education: A Systematic Scoping Review / L. Yan [et al.] // arXiv preprint. 2023. DOI: 10.48550/arXiv:2303.13379</mixed-citation>
     <mixed-citation xml:lang="en">Practical and Ethical Challenges of Large Language Models in Education: A Systematic Scoping Review / L. Yan [et al.] // arXiv preprint. 2023. DOI: 10.48550/arXiv:2303.13379</mixed-citation>
    </citation-alternatives>
   </ref>
   <ref id="B10">
    <label>10.</label>
    <citation-alternatives>
     <mixed-citation xml:lang="ru">Nitze A. Future-proofing Education: A Prototype for Simulating Oral Examinations Using Large Language Models // arXiv preprint. 2023. DOI: 10.48550/arXiv:2401.06160</mixed-citation>
     <mixed-citation xml:lang="en">Nitze A. Future-proofing Education: A Prototype for Simulating Oral Examinations Using Large Language Models // arXiv preprint. 2023. DOI: 10.48550/arXiv:2401.06160</mixed-citation>
    </citation-alternatives>
   </ref>
   <ref id="B11">
    <label>11.</label>
    <citation-alternatives>
     <mixed-citation xml:lang="ru">Peng L., Nuchged B., Gao Y. Spoken Language Intelligence of Large Language Models for Language Learning // arXiv preprint. 2023. DOI: 10.48550/arXiv:2308.14536</mixed-citation>
     <mixed-citation xml:lang="en">Peng L., Nuchged B., Gao Y. Spoken Language Intelligence of Large Language Models for Language Learning // arXiv preprint. 2023. DOI: 10.48550/arXiv:2308.14536</mixed-citation>
    </citation-alternatives>
   </ref>
   <ref id="B12">
    <label>12.</label>
    <citation-alternatives>
     <mixed-citation xml:lang="ru">Wang K., Ramos J., Lawrence R. ChatEd: A Chatbot Leveraging ChatGPT for an Enhanced Learning Experience in Higher Education // arXiv preprint. 2023. DOI: 10.48550/arXiv:2401.00052</mixed-citation>
     <mixed-citation xml:lang="en">Wang K., Ramos J., Lawrence R. ChatEd: A Chatbot Leveraging ChatGPT for an Enhanced Learning Experience in Higher Education // arXiv preprint. 2023. DOI: 10.48550/arXiv:2401.00052</mixed-citation>
    </citation-alternatives>
   </ref>
   <ref id="B13">
    <label>13.</label>
    <citation-alternatives>
     <mixed-citation xml:lang="ru">Castleman B., Turkcan M.K. Examining the Influence of Varied Levels of Domain Knowledge Base Inclusion in GPT-based Intelligent Tutors // arXiv preprint. 2023. DOI: 10.48550/arXiv:2309.12367</mixed-citation>
     <mixed-citation xml:lang="en">Castleman B., Turkcan M.K. Examining the Influence of Varied Levels of Domain Knowledge Base Inclusion in GPT-based Intelligent Tutors // arXiv preprint. 2023. DOI: 10.48550/arXiv:2309.12367</mixed-citation>
    </citation-alternatives>
   </ref>
   <ref id="B14">
    <label>14.</label>
    <citation-alternatives>
     <mixed-citation xml:lang="ru">Large Language Models in Education: Vision and Opportunities / W. Gan [et al.] // arXiv preprint. 2023. DOI: 10.48550/arXiv:2311.13160</mixed-citation>
     <mixed-citation xml:lang="en">Large Language Models in Education: Vision and Opportunities / W. Gan [et al.] // arXiv preprint. 2023. DOI: 10.48550/arXiv:2311.13160</mixed-citation>
    </citation-alternatives>
   </ref>
   <ref id="B15">
    <label>15.</label>
    <citation-alternatives>
     <mixed-citation xml:lang="ru">Challenges and Opportunities of Generative AI for Higher Education as Explained by ChatGPT / R. Michel-Villarreal [et al.] // Education Sciences. 2023. Vol. 13. № 9. P. 856. DOI: 10.3390/educsci13090856</mixed-citation>
     <mixed-citation xml:lang="en">Challenges and Opportunities of Generative AI for Higher Education as Explained by ChatGPT / R. Michel-Villarreal [et al.] // Education Sciences. 2023. Vol. 13. № 9. P. 856. DOI: 10.3390/educsci13090856</mixed-citation>
    </citation-alternatives>
   </ref>
   <ref id="B16">
    <label>16.</label>
    <citation-alternatives>
     <mixed-citation xml:lang="ru">A systematic survey of prompt engineering in large language models: Techniques and applications / P. Sahoo [et al.] // arXiv preprint. 2024. DOI: 10.48550/arXiv:2402.07927</mixed-citation>
     <mixed-citation xml:lang="en">A systematic survey of prompt engineering in large language models: Techniques and applications / P. Sahoo [et al.] // arXiv preprint. 2024. DOI: 10.48550/arXiv:2402.07927</mixed-citation>
    </citation-alternatives>
   </ref>
   <ref id="B17">
    <label>17.</label>
    <citation-alternatives>
     <mixed-citation xml:lang="ru">Luo H., Specia L. From understanding to utilization: A survey on explainability for large language models // arXiv preprint. 2024. DOI: 10.48550/arXiv:2309.01029</mixed-citation>
     <mixed-citation xml:lang="en">Luo H., Specia L. From understanding to utilization: A survey on explainability for large language models // arXiv preprint. 2024. DOI: 10.48550/arXiv:2309.01029</mixed-citation>
    </citation-alternatives>
   </ref>
   <ref id="B18">
    <label>18.</label>
    <citation-alternatives>
     <mixed-citation xml:lang="ru">Analyzing Chain-of-Thought Prompting in Large Language Models via Gradient-based Feature Attributions / S. Wu [et al.] // arXiv preprint. 2023. DOI: 10.48550/arXiv:2309.01029</mixed-citation>
     <mixed-citation xml:lang="en">Analyzing Chain-of-Thought Prompting in Large Language Models via Gradient-based Feature Attributions / S. Wu [et al.] // arXiv preprint. 2023. DOI: 10.48550/arXiv:2309.01029</mixed-citation>
    </citation-alternatives>
   </ref>
   <ref id="B19">
    <label>19.</label>
    <citation-alternatives>
     <mixed-citation xml:lang="ru">Larger language models do in-context learning differently / J. Wei [et al.] // arXiv preprint. 2024. DOI: 10.48550/arXiv:2405.19592</mixed-citation>
     <mixed-citation xml:lang="en">Larger language models do in-context learning differently / J. Wei [et al.] // arXiv preprint. 2024. DOI: 10.48550/arXiv:2405.19592</mixed-citation>
    </citation-alternatives>
   </ref>
   <ref id="B20">
    <label>20.</label>
    <citation-alternatives>
     <mixed-citation xml:lang="ru">Madsen A., Chandar S., Reddy S. Can Large Language Models Explain Themselves? // arXiv preprint. 2024. DOI: 10.48550/arXiv:2401.07927</mixed-citation>
     <mixed-citation xml:lang="en">Madsen A., Chandar S., Reddy S. Can Large Language Models Explain Themselves? // arXiv preprint. 2024. DOI: 10.48550/arXiv:2401.07927</mixed-citation>
    </citation-alternatives>
   </ref>
   <ref id="B21">
    <label>21.</label>
    <citation-alternatives>
     <mixed-citation xml:lang="ru">LLMs as Counterfactual Explanation Modules: Can ChatGPT Explain Black-box Text Classifiers? / A. Bhattacharjee [et al.] // arXiv preprint. 2023. DOI: 10.48550/arXiv:2309.13340</mixed-citation>
     <mixed-citation xml:lang="en">LLMs as Counterfactual Explanation Modules: Can ChatGPT Explain Black-box Text Classifiers? / A. Bhattacharjee [et al.] // arXiv preprint. 2023. DOI: 10.48550/arXiv:2309.13340</mixed-citation>
    </citation-alternatives>
   </ref>
  </ref-list>
 </back>
</article>
