[
{
"title": "AR-RAG: Autoregressive Retrieval Augmentation for Image Generation",
"authors": [
"Jingyuan Qi",
"Zhiyang Xu",
"Qifan Wang",
"Lifu Huang"
],
"published": "2025-06-08",
"summary": "We introduce Autoregressive Retrieval Augmentation (AR-RAG), a novel paradigm that enhances image generation by autoregressively incorporating k-nearest neighbor retrievals at the patch level. Unlike prior methods that perform a single, static retrieval before generation and condition the entire generation on fixed reference images, AR-RAG performs context-aware retrievals at each generation step, using prior-generated patches as queries to retrieve and incorporate the most relevant patch-level v",
"filename": "AR-RAG__Autoregressive_Retrieval_Augmentation_for__2506.06962v3.pdf",
"arxiv_id": "2506.06962v3"
},
{
"title": "Intelligent Interaction Strategies for Context-Aware Cognitive Augmentation",
"authors": [
"Xiangrong Zhu",
"Yuan Xu",
"Tianjian Liu",
"Jingwei Sun",
"Yu Zhang",
"Xin Tong"
],
"published": "2025-04-18",
"summary": "Human cognition is constrained by processing limitations, leading to cognitive overload and inefficiencies in knowledge synthesis and decision-making. Large Language Models (LLMs) present an opportunity for cognitive augmentation, but their current reactive nature limits their real-world applicability. This position paper explores the potential of context-aware cognitive augmentation, where LLMs dynamically adapt to users' cognitive states and task environments to provide appropriate support. Th",
"filename": "Intelligent_Interaction_Strategies_for_Context-Awa_2504.13684v1.pdf",
"arxiv_id": "2504.13684v1"
},
{
"title": "Factually: Exploring Wearable Fact-Checking for Augmented Truth Discernment",
"authors": [
"Chitralekha Gupta",
"Hanjun Wu",
"Praveen Sasikumar",
"Shreyas Sridhar",
"Priambudi Bagaskara",
"Suranga Nanayakkara"
],
"published": "2025-04-24",
"summary": "Wearable devices are transforming human capabilities by seamlessly augmenting cognitive functions. In this position paper, we propose a voice-based, interactive learning companion designed to amplify and extend cognitive abilities through informal learning. Our vision is threefold: (1) to enable users to discover new knowledge on-the-go through contextual interactive quizzes, fostering critical thinking and mindfulness, (2) to proactively detect misinformation, empowering users to critically ass",
"filename": "Factually__Exploring_Wearable_Fact-Checking_for_Au_2504.17204v1.pdf",
"arxiv_id": "2504.17204v1"
},
{
"title": "Designing AI Systems that Augment Human Performed vs. Demonstrated Critical Thinking",
"authors": [
"Katelyn Xiaoying Mei",
"Nic Weber"
],
"published": "2025-04-20",
"summary": "The recent rapid advancement of LLM-based AI systems has accelerated our search and production of information. While the advantages brought by these systems seemingly improve the performance or efficiency of human activities, they do not necessarily enhance human capabilities. Recent research has started to examine the impact of generative AI on individuals' cognitive abilities, especially critical thinking. Based on definitions of critical thinking across psychology and education, this position",
"filename": "Designing_AI_Systems_that_Augment_Human_Performed__2504.14689v1.pdf",
"arxiv_id": "2504.14689v1"
},
{
"title": "Automated Literature Review Using NLP Techniques and LLM-Based Retrieval-Augmented Generation",
"authors": [
"Nurshat Fateh Ali",
"Md. Mahdi Mohtasim",
"Shakil Mosharrof",
"T. Gopi Krishna"
],
"published": "2024-11-27",
"summary": "This research presents and compares multiple approaches to automate the generation of literature reviews using several Natural Language Processing (NLP) techniques and retrieval-augmented generation (RAG) with a Large Language Model (LLM). The ever-increasing number of research articles provides a huge challenge for manual literature review. It has resulted in an increased demand for automation. Developing a system capable of automatically generating the literature reviews from only the PDF file",
"filename": "Automated_Literature_Review_Using_NLP_Techniques_a_2411.18583v1.pdf",
"arxiv_id": "2411.18583v1"
},
{
"title": "EVOR: Evolving Retrieval for Code Generation",
"authors": [
"Hongjin Su",
"Shuyang Jiang",
"Yuhang Lai",
"Haoyuan Wu",
"Boao Shi",
"Che Liu",
"Qian Liu",
"Tao Yu"
],
"published": "2024-02-19",
"summary": "Recently the retrieval-augmented generation (RAG) has been successfully applied in code generation. However, existing pipelines for retrieval-augmented code generation (RACG) employ static knowledge bases with a single source, limiting the adaptation capabilities of Large Language Models (LLMs) to domains they have insufficient knowledge of. In this work, we develop a novel pipeline, EVOR, that employs the synchronous evolution of both queries and diverse knowledge bases. On two realistic settin",
"filename": "EVOR__Evolving_Retrieval_for_Code_Generation_2402.12317v2.pdf",
"arxiv_id": "2402.12317v2"
},
{
"title": "Riddle Me This! Stealthy Membership Inference for Retrieval-Augmented Generation",
"authors": [
"Ali Naseh",
"Yuefeng Peng",
"Anshuman Suri",
"Harsh Chaudhari",
"Alina Oprea",
"Amir Houmansadr"
],
"published": "2025-02-01",
"summary": "Retrieval-Augmented Generation (RAG) enables Large Language Models (LLMs) to generate grounded responses by leveraging external knowledge databases without altering model parameters. Although the absence of weight tuning prevents leakage via model parameters, it introduces the risk of inference adversaries exploiting retrieved documents in the model's context. Existing methods for membership inference and data extraction often rely on jailbreaking or carefully crafted unnatural queries, which ca",
"filename": "Riddle_Me_This!_Stealthy_Membership_Inference_for__2502.00306v2.pdf",
"arxiv_id": "2502.00306v2"
},
{
"title": "Demystifying Instruction Mixing for Fine-tuning Large Language Models",
"authors": [
"Renxi Wang",
"Haonan Li",
"Minghao Wu",
"Yuxia Wang",
"Xudong Han",
"Chiyu Zhang",
"Timothy Baldwin"
],
"published": "2023-12-17",
"summary": "Instruction tuning significantly enhances the performance of large language models (LLMs) across various tasks. However, the procedure for optimizing the mixing of instruction datasets for LLM fine-tuning is still poorly understood. This study categorizes instructions into three primary types: NLP downstream tasks, coding, and general chat. We explore the effects of instruction tuning on different combinations of datasets on LLM performance, and find that certain instruction types are more advant",
"filename": "Demystifying_Instruction_Mixing_for_Fine-tuning_La_2312.10793v3.pdf",
"arxiv_id": "2312.10793v3"
},
{
"title": "Differentially Private Fine-tuning of Language Models",
"authors": [
"Da Yu",
"Saurabh Naik",
"Arturs Backurs",
"Sivakanth Gopi",
"Huseyin A. Inan",
"Gautam Kamath",
"Janardhan Kulkarni",
"Yin Tat Lee",
"Andre Manoel",
"Lukas Wutschitz",
"Sergey Yekhanin",
"Huishuai Zhang"
],
"published": "2021-10-13",
"summary": "We give simpler, sparser, and faster algorithms for differentially private fine-tuning of large-scale pre-trained language models, which achieve the state-of-the-art privacy versus utility tradeoffs on many standard NLP tasks. We propose a meta-framework for this problem, inspired by the recent success of highly parameter-efficient methods for fine-tuning. Our experiments show that differentially private adaptations of these approaches outperform previous private algorithms in three important di",
"filename": "Differentially_Private_Fine-tuning_of_Language_Mod_2110.06500v2.pdf",
"arxiv_id": "2110.06500v2"
},
{
"title": "Learning From Failure: Integrating Negative Examples when Fine-tuning Large Language Models as Agents",
"authors": [
"Renxi Wang",
"Haonan Li",
"Xudong Han",
"Yixuan Zhang",
"Timothy Baldwin"
],
"published": "2024-02-18",
"summary": "Large language models (LLMs) have achieved success in acting as agents, which interact with environments through tools such as search engines. However, LLMs are optimized for language generation instead of tool use during training or alignment, limiting their effectiveness as agents. To resolve this problem, previous work has first collected interaction trajectories between LLMs and environments, using only trajectories that successfully finished the task to fine-tune smaller models, making fine",
"filename": "Learning_From_Failure__Integrating_Negative_Exampl_2402.11651v2.pdf",
"arxiv_id": "2402.11651v2"
},
{
"title": "Fine-tuning with Very Large Dropout",
"authors": [
"Jianyu Zhang",
"L\u00e9on Bottou"
],
"published": "2024-03-01",
"summary": "It is impossible today to pretend that the practice of machine learning is always compatible with the idea that training and testing data follow the same distribution. Several authors have recently used ensemble techniques to show how scenarios involving multiple data distributions are best served by representations that are both richer than those obtained by regularizing for the best in-distribution performance, and richer than those obtained under the influence of the implicit sparsity bias of",
"filename": "Fine-tuning_with_Very_Large_Dropout_2403.00946v3.pdf",
"arxiv_id": "2403.00946v3"
},
{
"title": "WizardLM: Empowering large pre-trained language models to follow complex instructions",
"authors": [
"Can Xu",
"Qingfeng Sun",
"Kai Zheng",
"Xiubo Geng",
"Pu Zhao",
"Jiazhan Feng",
"Chongyang Tao",
"Qingwei Lin",
"Daxin Jiang"
],
"published": "2023-04-24",
"summary": "Training large language models (LLMs) with open-domain instruction following data brings colossal success. However, manually creating such instruction data is very time-consuming and labor-intensive. Moreover, humans may struggle to produce high-complexity instructions. In this paper, we show an avenue for creating large amounts of instruction data with varying levels of complexity using LLM instead of humans. Starting with an initial set of instructions, we use our proposed Evol-Instruct to rew",
"filename": "WizardLM__Empowering_large_pre-trained_language_mo_2304.12244v3.pdf",
"arxiv_id": "2304.12244v3"
},
{
"title": "Large Language Models Reasoning Abilities Under Non-Ideal Conditions After RL-Fine-Tuning",
"authors": [
"Chang Tian",
"Matthew B. Blaschko",
"Mingzhe Xing",
"Xiuxing Li",
"Yinliang Yue",
"Marie-Francine Moens"
],
"published": "2025-08-06",
"summary": "Reinforcement learning (RL) has become a key technique for enhancing the reasoning abilities of large language models (LLMs), with policy-gradient algorithms dominating the post-training stage because of their efficiency and effectiveness. However, most existing benchmarks evaluate large-language-model reasoning under idealized settings, overlooking performance in realistic, non-ideal scenarios. We identify three representative non-ideal scenarios with practical relevance: summary inference, fin",
"filename": "Large_Language_Models_Reasoning_Abilities_Under_No_2508.04848v1.pdf",
"arxiv_id": "2508.04848v1"
},
{
"title": "Learning to Perform Complex Tasks through Compositional Fine-Tuning of Language Models",
"authors": [
"Victor S. Bursztyn",
"David Demeter",
"Doug Downey",
"Larry Birnbaum"
],
"published": "2022-10-23",
"summary": "How to usefully encode compositional task structure has long been a core challenge in AI. Recent work in chain of thought prompting has shown that for very large neural language models (LMs), explicitly demonstrating the inferential steps involved in a target task may improve performance over end-to-end learning that focuses on the target task alone. However, chain of thought prompting has significant limitations due to its dependency on huge pretrained LMs. In this work, we present compositiona",
"filename": "Learning_to_Perform_Complex_Tasks_through_Composit_2210.12607v1.pdf",
"arxiv_id": "2210.12607v1"
},
{
"title": "Transformer-based Personalized Attention Mechanism for Medical Images with Clinical Records",
"authors": [
"Yusuke Takagi",
"Noriaki Hashimoto",
"Hiroki Masuda",
"Hiroaki Miyoshi",
"Koichi Ohshima",
"Hidekata Hontani",
"Ichiro Takeuchi"
],
"published": "2022-06-07",
"summary": "In medical image diagnosis, identifying the attention region, i.e., the region of interest for which the diagnosis is made, is an important task. Various methods have been developed to automatically identify target regions from given medical images. However, in actual medical practice, the diagnosis is made based not only on the images but also on a variety of clinical records. This means that pathologists examine medical images with some prior knowledge of the patients and that the attention re",
"filename": "Transformer-based_Personalized_Attention_Mechanism_2206.03003v2.pdf",
"arxiv_id": "2206.03003v2"
},
{
"title": "Dilated Neighborhood Attention Transformer",
"authors": [
"Ali Hassani",
"Humphrey Shi"
],
"published": "2022-09-29",
"summary": "Transformers are quickly becoming one of the most heavily applied deep learning architectures across modalities, domains, and tasks. In vision, on top of ongoing efforts into plain transformers, hierarchical transformers have also gained significant attention, thanks to their performance and easy integration into existing frameworks. These models typically employ localized attention mechanisms, such as the sliding-window Neighborhood Attention (NA) or Swin Transformer's Shifted Window Self Atten",
"filename": "Dilated_Neighborhood_Attention_Transformer_2209.15001v3.pdf",
"arxiv_id": "2209.15001v3"
},
{
"title": "Music Transformer",
"authors": [
"Cheng-Zhi Anna Huang",
"Ashish Vaswani",
"Jakob Uszkoreit",
"Noam Shazeer",
"Ian Simon",
"Curtis Hawthorne",
"Andrew M. Dai",
"Matthew D. Hoffman",
"Monica Dinculescu",
"Douglas Eck"
],
"published": "2018-09-12",
"summary": "Music relies heavily on repetition to build structure and meaning. Self-reference occurs on multiple timescales, from motifs to phrases to reusing of entire sections of music, such as in pieces with ABA structure. The Transformer (Vaswani et al., 2017), a sequence model based on self-attention, has achieved compelling results in many generation tasks that require maintaining long-range coherence. This suggests that self-attention might also be well-suited to modeling music. In musical compositio",
"filename": "Music_Transformer_1809.04281v3.pdf",
"arxiv_id": "1809.04281v3"
},
{
"title": "D\u00e9j\u00e0 vu: A Contextualized Temporal Attention Mechanism for Sequential Recommendation",
"authors": [
"Jibang Wu",
"Renqin Cai",
"Hongning Wang"
],
"published": "2020-01-29",
"summary": "Predicting users' preferences based on their sequential behaviors in history is challenging and crucial for modern recommender systems. Most existing sequential recommendation algorithms focus on transitional structure among the sequential actions, but largely ignore the temporal and context information, when modeling the influence of a historical event to current prediction.\n In this paper, we argue that the influence from the past events on a user's current action should vary over the course ",
"filename": "D\u00e9j\u00e0_vu__A_Contextualized_Temporal_Attention_Mecha_2002.00741v1.pdf",
"arxiv_id": "2002.00741v1"
},
{
"title": "Mask-Attention-Free Transformer for 3D Instance Segmentation",
"authors": [
"Xin Lai",
"Yuhui Yuan",
"Ruihang Chu",
"Yukang Chen",
"Han Hu",
"Jiaya Jia"
],
"published": "2023-09-04",
"summary": "Recently, transformer-based methods have dominated 3D instance segmentation, where mask attention is commonly involved. Specifically, object queries are guided by the initial instance masks in the first cross-attention, and then iteratively refine themselves in a similar manner. However, we observe that the mask-attention pipeline usually leads to slow convergence due to low-recall initial instance masks. Therefore, we abandon the mask attention design and resort to an auxiliary center regressio",
"filename": "Mask-Attention-Free_Transformer_for_3D_Instance_Se_2309.01692v1.pdf",
"arxiv_id": "2309.01692v1"
},
{
"title": "Attention Guided CAM: Visual Explanations of Vision Transformer Guided by Self-Attention",
"authors": [
"Saebom Leem",
"Hyunseok Seo"
],
"published": "2024-02-07",
"summary": "Vision Transformer(ViT) is one of the most widely used models in the computer vision field with its great performance on various tasks. In order to fully utilize the ViT-based architecture in various applications, proper visualization methods with a decent localization performance are necessary, but these methods employed in CNN-based models are still not available in ViT due to its unique structure. In this work, we propose an attention-guided visualization method applied to ViT that provides a",
"filename": "Attention_Guided_CAM__Visual_Explanations_of_Visio_2402.04563v1.pdf",
"arxiv_id": "2402.04563v1"
},
{
"title": "Synthesis of Mechanism for single- and hybrid-tasks using Differential Evolution",
"authors": [
"F. Penunuri",
"R. Peon-Escalante",
"C. Villanueva",
"D. Pech-Oy"
],
"published": "2011-02-10",
"summary": "The optimal dimensional synthesis for planar mechanisms using differential evolution (DE) is demonstrated. Four examples are included: in the first case, the synthesis of a mechanism for hybrid-tasks, considering path generation, function generation, and motion generation, is carried out. The second and third cases pertain to path generation, with and without prescribed timing. Finally, the synthesis of an Ackerman mechanism is reported. Order defect problem is solved by manipulating individuals",
"filename": "Synthesis_of_Mechanism_for_single-_and_hybrid-task_1102.2017v2.pdf",
"arxiv_id": "1102.2017v2"
},
{
"title": "Towards Goal-oriented Prompt Engineering for Large Language Models: A Survey",
"authors": [
"Haochen Li",
"Jonathan Leung",
"Zhiqi Shen"
],
"published": "2024-01-25",
"summary": "Large Language Models (LLMs) have shown prominent performance in various downstream tasks and prompt engineering plays a pivotal role in optimizing LLMs' performance. This paper, not only as an overview of current prompt engineering methods, but also aims to highlight the limitation of designing prompts based on an anthropomorphic assumption that expects LLMs to think like humans. From our review of 50 representative studies, we demonstrate that a goal-oriented prompt formulation, which guides L",
"filename": "Towards_Goal-oriented_Prompt_Engineering_for_Large_2401.14043v3.pdf",
"arxiv_id": "2401.14043v3"
},
{
"title": "Prompt Engineering Guidelines for Using Large Language Models in Requirements Engineering",
"authors": [
"Krishna Ronanki",
"Simon Arvidsson",
"Johan Axell"
],
"published": "2025-07-04",
"summary": "The rapid emergence of generative AI models like Large Language Models (LLMs) has demonstrated its utility across various activities, including within Requirements Engineering (RE). Ensuring the quality and accuracy of LLM-generated output is critical, with prompt engineering serving as a key technique to guide model responses. However, existing literature provides limited guidance on how prompt engineering can be leveraged, specifically for RE activities. The objective of this study is to explo",
"filename": "Prompt_Engineering_Guidelines_for_Using_Large_Lang_2507.03405v1.pdf",
"arxiv_id": "2507.03405v1"
},
{
"title": "Morescient GAI for Software Engineering (Extended Version)",
"authors": [
"Marcus Kessel",
"Colin Atkinson"
],
"published": "2024-06-07",
"summary": "The ability of Generative AI (GAI) technology to automatically check, synthesize and modify software engineering artifacts promises to revolutionize all aspects of software engineering. Using GAI for software engineering tasks is consequently one of the most rapidly expanding fields of software engineering research, with over a hundred LLM-based code models having been published since 2021. However, the overwhelming majority of existing code models share a major weakness - they are exclusively t",
"filename": "Morescient_GAI_for_Software_Engineering_(Extended__2406.04710v2.pdf",
"arxiv_id": "2406.04710v2"
},
{
"title": "Prompt-with-Me: in-IDE Structured Prompt Management for LLM-Driven Software Engineering",
"authors": [
"Ziyou Li",
"Agnia Sergeyuk",
"Maliheh Izadi"
],
"published": "2025-09-21",
"summary": "Large Language Models are transforming software engineering, yet prompt management in practice remains ad hoc, hindering reliability, reuse, and integration into industrial workflows. We present Prompt-with-Me, a practical solution for structured prompt management embedded directly in the development environment. The system automatically classifies prompts using a four-dimensional taxonomy encompassing intent, author role, software development lifecycle stage, and prompt type. To enhance prompt ",
"filename": "Prompt-with-Me__in-IDE_Structured_Prompt_Managemen_2509.17096v1.pdf",
"arxiv_id": "2509.17096v1"
}
]