ccf_test.json•175 kB
{
"query": "machine learning",
"search_queries_executed": 1,
"filtering_groups": 0,
"total_unique_papers": 100,
"intersected_papers": 100,
"filtered_papers": 88,
"papers": [
{
"id": "2507.19477v1",
"title": "Advancing Event Forecasting through Massive Training of Large Language Models: Challenges, Solutions, and Broader Impacts",
"authors": [
"Sang-Woo Lee",
"Sohee Yang",
"Donghyun Kwak",
"Noah Y. Siegel"
],
"abstract": "Many recent papers have studied the development of superforecaster-level\nevent forecasting LLMs. While methodological problems with early studies cast\ndoubt on the use of LLMs for event forecasting, recent studies with improved\nevaluation methods have shown that state-of-the-art LLMs are gradually reaching\nsuperforecaster-level performance, and reinforcement learning has also been\nreported to improve future forecasting. Additionally, the unprecedented success\nof recent reasoning models and Deep Research-style models suggests that\ntechnology capable of greatly improving forecasting performance has been\ndeveloped. Therefore, based on these positive recent trends, we argue that the\ntime is ripe for research on large-scale training of superforecaster-level\nevent forecasting LLMs. We discuss two key research directions: training\nmethods and data acquisition. For training, we first introduce three\ndifficulties of LLM-based event forecasting training: noisiness-sparsity,\nknowledge cut-off, and simple reward structure problems. Then, we present\nrelated ideas to mitigate these problems: hypothetical event Bayesian networks,\nutilizing poorly-recalled and counterfactual events, and auxiliary reward\nsignals. For data, we propose aggressive use of market, public, and crawling\ndatasets to enable large-scale training and evaluation. Finally, we explain how\nthese technical advances could enable AI to provide predictive intelligence to\nsociety in broader areas. This position paper presents promising specific paths\nand considerations for getting closer to superforecaster-level AI technology,\naiming to call for researchers' interest in these directions.",
"categories": [
"cs.LG",
"cs.AI",
"cs.CL"
],
"published": "2025-07-25T17:59:13+00:00",
"url": "http://arxiv.org/pdf/2507.19477v1",
"resource_uri": "arxiv://2507.19477v1",
"citation_count": 0
},
{
"id": "2507.19473v1",
"title": "Let It Go? Not Quite: Addressing Item Cold Start in Sequential Recommendations with Content-Based Initialization",
"authors": [
"Anton Pembek",
"Artem Fatkulin",
"Anton Klenitskiy",
"Alexey Vasilev"
],
"abstract": "Many sequential recommender systems suffer from the cold start problem, where\nitems with few or no interactions cannot be effectively used by the model due\nto the absence of a trained embedding. Content-based approaches, which leverage\nitem metadata, are commonly used in such scenarios. One possible way is to use\nembeddings derived from content features such as textual descriptions as\ninitialization for the model embeddings. However, directly using frozen content\nembeddings often results in suboptimal performance, as they may not fully adapt\nto the recommendation task. On the other hand, fine-tuning these embeddings can\ndegrade performance for cold-start items, as item representations may drift far\nfrom their original structure after training. We propose a novel approach to\naddress this limitation. Instead of entirely freezing the content embeddings or\nfine-tuning them extensively, we introduce a small trainable delta to frozen\nembeddings that enables the model to adapt item representations without letting\nthem go too far from their original semantic structure. This approach\ndemonstrates consistent improvements across multiple datasets and modalities,\nincluding e-commerce datasets with textual descriptions and a music dataset\nwith audio-based representation.",
"categories": [
"cs.IR",
"cs.AI",
"cs.LG"
],
"published": "2025-07-25T17:57:31+00:00",
"url": "http://arxiv.org/pdf/2507.19473v1",
"resource_uri": "arxiv://2507.19473v1",
"citation_count": 0
},
{
"id": "2507.19465v1",
"title": "Linearly Convergent Algorithms for Nonsmooth Problems with Unknown Smooth Pieces",
"authors": [
"Zhe Zhang",
"Suvrit Sra"
],
"abstract": "We develop efficient algorithms for optimizing piecewise smooth (PWS)\nfunctions where the underlying partition of the domain into smooth pieces is\n\\emph{unknown}. For PWS functions satisfying a quadratic growth (QG) condition,\nwe propose a bundle-level (BL) type method that achieves global linear\nconvergence -- to our knowledge, the first such result for any algorithm for\nthis problem class. We extend this method to handle approximately PWS functions\nand to solve weakly-convex PWS problems, improving the state-of-the-art\ncomplexity to match the benchmark for smooth non-convex optimization.\nFurthermore, we introduce the first verifiable and accurate termination\ncriterion for PWS optimization. Similar to the gradient norm in smooth\noptimization, this certificate tightly characterizes the optimality gap under\nthe QG condition, and can moreover be evaluated without knowledge of any\nproblem parameters. We develop a search subroutine for this certificate and\nembed it within a guess-and-check framework, resulting in an almost\nparameter-free algorithm for both the convex QG and weakly-convex settings.",
"categories": [
"math.OC",
"cs.LG"
],
"published": "2025-07-25T17:50:43+00:00",
"url": "http://arxiv.org/pdf/2507.19465v1",
"resource_uri": "arxiv://2507.19465v1",
"citation_count": 0
},
{
"id": "2507.19459v1",
"title": "Fast Learning of Non-Cooperative Spacecraft 3D Models through Primitive Initialization",
"authors": [
"Pol Francesch Huc",
"Emily Bates",
"Simone D'Amico"
],
"abstract": "The advent of novel view synthesis techniques such as NeRF and 3D Gaussian\nSplatting (3DGS) has enabled learning precise 3D models only from posed\nmonocular images. Although these methods are attractive, they hold two major\nlimitations that prevent their use in space applications: they require poses\nduring training, and have high computational cost at training and inference. To\naddress these limitations, this work contributes: (1) a Convolutional Neural\nNetwork (CNN) based primitive initializer for 3DGS using monocular images; (2)\na pipeline capable of training with noisy or implicit pose estimates; and (3)\nand analysis of initialization variants that reduce the training cost of\nprecise 3D models. A CNN takes a single image as input and outputs a coarse 3D\nmodel represented as an assembly of primitives, along with the target's pose\nrelative to the camera. This assembly of primitives is then used to initialize\n3DGS, significantly reducing the number of training iterations and input images\nneeded -- by at least an order of magnitude. For additional flexibility, the\nCNN component has multiple variants with different pose estimation techniques.\nThis work performs a comparison between these variants, evaluating their\neffectiveness for downstream 3DGS training under noisy or implicit pose\nestimates. The results demonstrate that even with imperfect pose supervision,\nthe pipeline is able to learn high-fidelity 3D representations, opening the\ndoor for the use of novel view synthesis in space applications.",
"categories": [
"cs.CV",
"cs.LG",
"cs.RO"
],
"published": "2025-07-25T17:43:29+00:00",
"url": "http://arxiv.org/pdf/2507.19459v1",
"resource_uri": "arxiv://2507.19459v1",
"citation_count": 0
},
{
"id": "2507.19458v1",
"title": "Hierarchical Deep Reinforcement Learning Framework for Multi-Year Asset Management Under Budget Constraints",
"authors": [
"Amir Fard",
"Arnold X. -X. Yuan"
],
"abstract": "Budget planning and maintenance optimization are crucial for infrastructure\nasset management, ensuring cost-effectiveness and sustainability. However, the\ncomplexity arising from combinatorial action spaces, diverse asset\ndeterioration, stringent budget constraints, and environmental uncertainty\nsignificantly limits existing methods' scalability. This paper proposes a\nHierarchical Deep Reinforcement Learning methodology specifically tailored to\nmulti-year infrastructure planning. Our approach decomposes the problem into\ntwo hierarchical levels: a high-level Budget Planner allocating annual budgets\nwithin explicit feasibility bounds, and a low-level Maintenance Planner\nprioritizing assets within the allocated budget. By structurally separating\nmacro-budget decisions from asset-level prioritization and integrating linear\nprogramming projection within a hierarchical Soft Actor-Critic framework, the\nmethod efficiently addresses exponential growth in the action space and ensures\nrigorous budget compliance. A case study evaluating sewer networks of varying\nsizes (10, 15, and 20 sewersheds) illustrates the effectiveness of the proposed\napproach. Compared to conventional Deep Q-Learning and enhanced genetic\nalgorithms, our methodology converges more rapidly, scales effectively, and\nconsistently delivers near-optimal solutions even as network size grows.",
"categories": [
"cs.AI",
"cs.LG",
"cs.SY",
"eess.SY",
"math.OC"
],
"published": "2025-07-25T17:42:34+00:00",
"url": "http://arxiv.org/pdf/2507.19458v1",
"resource_uri": "arxiv://2507.19458v1",
"citation_count": 0
},
{
"id": "2507.19455v1",
"title": "Forest-Guided Clustering -- Shedding Light into the Random Forest Black Box",
"authors": [
"Lisa Barros de Andrade e Sousa",
"Gregor Miller",
"Ronan Le Gleut",
"Dominik Thalmeier",
"Helena Pelin",
"Marie Piraud"
],
"abstract": "As machine learning models are increasingly deployed in sensitive application\nareas, the demand for interpretable and trustworthy decision-making has\nincreased. Random Forests (RF), despite their widespread use and strong\nperformance on tabular data, remain difficult to interpret due to their\nensemble nature. We present Forest-Guided Clustering (FGC), a model-specific\nexplainability method that reveals both local and global structure in RFs by\ngrouping instances according to shared decision paths. FGC produces\nhuman-interpretable clusters aligned with the model's internal logic and\ncomputes cluster-specific and global feature importance scores to derive\ndecision rules underlying RF predictions. FGC accurately recovered latent\nsubclass structure on a benchmark dataset and outperformed classical clustering\nand post-hoc explanation methods. Applied to an AML transcriptomic dataset, FGC\nuncovered biologically coherent subpopulations, disentangled disease-relevant\nsignals from confounders, and recovered known and novel gene expression\npatterns. FGC bridges the gap between performance and interpretability by\nproviding structure-aware insights that go beyond feature-level attribution.",
"categories": [
"cs.LG"
],
"published": "2025-07-25T17:41:39+00:00",
"url": "http://arxiv.org/pdf/2507.19455v1",
"resource_uri": "arxiv://2507.19455v1",
"citation_count": 0
},
{
"id": "2507.19438v1",
"title": "Gradient-based grand canonical optimization enabled by graph neural networks with fractional atomic existence",
"authors": [
"Mads-Peter Verner Christiansen",
"Bjørk Hammer"
],
"abstract": "Machine learning interatomic potentials have become an indispensable tool for\nmaterials science, enabling the study of larger systems and longer timescales.\nState-of-the-art models are generally graph neural networks that employ message\npassing to iteratively update atomic embeddings that are ultimately used for\npredicting properties. In this work we extend the message passing formalism\nwith the inclusion of a continuous variable that accounts for fractional atomic\nexistence. This allows us to calculate the gradient of the Gibbs free energy\nwith respect to both the Cartesian coordinates of atoms and their existence.\nUsing this we propose a gradient-based grand canonical optimization method and\ndocument its capabilities for a Cu(110) surface oxide.",
"categories": [
"cond-mat.mtrl-sci",
"cs.LG"
],
"published": "2025-07-25T17:13:41+00:00",
"url": "http://arxiv.org/pdf/2507.19438v1",
"resource_uri": "arxiv://2507.19438v1",
"citation_count": 0
},
{
"id": "2507.19437v1",
"title": "Observations Meet Actions: Learning Control-Sufficient Representations for Robust Policy Generalization",
"authors": [
"Yuliang Gu",
"Hongpeng Cao",
"Marco Caccamo",
"Naira Hovakimyan"
],
"abstract": "Capturing latent variations (\"contexts\") is key to deploying\nreinforcement-learning (RL) agents beyond their training regime. We recast\ncontext-based RL as a dual inference-control problem and formally characterize\ntwo properties and their hierarchy: observation sufficiency (preserving all\npredictive information) and control sufficiency (retaining decision-making\nrelevant information). Exploiting this dichotomy, we derive a contextual\nevidence lower bound(ELBO)-style objective that cleanly separates\nrepresentation learning from policy learning and optimizes it with Bottlenecked\nContextual Policy Optimization (BCPO), an algorithm that places a variational\ninformation-bottleneck encoder in front of any off-policy policy learner. On\nstandard continuous-control benchmarks with shifting physical parameters, BCPO\nmatches or surpasses other baselines while using fewer samples and retaining\nperformance far outside the training regime. The framework unifies theory,\ndiagnostics, and practice for context-based RL.",
"categories": [
"cs.LG"
],
"published": "2025-07-25T17:08:16+00:00",
"url": "http://arxiv.org/pdf/2507.19437v1",
"resource_uri": "arxiv://2507.19437v1",
"citation_count": 0
},
{
"id": "2507.19435v1",
"title": "Equivariant machine learning of Electric Field Gradients -- Predicting the quadrupolar coupling constant in the MAPbI$_3$ phase transition",
"authors": [
"Bernhard Schmiedmayer",
"J. W. Wolffs",
"Gilles A. de Wijs",
"Arno P. M. Kentgens",
"Jonathan Lahnsteiner",
"Georg Kresse"
],
"abstract": "We present a strategy combining machine learning and first-principles\ncalculations to achieve highly accurate nuclear quadrupolar coupling constant\npredictions. Our approach employs two distinct machine-learning frameworks: a\nmachine-learned force field to generate molecular dynamics trajectories and a\nsecond model for electric field gradients that preserves rotational and\ntranslational symmetries. By incorporating thermostat-driven molecular dynamics\nsampling, we enable the prediction of quadrupolar coupling constants in highly\ndisordered materials at finite temperatures. We validate our method by\npredicting the tetragonal-to-cubic phase transition temperature of the\norganic-inorganic halide perovskite MAPbI$_3$, obtaining results that closely\nmatch experimental data.",
"categories": [
"cond-mat.mtrl-sci"
],
"published": "2025-07-25T17:04:21+00:00",
"url": "http://arxiv.org/pdf/2507.19435v1",
"resource_uri": "arxiv://2507.19435v1",
"citation_count": 0
},
{
"id": "2507.19427v1",
"title": "Step-3 is Large yet Affordable: Model-system Co-design for Cost-effective Decoding",
"authors": [
"StepFun",
":",
"Bin Wang",
"Bojun Wang",
"Changyi Wan",
"Guanzhe Huang",
"Hanpeng Hu",
"Haonan Jia",
"Hao Nie",
"Mingliang Li",
"Nuo Chen",
"Siyu Chen",
"Song Yuan",
"Wuxun Xie",
"Xiaoniu Song",
"Xing Chen",
"Xingping Yang",
"Xuelin Zhang",
"Yanbo Yu",
"Yaoyu Wang",
"Yibo Zhu",
"Yimin Jiang",
"Yu Zhou",
"Yuanwei Lu",
"Houyi Li",
"Jingcheng Hu",
"Ka Man Lo",
"Ailin Huang",
"Binxing Jiao",
"Bo Li",
"Boyu Chen",
"Changxin Miao",
"Chang Lou",
"Chen Hu",
"Chen Xu",
"Chenfeng Yu",
"Chengyuan Yao",
"Daokuan Lv",
"Dapeng Shi",
"Deshan Sun",
"Ding Huang",
"Dingyuan Hu",
"Dongqing Pang",
"Enle Liu",
"Fajie Zhang",
"Fanqi Wan",
"Gulin Yan",
"Han Zhang",
"Han Zhou",
"Hanghao Wu",
"Hangyu Guo",
"Hanqi Chen",
"Hanshan Zhang",
"Hao Wu",
"Haocheng Zhang",
"Haolong Yan",
"Haoran Lv",
"Haoran Wei",
"Hebin Zhou",
"Heng Wang",
"Heng Wang",
"Hongxin Li",
"Hongyu Zhou",
"Hongyuan Wang",
"Huiyong Guo",
"Jia Wang",
"Jiahao Gong",
"Jialing Xie",
"Jian Zhou",
"Jianjian Sun",
"Jiaoren Wu",
"Jiaran Zhang",
"Jiayu Liu",
"Jie Cheng",
"Jie Luo",
"Jie Yan",
"Jie Yang",
"Jieyi Hou",
"Jinguang Zhang",
"Jinlan Cao",
"Jisheng Yin",
"Junfeng Liu",
"Junhao Huang",
"Junzhe Lin",
"Kaijun Tan",
"Kaixiang Li",
"Kang An",
"Kangheng Lin",
"Kenkun Liu",
"Lei Yang",
"Liang Zhao",
"Liangyu Chen",
"Lieyu Shi",
"Liguo Tan",
"Lin Lin",
"Lin Zhang",
"Lina Chen",
"Liwen Huang",
"Liying Shi",
"Longlong Gu",
"Mei Chen",
"Mengqiang Ren",
"Ming Li",
"Mingzhe Chen",
"Na Wang",
"Nan Wu",
"Qi Han",
"Qian Zhao",
"Qiang Zhang",
"Qianni Liu",
"Qiaohui Chen",
"Qiling Wu",
"Qinglin He",
"Qinyuan Tan",
"Qiufeng Wang",
"Qiuping Wu",
"Qiuyan Liang",
"Quan Sun",
"Rui Li",
"Ruihang Miao",
"Ruosi Wan",
"Ruyan Guo",
"Shangwu Zhong",
"Shaoliang Pang",
"Shengjie Fan",
"Shijie Shang",
"Shilei Jiang",
"Shiliang Yang",
"Shiming Hao",
"Shuli Gao",
"Siming Huang",
"Siqi Liu",
"Tiancheng Cao",
"Tianhao Cheng",
"Tianhao Peng",
"Wang You",
"Wei Ji",
"Wen Sun",
"Wenjin Deng",
"Wenqing He",
"Wenzhen Zheng",
"Xi Chen",
"Xiangwen Kong",
"Xianzhen Luo",
"Xiaobo Yang",
"Xiaojia Liu",
"Xiaoxiao Ren",
"Xin Han",
"Xin Li",
"Xin Wu",
"Xu Zhao",
"Yanan Wei",
"Yang Li",
"Yangguang Li",
"Yangshijie Xu",
"Yanming Xu",
"Yaqiang Shi",
"Yeqing Shen",
"Yi Yang",
"Yifei Yang",
"Yifeng Gong",
"Yihan Chen",
"Yijing Yang",
"Yinmin Zhang",
"Yizhuang Zhou",
"Yuanhao Ding",
"Yuantao Fan",
"Yuanzhen Yang",
"Yuchu Luo",
"Yue Peng",
"Yufan Lu",
"Yuhang Deng",
"Yuhe Yin",
"Yujie Liu",
"Yukun Chen",
"Yuling Zhao",
"Yun Mou",
"Yunlong Li",
"Yunzhou Ju",
"Yusheng Li",
"Yuxiang Yang",
"Yuxiang Zhang",
"Yuyang Chen",
"Zejia Weng",
"Zhe Xie",
"Zheng Ge",
"Zheng Gong",
"Zhenyi Lu",
"Zhewei Huang",
"Zhichao Chang",
"Zhiguo Huang",
"Zhirui Wang",
"Zidong Yang",
"Zili Wang",
"Ziqi Wang",
"Zixin Zhang",
"Binxing Jiao",
"Daxin Jiang",
"Heung-Yeung Shum",
"Xiangyu Zhang"
],
"abstract": "Large language models (LLMs) face low hardware efficiency during decoding,\nespecially for long-context reasoning tasks. This paper introduces Step-3, a\n321B-parameter VLM with hardware-aware model-system co-design optimized for\nminimizing decoding costs. Step-3 innovates in two key dimensions: (1) A novel\nMulti-Matrix Factorization Attention (MFA) mechanism that significantly reduces\nboth KV cache size and computation while maintaining high attention\nexpressiveness, and (2) Attention-FFN Disaggregation (AFD), a distributed\ninference system that decouples attention and Feed-Forward Network (FFN) layers\ninto specialized subsystems. This co-design achieves unprecedented cost\nefficiency: Step-3 significantly reduces theoretical decoding costs compared\nwith models like DeepSeek-V3 and Qwen3 MoE 235B, with the gains widening at\nlonger context. Step-3 achieves low cost while activating 38B parameters per\ntoken (more than DeepSeek-V3 and Qwen3 MoE 235B), demonstrating that\nhardware-aligned attention arithmetic intensity, MoE sparsity, and AFD are\ncritical to cost-effectiveness. We perform a head-to-head comparison with\nDeepSeek-V3 in its favorable scenarios. Our implementation on Hopper GPUs\nachieves a decoding throughput of up to 4,039 tokens per second per GPU under\n50ms TPOT SLA (4K context, FP8, no MTP). It is higher than DeepSeek-V3's 2,324\nin the same setup and sets a new Pareto frontier for LLM decoding.",
"categories": [
"cs.LG",
"cs.AI"
],
"published": "2025-07-25T16:53:13+00:00",
"url": "http://arxiv.org/pdf/2507.19427v1",
"resource_uri": "arxiv://2507.19427v1",
"citation_count": 0
},
{
"id": "2507.19425v1",
"title": "Machine Learning Based Efficiency Calculator (MaLBEC) for Nuclear Fusion Diagnostics",
"authors": [
"Kimberley Lennon",
"Chantal Shand",
"Gemma Wilson",
"Robin Smith"
],
"abstract": "Diagnostics are critical for commercial and research fusion machines, since\nmeasuring and understanding plasma features is important to sustaining fusion\nreactions. The neutron flux (and therefore fusion power) can be indirectly\ncalculated using neutron activation analyses, where potentially large numbers\nof activation foils are placed in the neutron flux, and delayed gammas from key\nreactions are measured via gamma spectrometry. In gamma spectrometry, absolute\nefficiency forms part of the activity calculation, and equals to the ratio of\nthe total number of photons detected to the number emitted by a radioactive\nsample. Hence, it is imperative that they are calculated efficiently and\naccurately. This paper presents a novel digital efficiency calculation\nalgorithm, the Machine Learning Based Efficiency Calculator (MaLBEC), that uses\nstate-of-the-art supervised machine learning techniques to calculate efficiency\nvalues of a given sample, from only four inputs. In this paper, the performance\nof the MaLBEC is demonstrated with a fusion sample and compares the values to a\ntraditional efficiency calculation method, Monte Carlo N-Particle (MCNP). The\nefficiencies from the MaLBEC were within an average 5\\% of the ones produced by\nMCNP, but with an exceptional reduction in computation time of 99.96\\%. When\nthe efficiency values from both methods were used in the activity calculation,\nthe MaLBEC was within 3\\% of the MCNP results.",
"categories": [
"physics.ins-det",
"nucl-ex"
],
"published": "2025-07-25T16:44:46+00:00",
"url": "http://arxiv.org/pdf/2507.19425v1",
"resource_uri": "arxiv://2507.19425v1",
"citation_count": 0
},
{
"id": "2507.19423v1",
"title": "Perfect Clustering in Very Sparse Diverse Multiplex Networks",
"authors": [
"Marianna Pensky"
],
"abstract": "The paper studies the DIverse MultiPLEx Signed Generalized Random Dot Product\nGraph (DIMPLE-SGRDPG) network model (Pensky (2024)), where all layers of the\nnetwork have the same collection of nodes. In addition, all layers can be\npartitioned into groups such that the layers in the same group are embedded in\nthe same ambient subspace but otherwise matrices of connection probabilities\ncan be all different. This setting includes majority of multilayer network\nmodels as its particular cases. The key task in this model is to recover the\ngroups of layers with unique subspace structures, since the case where all\nlayers of the network are embedded in the same subspace has been fairly well\nstudied. Until now, clustering of layers in such networks was based on the\nlayer-per-layer analysis, which required the multilayer network to be\nsufficiently dense. Nevertheless, in this paper we succeeded in pooling\ninformation in all layers together and providing a tensor-based methodology\nthat ensures perfect clustering for a much sparser network. Our theoretical\nresults, established under intuitive non-restrictive assumptions, assert that\nthe new technique achieves perfect clustering under sparsity conditions that,\nup to logarithmic factors, coincide with the computational lower bound derived\nfor a much simpler model.",
"categories": [
"stat.ML",
"cs.LG",
"math.ST",
"stat.ME",
"stat.TH"
],
"published": "2025-07-25T16:43:42+00:00",
"url": "http://arxiv.org/pdf/2507.19423v1",
"resource_uri": "arxiv://2507.19423v1",
"citation_count": 0
},
{
"id": "2507.19420v1",
"title": "CircuitProbe: Dissecting Spatiotemporal Visual Semantics with Circuit Tracing",
"authors": [
"Yiming Zhang",
"Chengzhang Yu",
"Zhuokai Zhao",
"Kun Wang",
"Qiankun Li",
"Zihan Chen",
"Yang Liu",
"Zenghui Ding",
"Yining Sun"
],
"abstract": "The processing mechanisms underlying language and image understanding in\nlarge vision-language models (LVLMs) have been extensively studied. However,\nthe internal reasoning mechanisms of LVLMs for spatiotemporal understanding\nremain poorly understood. In this work, we introduce a systematic,\ncircuit-based framework designed to investigate how spatiotemporal visual\nsemantics are represented and processed within these LVLMs. Specifically, our\nframework comprises three circuits: visual auditing circuit, semantic tracing\ncircuit, and attention flow circuit. Through the lens of these circuits, we\ndiscover that visual semantics are highly localized to specific object\ntokens--removing these tokens can degrade model performance by up to 92.6%.\nFurthermore, we identify that interpretable concepts of objects and actions\nemerge and become progressively refined in the middle-to-late layers of LVLMs.\nIn contrary to the current works that solely focus on objects in one image, we\nreveal that the middle-to-late layers of LVLMs exhibit specialized functional\nlocalization for spatiotemporal semantics. Our findings offer significant\nmechanistic insights into spatiotemporal semantics analysis of LVLMs, laying a\nfoundation for designing more robust and interpretable models.",
"categories": [
"cs.CV",
"cs.LG"
],
"published": "2025-07-25T16:38:18+00:00",
"url": "http://arxiv.org/pdf/2507.19420v1",
"resource_uri": "arxiv://2507.19420v1",
"citation_count": 0
},
{
"id": "2507.19413v1",
"title": "Riesz representers for the rest of us",
"authors": [
"Nicholas T. Williams",
"Oliver J. Hines",
"Kara E. Rudolph"
],
"abstract": "The application of semiparametric efficient estimators, particularly those\nthat leverage machine learning, is rapidly expanding within epidemiology and\ncausal inference. Much of the recent methodological literature on these\nestimators relies heavily on the Riesz representation theorem and Riesz\nregression. This paper aims to introduce the Riesz representation theorem to an\napplied audience, explaining why and how Riesz regression is becoming widely\nused in the semiparametric estimator statistical literature.",
"categories": [
"math.ST",
"stat.TH"
],
"published": "2025-07-25T16:27:17+00:00",
"url": "http://arxiv.org/pdf/2507.19413v1",
"resource_uri": "arxiv://2507.19413v1",
"citation_count": 0
},
{
"id": "2507.19411v1",
"title": "SILS: Strategic Influence on Liquidity Stability and Whale Detection in Concentrated-Liquidity DEXs",
"authors": [
"Ali RajabiNekoo",
"Laleh Rasoul",
"Amirfarhad Farhadi",
"Azadeh Zamanifar"
],
"abstract": "Traditional methods for identifying impactful liquidity providers (LPs) in\nConcentrated Liquidity Market Makers (CLMMs) rely on broad measures, such as\nnominal capital size or surface-level activity, which often lead to inaccurate\nrisk analysis. The SILS framework offers a significantly more detailed\napproach, characterizing LPs not just as capital holders but as dynamic\nsystemic agents whose actions directly impact market stability. This represents\na fundamental paradigm shift from the static, volume-based analysis to a\ndynamic, impact-focused understanding. This advanced approach uses on-chain\nevent logs and smart contract execution traces to compute Exponential\nTime-Weighted Liquidity (ETWL) profiles and apply unsupervised anomaly\ndetection. Most importantly, it defines an LP's functional importance through\nthe Liquidity Stability Impact Score (LSIS), a counterfactual metric that\nmeasures the potential degradation of the market if the LP withdraws. This\ncombined approach provides a more detailed and realistic characterization of an\nLP's impact, moving beyond the binary and often misleading classifications used\nby existing methods. This impact-focused and comprehensive approach enables\nSILS to accurately identify high-impact LPs-including those missed by\ntraditional methods and supports essential applications like a protective\noracle layer and actionable trader signals, thereby significantly enhancing\nDeFi ecosystem. The framework provides unprecedented transparency into the\nunderlying liquidity structure and associated risks, effectively reducing the\ncommon false positives and uncovering critical false negatives found in\ntraditional models. Therefore, SILS provides an effective mechanism for\nproactive risk management, transforming how DeFi protocols safeguard their\necosystems against asymmetric liquidity behavior.",
"categories": [
"cs.LG",
"cs.CR",
"cs.ET"
],
"published": "2025-07-25T16:21:18+00:00",
"url": "http://arxiv.org/pdf/2507.19411v1",
"resource_uri": "arxiv://2507.19411v1",
"citation_count": 0
},
{
"id": "2507.19408v1",
"title": "On Arbitrary Predictions from Equally Valid Models",
"authors": [
"Sarah Lockfisch",
"Kristian Schwethelm",
"Martin Menten",
"Rickmer Braren",
"Daniel Rueckert",
"Alexander Ziller",
"Georgios Kaissis"
],
"abstract": "Model multiplicity refers to the existence of multiple machine learning\nmodels that describe the data equally well but may produce different\npredictions on individual samples. In medicine, these models can admit\nconflicting predictions for the same patient -- a risk that is poorly\nunderstood and insufficiently addressed.\n In this study, we empirically analyze the extent, drivers, and ramifications\nof predictive multiplicity across diverse medical tasks and model\narchitectures, and show that even small ensembles can mitigate/eliminate\npredictive multiplicity in practice. Our analysis reveals that (1) standard\nvalidation metrics fail to identify a uniquely optimal model and (2) a\nsubstantial amount of predictions hinges on arbitrary choices made during model\ndevelopment. Using multiple models instead of a single model reveals instances\nwhere predictions differ across equally plausible models -- highlighting\npatients that would receive arbitrary diagnoses if any single model were used.\nIn contrast, (3) a small ensemble paired with an abstention strategy can\neffectively mitigate measurable predictive multiplicity in practice;\npredictions with high inter-model consensus may thus be amenable to automated\nclassification. While accuracy is not a principled antidote to predictive\nmultiplicity, we find that (4) higher accuracy achieved through increased model\ncapacity reduces predictive multiplicity.\n Our findings underscore the clinical importance of accounting for model\nmultiplicity and advocate for ensemble-based strategies to improve diagnostic\nreliability. In cases where models fail to reach sufficient consensus, we\nrecommend deferring decisions to expert review.",
"categories": [
"cs.LG",
"cs.AI"
],
"published": "2025-07-25T16:15:59+00:00",
"url": "http://arxiv.org/pdf/2507.19408v1",
"resource_uri": "arxiv://2507.19408v1",
"citation_count": 0
},
{
"id": "2507.19402v1",
"title": "FD4QC: Application of Classical and Quantum-Hybrid Machine Learning for Financial Fraud Detection A Technical Report",
"authors": [
"Matteo Cardaioli",
"Luca Marangoni",
"Giada Martini",
"Francesco Mazzolin",
"Luca Pajola",
"Andrea Ferretto Parodi",
"Alessandra Saitta",
"Maria Chiara Vernillo"
],
"abstract": "The increasing complexity and volume of financial transactions pose\nsignificant challenges to traditional fraud detection systems. This technical\nreport investigates and compares the efficacy of classical, quantum, and\nquantum-hybrid machine learning models for the binary classification of\nfraudulent financial activities.\n As of our methodology, first, we develop a comprehensive behavioural feature\nengineering framework to transform raw transactional data into a rich,\ndescriptive feature set. Second, we implement and evaluate a range of models on\nthe IBM Anti-Money Laundering (AML) dataset. The classical baseline models\ninclude Logistic Regression, Decision Tree, Random Forest, and XGBoost. These\nare compared against three hybrid classic quantum algorithms architectures: a\nQuantum Support Vector Machine (QSVM), a Variational Quantum Classifier (VQC),\nand a Hybrid Quantum Neural Network (HQNN).\n Furthermore, we propose Fraud Detection for Quantum Computing (FD4QC), a\npractical, API-driven system architecture designed for real-world deployment,\nfeaturing a classical-first, quantum-enhanced philosophy with robust fallback\nmechanisms.\n Our results demonstrate that classical tree-based models, particularly\n\\textit{Random Forest}, significantly outperform the quantum counterparts in\nthe current setup, achieving high accuracy (\\(97.34\\%\\)) and F-measure\n(\\(86.95\\%\\)). Among the quantum models, \\textbf{QSVM} shows the most promise,\ndelivering high precision (\\(77.15\\%\\)) and a low false-positive rate\n(\\(1.36\\%\\)), albeit with lower recall and significant computational overhead.\n This report provides a benchmark for a real-world financial application,\nhighlights the current limitations of quantum machine learning in this domain,\nand outlines promising directions for future research.",
"categories": [
"cs.LG",
"cs.CE"
],
"published": "2025-07-25T16:08:22+00:00",
"url": "http://arxiv.org/pdf/2507.19402v1",
"resource_uri": "arxiv://2507.19402v1",
"citation_count": 0
},
{
"id": "2507.19382v1",
"title": "Learning Long-Range Representations with Equivariant Messages",
"authors": [
"Egor Rumiantsev",
"Marcel F. Langer",
"Tulga-Erdene Sodjargal",
"Michele Ceriotti",
"Philip Loche"
],
"abstract": "Machine learning interatomic potentials trained on first-principles reference\ndata are quickly becoming indispensable for computational physics, biology, and\nchemistry. Equivariant message-passing neural networks, including transformers,\nare considered state-of-the-art for this task. Since applications require\nefficient scaling with system size, such models cannot act on fully connected\natomistic graphs and thus neglect interactions beyond a certain cutoff,\nconsequently failing to model long-range effects like electrostatics,\ndispersion, or electron delocalization. While long-range correction schemes\nbased on inverse power laws of interatomic distances have been proposed, they\nare unable to communicate higher-order geometric information and are thus\nlimited in applicability. To address this shortcoming, we propose the use of\nequivariant, rather than scalar, charges for long-range interactions, and\ndesign a graph neural network architecture, LOREM, around this long-range\nmessage passing mechanism. Through tests on a number of long-range datasets, we\nconfirm that equivariant charges enable the learning of orientation-dependent\ninteractions, and that the proposed model is competitive with, or surpasses,\nother approaches. Moreover, LOREM does not require adapting interaction cutoffs\nor the number of message passing steps to model long-range interactions, which\ncontributes to its robustness across different systems.",
"categories": [
"physics.chem-ph"
],
"published": "2025-07-25T15:36:18+00:00",
"url": "http://arxiv.org/pdf/2507.19382v1",
"resource_uri": "arxiv://2507.19382v1",
"citation_count": 0
},
{
"id": "2507.19377v1",
"title": "Deep Reinforcement Learning-Based Scheduling for Wi-Fi Multi-Access Point Coordination",
"authors": [
"David Nunez",
"Francesc Wilhelmi",
"Maksymilian Wojnar",
"Katarzyna Kosek-Szott",
"Szymon Szott",
"Boris Bellalta"
],
"abstract": "Multi-access point coordination (MAPC) is a key feature of IEEE 802.11bn,\nwith a potential impact on future Wi-Fi networks. MAPC enables joint scheduling\ndecisions across multiple access points (APs) to improve throughput, latency,\nand reliability in dense Wi-Fi deployments. However, implementing efficient\nscheduling policies under diverse traffic and interference conditions in\noverlapping basic service sets (OBSSs) remains a complex task. This paper\npresents a method to minimize the network-wide worst-case latency by\nformulating MAPC scheduling as a sequential decision-making problem and\nproposing a deep reinforcement learning (DRL) mechanism to minimize worst-case\ndelays in OBSS deployments. Specifically, we train a DRL agent using proximal\npolicy optimization (PPO) within an 802.11bn-compatible Gymnasium environment.\nThis environment provides observations of queue states, delay metrics, and\nchannel conditions, enabling the agent to schedule multiple AP-station pairs to\ntransmit simultaneously by leveraging spatial reuse (SR) groups. Simulations\ndemonstrate that our proposed solution outperforms state-of-the-art heuristic\nstrategies across a wide range of network loads and traffic patterns. The\ntrained machine learning (ML) models consistently achieve lower 99th-percentile\ndelays, showing up to a 30% improvement over the best baseline.",
"categories": [
"cs.NI"
],
"published": "2025-07-25T15:26:25+00:00",
"url": "http://arxiv.org/pdf/2507.19377v1",
"resource_uri": "arxiv://2507.19377v1",
"citation_count": 0
},
{
"id": "2507.19372v1",
"title": "Learning neuro-symbolic convergent term rewriting systems",
"authors": [
"Flavio Petruzzellis",
"Alberto Testolin",
"Alessandro Sperduti"
],
"abstract": "Building neural systems that can learn to execute symbolic algorithms is a\nchallenging open problem in artificial intelligence, especially when aiming for\nstrong generalization and out-of-distribution performance. In this work, we\nintroduce a general framework for learning convergent term rewriting systems\nusing a neuro-symbolic architecture inspired by the rewriting algorithm itself.\nWe present two modular implementations of such architecture: the Neural\nRewriting System (NRS) and the Fast Neural Rewriting System (FastNRS). As a\nresult of algorithmic-inspired design and key architectural elements, both\nmodels can generalize to out-of-distribution instances, with FastNRS offering\nsignificant improvements in terms of memory efficiency, training speed, and\ninference time. We evaluate both architectures on four tasks involving the\nsimplification of mathematical formulas and further demonstrate their\nversatility in a multi-domain learning scenario, where a single model is\ntrained to solve multiple types of problems simultaneously. The proposed system\nsignificantly outperforms two strong neural baselines: the Neural Data Router,\na recent transformer variant specifically designed to solve algorithmic\nproblems, and GPT-4o, one of the most powerful general-purpose large-language\nmodels. Moreover, our system matches or outperforms the latest o1-preview model\nfrom OpenAI that excels in reasoning benchmarks.",
"categories": [
"cs.AI",
"cs.LG"
],
"published": "2025-07-25T15:24:56+00:00",
"url": "http://arxiv.org/pdf/2507.19372v1",
"resource_uri": "arxiv://2507.19372v1",
"citation_count": 0
},
{
"id": "2507.19368v1",
"title": "Counterfactual Explanations in Medical Imaging: Exploring SPN-Guided Latent Space Manipulation",
"authors": [
"Julia Siekiera",
"Stefan Kramer"
],
"abstract": "Artificial intelligence is increasingly leveraged across various domains to\nautomate decision-making processes that significantly impact human lives. In\nmedical image analysis, deep learning models have demonstrated remarkable\nperformance. However, their inherent complexity makes them black box systems,\nraising concerns about reliability and interpretability. Counterfactual\nexplanations provide comprehensible insights into decision processes by\npresenting hypothetical \"what-if\" scenarios that alter model classifications.\nBy examining input alterations, counterfactual explanations provide patterns\nthat influence the decision-making process. Despite their potential, generating\nplausible counterfactuals that adhere to similarity constraints providing\nhuman-interpretable explanations remains a challenge. In this paper, we\ninvestigate this challenge by a model-specific optimization approach. While\ndeep generative models such as variational autoencoders (VAEs) exhibit\nsignificant generative power, probabilistic models like sum-product networks\n(SPNs) efficiently represent complex joint probability distributions. By\nmodeling the likelihood of a semi-supervised VAE's latent space with an SPN, we\nleverage its dual role as both a latent space descriptor and a classifier for a\ngiven discrimination task. This formulation enables the optimization of latent\nspace counterfactuals that are both close to the original data distribution and\naligned with the target class distribution. We conduct experimental evaluation\non the cheXpert dataset. To evaluate the effectiveness of the integration of\nSPNs, our SPN-guided latent space manipulation is compared against a neural\nnetwork baseline. Additionally, the trade-off between latent variable\nregularization and counterfactual quality is analyzed.",
"categories": [
"cs.LG",
"cs.AI"
],
"published": "2025-07-25T15:19:32+00:00",
"url": "http://arxiv.org/pdf/2507.19368v1",
"resource_uri": "arxiv://2507.19368v1",
"citation_count": 0
},
{
"id": "2507.19362v1",
"title": "LOTUS: A Leaderboard for Detailed Image Captioning from Quality to Societal Bias and User Preferences",
"authors": [
"Yusuke Hirota",
"Boyi Li",
"Ryo Hachiuma",
"Yueh-Hua Wu",
"Boris Ivanovic",
"Yuta Nakashima",
"Marco Pavone",
"Yejin Choi",
"Yu-Chiang Frank Wang",
"Chao-Han Huck Yang"
],
"abstract": "Large Vision-Language Models (LVLMs) have transformed image captioning,\nshifting from concise captions to detailed descriptions. We introduce LOTUS, a\nleaderboard for evaluating detailed captions, addressing three main gaps in\nexisting evaluations: lack of standardized criteria, bias-aware assessments,\nand user preference considerations. LOTUS comprehensively evaluates various\naspects, including caption quality (e.g., alignment, descriptiveness), risks\n(\\eg, hallucination), and societal biases (e.g., gender bias) while enabling\npreference-oriented evaluations by tailoring criteria to diverse user\npreferences. Our analysis of recent LVLMs reveals no single model excels across\nall criteria, while correlations emerge between caption detail and bias risks.\nPreference-oriented evaluations demonstrate that optimal model selection\ndepends on user priorities.",
"categories": [
"cs.CV",
"cs.AI",
"cs.CL",
"cs.CY",
"cs.LG"
],
"published": "2025-07-25T15:12:42+00:00",
"url": "http://arxiv.org/pdf/2507.19362v1",
"resource_uri": "arxiv://2507.19362v1",
"citation_count": 0
},
{
"id": "2507.19349v1",
"title": "Reconstruction of Sparse Urban Wireless Signals via Group Equivariant Non-Expansive Operators",
"authors": [
"Lorenzo Mario Amorosa",
"Francesco Conti",
"Nicola Quercioli",
"Flavio Zabini",
"Tayebeh Lotfi Mahyari",
"Yiqun Ge",
"Patrizio Frosini"
],
"abstract": "In emerging communication systems such as sixth generation (6G) wireless\nnetworks, efficient resource management and service delivery rely on accurate\nknowledge of spatially-varying quantities like signal-to-interference-noise\nratio (SINR) maps, which are costly to acquire at high resolution. This work\nexplores the reconstruction of such spatial signals from sparse measurements\nusing Group Equivariant Non-Expansive Operators (GENEOs), offering a\nlow-complexity alternative to traditional neural networks. The concept of\nGENEO, which originated in topological data analysis (TDA), is a mathematical\ntool used in machine learning to represent agents modelled as functional\noperators acting on data while incorporating application-specific invariances.\nLeveraging these invariances reduces the number of parameters with respect to\ntraditional neural networks and mitigates data scarcity by enforcing known\nalgebraic and geometric constraints that reflect symmetries in the agents'\nactions. In this paper, we introduce a novel GENEO-based approach for SINR map\nreconstruction in urban wireless communication networks using extremely sparse\nsampling. We demonstrate that this mathematical framework achieves competitive\nperformance compared to established methods. Our evaluation, conducted using\nboth statistical and TDA metrics, highlights the advantages of our approach in\naccurately reconstructing spatial signals under severe data limitations on the\nnumber of samples.",
"categories": [
"cs.LG",
"cs.NI"
],
"published": "2025-07-25T14:59:44+00:00",
"url": "http://arxiv.org/pdf/2507.19349v1",
"resource_uri": "arxiv://2507.19349v1",
"citation_count": 0
},
{
"id": "2507.19346v1",
"title": "Short-Form Video Recommendations with Multimodal Embeddings: Addressing Cold-Start and Bias Challenges",
"authors": [
"Andrii Dzhoha",
"Katya Mirylenka",
"Egor Malykh",
"Marco-Andrea Buchmann",
"Francesca Catino"
],
"abstract": "In recent years, social media users have spent significant amounts of time on\nshort-form video platforms. As a result, established platforms in other\ndomains, such as e-commerce, have begun introducing short-form video content to\nengage users and increase their time spent on the platform. The success of\nthese experiences is due not only to the content itself but also to a unique UI\ninnovation: instead of offering users a list of choices to click, platforms\nactively recommend content for users to watch one at a time. This creates new\nchallenges for recommender systems, especially when launching a new video\nexperience. Beyond the limited interaction data, immersive feed experiences\nintroduce stronger position bias due to the UI and duration bias when\noptimizing for watch-time, as models tend to favor shorter videos. These\nissues, together with the feedback loop inherent in recommender systems, make\nit difficult to build effective solutions. In this paper, we highlight the\nchallenges faced when introducing a new short-form video experience and present\nour experience showing that, even with sufficient video interaction data, it\ncan be more beneficial to leverage a video retrieval system using a fine-tuned\nmultimodal vision-language model to overcome these challenges. This approach\ndemonstrated greater effectiveness compared to conventional supervised learning\nmethods in online experiments conducted on our e-commerce platform.",
"categories": [
"cs.LG"
],
"published": "2025-07-25T14:57:04+00:00",
"url": "http://arxiv.org/pdf/2507.19346v1",
"resource_uri": "arxiv://2507.19346v1",
"citation_count": 0
},
{
"id": "2507.19334v1",
"title": "Doubling Your Data in Minutes: Ultra-fast Tabular Data Generation via LLM-Induced Dependency Graphs",
"authors": [
"Shuo Yang",
"Zheyu Zhang",
"Bardh Prenkaj",
"Gjergji Kasneci"
],
"abstract": "Tabular data is critical across diverse domains, yet high-quality datasets\nremain scarce due to privacy concerns and the cost of collection. Contemporary\napproaches adopt large language models (LLMs) for tabular augmentation, but\nexhibit two major limitations: (1) dense dependency modeling among tabular\nfeatures that can introduce bias, and (2) high computational overhead in\nsampling. To address these issues, we propose SPADA for SPArse\nDependency-driven Augmentation, a lightweight generative framework that\nexplicitly captures sparse dependencies via an LLM-induced graph. We treat each\nfeature as a node and synthesize values by traversing the graph, conditioning\neach feature solely on its parent nodes. We explore two synthesis strategies: a\nnon-parametric method using Gaussian kernel density estimation, and a\nconditional normalizing flow model that learns invertible mappings for\nconditional density estimation. Experiments on four datasets show that SPADA\nreduces constraint violations by 4% compared to diffusion-based methods and\naccelerates generation by nearly 9,500 times over LLM-based baselines.",
"categories": [
"cs.LG",
"cs.AI"
],
"published": "2025-07-25T14:43:50+00:00",
"url": "http://arxiv.org/pdf/2507.19334v1",
"resource_uri": "arxiv://2507.19334v1",
"citation_count": 0
},
{
"id": "2507.19321v1",
"title": "SIDE: Sparse Information Disentanglement for Explainable Artificial Intelligence",
"authors": [
"Viktar Dubovik",
"Łukasz Struski",
"Jacek Tabor",
"Dawid Rymarczyk"
],
"abstract": "Understanding the decisions made by deep neural networks is essential in\nhigh-stakes domains such as medical imaging and autonomous driving. Yet, these\nmodels often lack transparency, particularly in computer vision.\nPrototypical-parts-based neural networks have emerged as a promising solution\nby offering concept-level explanations. However, most are limited to\nfine-grained classification tasks, with few exceptions such as InfoDisent.\nInfoDisent extends prototypical models to large-scale datasets like ImageNet,\nbut produces complex explanations.\n We introduce Sparse Information Disentanglement for Explainability (SIDE), a\nnovel method that improves the interpretability of prototypical parts through a\ndedicated training and pruning scheme that enforces sparsity. Combined with\nsigmoid activations in place of softmax, this approach allows SIDE to associate\neach class with only a small set of relevant prototypes. Extensive experiments\nshow that SIDE matches the accuracy of existing methods while reducing\nexplanation size by over $90\\%$, substantially enhancing the understandability\nof prototype-based explanations.",
"categories": [
"cs.CV",
"cs.AI",
"cs.LG"
],
"published": "2025-07-25T14:34:15+00:00",
"url": "http://arxiv.org/pdf/2507.19321v1",
"resource_uri": "arxiv://2507.19321v1",
"citation_count": 0
},
{
"id": "2507.19307v1",
"title": "Stability and Symmetry-Assured Crystal Structure Generation for Inverse Design of Photocatalysts in Water Splitting",
"authors": [
"Zhilong Song",
"Chongyi Ling",
"Qiang Li",
"Qionghua Zhou",
"Jinlan Wang"
],
"abstract": "Generative models are revolutionizing materials discovery by enabling inverse\ndesign-direct generation of structures from desired properties. However,\nexisting approaches often struggle to ensure inherent stability and symmetry\nwhile precisely generating structures with target compositions, space groups,\nand lattices without fine-tuning. Here, we present SSAGEN (Stability and\nSymmetry-Assured GENerative framework), which overcomes these limitations by\ndecoupling structure generation into two distinct stages: crystal information\n(lattice, composition, and space group) generation and coordinate optimization.\nSSAGEN first generates diverse yet physically plausible crystal information,\nthen derives stable and metastable atomic positions through universal machine\nlearning potentials, combined global and local optimization with symmetry and\nWyckoff position constraints, and dynamically refined search spaces. Compared\nto prior generative models such as CDVAE, SSAGEN improves the thermodynamic and\nkinetic stability of generated structures by 148% and 180%, respectively, while\ninherently satisfying target compositions, space groups, and lattices. Applied\nto photocatalytic water splitting (PWS), SSAGEN generates 200,000\nstructures-81.2% novel-with 3,318 meeting all stability and band gap criteria.\nDensity functional theory (DFT) validation confirms 95.6% structures satisfy\nPWS requirements, with 24 optimal candidates identified through comprehensive\nscreening based on electronic structure, thermodynamic, kinetic, and aqueous\nstability criteria. SSAGEN not only precisely generates materials with desired\ncrystal information but also ensures inherent stability and symmetry,\nestablishing a new paradigm for targeted inverse design of functional\nmaterials.",
"categories": [
"cond-mat.mtrl-sci"
],
"published": "2025-07-25T14:22:52+00:00",
"url": "http://arxiv.org/pdf/2507.19307v1",
"resource_uri": "arxiv://2507.19307v1",
"citation_count": 0
},
{
"id": "2507.19290v1",
"title": "Query Efficient Structured Matrix Learning",
"authors": [
"Noah Amsel",
"Pratyush Avi",
"Tyler Chen",
"Feyza Duman Keles",
"Chinmay Hegde",
"Cameron Musco",
"Christopher Musco",
"David Persson"
],
"abstract": "We study the problem of learning a structured approximation (low-rank,\nsparse, banded, etc.) to an unknown matrix $A$ given access to matrix-vector\nproduct (matvec) queries of the form $x \\rightarrow Ax$ and $x \\rightarrow\nA^Tx$. This problem is of central importance to algorithms across scientific\ncomputing and machine learning, with applications to fast multiplication and\ninversion for structured matrices, building preconditioners for first-order\noptimization, and as a model for differential operator learning. Prior work\nfocuses on obtaining query complexity upper and lower bounds for learning\nspecific structured matrix families that commonly arise in applications.\n We initiate the study of the problem in greater generality, aiming to\nunderstand the query complexity of learning approximations from general matrix\nfamilies. Our main result focuses on finding a near-optimal approximation to\n$A$ from any finite-sized family of matrices, $\\mathcal{F}$. Standard results\nfrom matrix sketching show that $O(\\log|\\mathcal{F}|)$ matvec queries suffice\nin this setting. This bound can also be achieved, and is optimal, for\nvector-matrix-vector queries of the form $x,y\\rightarrow x^TAy$, which have\nbeen widely studied in work on rank-$1$ matrix sensing.\n Surprisingly, we show that, in the matvec model, it is possible to obtain a\nnearly quadratic improvement in complexity, to\n$\\tilde{O}(\\sqrt{\\log|\\mathcal{F}|})$. Further, we prove that this bound is\ntight up to log-log factors.Via covering number arguments, our result extends\nto well-studied infinite families. As an example, we establish that a\nnear-optimal approximation from any \\emph{linear matrix family} of dimension\n$q$ can be learned with $\\tilde{O}(\\sqrt{q})$ matvec queries, improving on an\n$O(q)$ bound achievable via sketching techniques and vector-matrix-vector\nqueries.",
"categories": [
"cs.DS",
"cs.LG",
"cs.NA",
"math.NA"
],
"published": "2025-07-25T14:04:20+00:00",
"url": "http://arxiv.org/pdf/2507.19290v1",
"resource_uri": "arxiv://2507.19290v1",
"citation_count": 0
},
{
"id": "2507.19261v1",
"title": "Knowledge Grafting: A Mechanism for Optimizing AI Model Deployment in Resource-Constrained Environments",
"authors": [
"Osama Almurshed",
"Ashish Kaushal",
"Asmail Muftah",
"Nitin Auluck",
"Omer Rana"
],
"abstract": "The increasing adoption of Artificial Intelligence (AI) has led to larger,\nmore complex models with numerous parameters that require substantial computing\npower -- resources often unavailable in many real-world application scenarios.\nOur paper addresses this challenge by introducing knowledge grafting, a novel\nmechanism that optimizes AI models for resource-constrained environments by\ntransferring selected features (the scion) from a large donor model to a\nsmaller rootstock model. The approach achieves an 88.54% reduction in model\nsize (from 64.39 MB to 7.38 MB), while improving generalization capability of\nthe model. Our new rootstock model achieves 89.97% validation accuracy (vs.\ndonor's 87.47%), maintains lower validation loss (0.2976 vs. 0.5068), and\nperforms exceptionally well on unseen test data with 90.45% accuracy. It\naddresses the typical size vs performance trade-off, and enables deployment of\nAI frameworks on resource-constrained devices with enhanced performance. We\nhave tested our approach on an agricultural weed detection scenario, however,\nit can be extended across various edge computing scenarios, potentially\naccelerating AI adoption in areas with limited hardware/software support -- by\nmirroring in a similar manner the horticultural grafting enables productive\ncultivation in challenging agri-based environments.",
"categories": [
"cs.AI",
"cs.LG",
"cs.PF"
],
"published": "2025-07-25T13:37:45+00:00",
"url": "http://arxiv.org/pdf/2507.19261v1",
"resource_uri": "arxiv://2507.19261v1",
"citation_count": 0
},
{
"id": "2507.19247v1",
"title": "A Markov Categorical Framework for Language Modeling",
"authors": [
"Yifan Zhang"
],
"abstract": "Auto-regressive language models factorize sequence probabilities and are\ntrained by minimizing the negative log-likelihood (NLL) objective. While\nempirically powerful, a deep theoretical understanding of why this simple\nobjective yields such versatile representations remains elusive. This work\nintroduces a unifying analytical framework using Markov Categories (MCs) to\ndeconstruct the AR generation process and the NLL objective. We model the\nsingle-step generation map as a composition of Markov kernels in the category\nStoch. This compositional view, when enriched with statistical divergences,\nallows us to dissect information flow and learned geometry. Our framework makes\nthree main contributions. First, we provide a formal, information-theoretic\nrationale for the success of modern speculative decoding methods like EAGLE,\nquantifying the information surplus in hidden states that these methods\nexploit. Second, we formalize how NLL minimization forces the model to learn\nnot just the next token, but the data's intrinsic conditional stochasticity, a\nprocess we analyze using categorical entropy. Third, and most centrally, we\nprove that NLL training acts as an implicit form of spectral contrastive\nlearning. By analyzing the information geometry of the model's prediction head,\nwe show that NLL implicitly forces the learned representation space to align\nwith the eigenspectrum of a predictive similarity operator, thereby learning a\ngeometrically structured space without explicit contrastive pairs. This\ncompositional and information-geometric perspective reveals the deep structural\nprinciples underlying the effectiveness of modern LMs. Project Page:\nhttps://github.com/asiresearch/lm-theory",
"categories": [
"cs.LG",
"cs.AI",
"cs.CL"
],
"published": "2025-07-25T13:14:03+00:00",
"url": "http://arxiv.org/pdf/2507.19247v1",
"resource_uri": "arxiv://2507.19247v1",
"citation_count": 0
},
{
"id": "2507.19233v1",
"title": "Component-Based Machine Learning for Indoor Flow and Temperature Fields Prediction Latent Feature Aggregation and Flow Interaction",
"authors": [
"Shaofan Wang",
"Nils Thuerey",
"Philipp Geyer"
],
"abstract": "Accurate and efficient prediction of indoor airflow and temperature\ndistributions is essential for building energy optimization and occupant\ncomfort control. However, traditional CFD simulations are computationally\nintensive, limiting their integration into real-time or design-iterative\nworkflows. This study proposes a component-based machine learning (CBML)\nsurrogate modeling approach to replace conventional CFD simulation for fast\nprediction of indoor velocity and temperature fields. The model consists of\nthree neural networks: a convolutional autoencoder with residual connections\n(CAER) to extract and compress flow features, a multilayer perceptron (MLP) to\nmap inlet velocities to latent representations, and a convolutional neural\nnetwork (CNN) as an aggregator to combine single-inlet features into dual-inlet\nscenarios. A two-dimensional room with varying left and right air inlet\nvelocities is used as a benchmark case, with CFD simulations providing training\nand testing data. Results show that the CBML model accurately and fast predicts\ntwo-component aggregated velocity and temperature fields across both training\nand testing datasets.",
"categories": [
"cs.LG",
"physics.flu-dyn"
],
"published": "2025-07-25T12:57:30+00:00",
"url": "http://arxiv.org/pdf/2507.19233v1",
"resource_uri": "arxiv://2507.19233v1",
"citation_count": 0
},
{
"id": "2507.19229v1",
"title": "TrinityDNA: A Bio-Inspired Foundational Model for Efficient Long-Sequence DNA Modeling",
"authors": [
"Qirong Yang",
"Yucheng Guo",
"Zicheng Liu",
"Yujie Yang",
"Qijin Yin",
"Siyuan Li",
"Shaomin Ji",
"Linlin Chao",
"Xiaoming Zhang",
"Stan Z. Li"
],
"abstract": "The modeling of genomic sequences presents unique challenges due to their\nlength and structural complexity. Traditional sequence models struggle to\ncapture long-range dependencies and biological features inherent in DNA. In\nthis work, we propose TrinityDNA, a novel DNA foundational model designed to\naddress these challenges. The model integrates biologically informed\ncomponents, including Groove Fusion for capturing DNA's structural features and\nGated Reverse Complement (GRC) to handle the inherent symmetry of DNA\nsequences. Additionally, we introduce a multi-scale attention mechanism that\nallows the model to attend to varying levels of sequence dependencies, and an\nevolutionary training strategy that progressively adapts the model to both\nprokaryotic and eukaryotic genomes. TrinityDNA provides a more accurate and\nefficient approach to genomic sequence modeling, offering significant\nimprovements in gene function prediction, regulatory mechanism discovery, and\nother genomics applications. Our model bridges the gap between machine learning\ntechniques and biological insights, paving the way for more effective analysis\nof genomic data. Additionally, we introduced a new DNA long-sequence CDS\nannotation benchmark to make evaluations more comprehensive and oriented toward\npractical applications.",
"categories": [
"cs.CE"
],
"published": "2025-07-25T12:55:30+00:00",
"url": "http://arxiv.org/pdf/2507.19229v1",
"resource_uri": "arxiv://2507.19229v1",
"citation_count": 0
},
{
"id": "2507.19211v1",
"title": "Dependency-aware synthetic tabular data generation",
"authors": [
"Chaithra Umesh",
"Kristian Schultz",
"Manjunath Mahendra",
"Saptarshi Bej",
"Olaf Wolkenhauer"
],
"abstract": "Synthetic tabular data is increasingly used in privacy-sensitive domains such\nas health care, but existing generative models often fail to preserve\ninter-attribute relationships. In particular, functional dependencies (FDs) and\nlogical dependencies (LDs), which capture deterministic and rule-based\nassociations between features, are rarely or often poorly retained in synthetic\ndatasets. To address this research gap, we propose the Hierarchical Feature\nGeneration Framework (HFGF) for synthetic tabular data generation. We created\nbenchmark datasets with known dependencies to evaluate our proposed HFGF. The\nframework first generates independent features using any standard generative\nmodel, and then reconstructs dependent features based on predefined FD and LD\nrules. Our experiments on four benchmark datasets with varying sizes, feature\nimbalance, and dependency complexity demonstrate that HFGF improves the\npreservation of FDs and LDs across six generative models, including CTGAN,\nTVAE, and GReaT. Our findings demonstrate that HFGF can significantly enhance\nthe structural fidelity and downstream utility of synthetic tabular data.",
"categories": [
"cs.LG"
],
"published": "2025-07-25T12:29:58+00:00",
"url": "http://arxiv.org/pdf/2507.19211v1",
"resource_uri": "arxiv://2507.19211v1",
"citation_count": 0
},
{
"id": "2507.19205v1",
"title": "Physics-Informed Graph Neural Networks for Transverse Momentum Estimation in CMS Trigger Systems",
"authors": [
"Md Abrar Jahin",
"Shahriar Soudeep",
"M. F. Mridha",
"Muhammad Mostafa Monowar",
"Md. Abdul Hamid"
],
"abstract": "Real-time particle transverse momentum ($p_T$) estimation in high-energy\nphysics demands algorithms that are both efficient and accurate under strict\nhardware constraints. Static machine learning models degrade under high pileup\nand lack physics-aware optimization, while generic graph neural networks (GNNs)\noften neglect domain structure critical for robust $p_T$ regression. We propose\na physics-informed GNN framework that systematically encodes detector geometry\nand physical observables through four distinct graph construction strategies\nthat systematically encode detector geometry and physical observables:\nstation-as-node, feature-as-node, bending angle-centric, and pseudorapidity\n($\\eta$)-centric representations. This framework integrates these tailored\ngraph structures with a novel Message Passing Layer (MPL), featuring\nintra-message attention and gated updates, and domain-specific loss functions\nincorporating $p_{T}$-distribution priors. Our co-design methodology yields\nsuperior accuracy-efficiency trade-offs compared to existing baselines.\nExtensive experiments on the CMS Trigger Dataset validate the approach: a\nstation-informed EdgeConv model achieves a state-of-the-art MAE of 0.8525 with\n$\\ge55\\%$ fewer parameters than deep learning baselines, especially TabNet,\nwhile an $\\eta$-centric MPL configuration also demonstrates improved accuracy\nwith comparable efficiency. These results establish the promise of\nphysics-guided GNNs for deployment in resource-constrained trigger systems.",
"categories": [
"cs.LG"
],
"published": "2025-07-25T12:19:57+00:00",
"url": "http://arxiv.org/pdf/2507.19205v1",
"resource_uri": "arxiv://2507.19205v1",
"citation_count": 0
},
{
"id": "2507.19197v1",
"title": "WACA-UNet: Weakness-Aware Channel Attention for Static IR Drop Prediction in Integrated Circuit Design",
"authors": [
"Youngmin Seo",
"Yunhyeong Kwon",
"Younghun Park",
"HwiRyong Kim",
"Seungho Eum",
"Jinha Kim",
"Taigon Song",
"Juho Kim",
"Unsang Park"
],
"abstract": "Accurate spatial prediction of power integrity issues, such as IR drop, is\ncritical for reliable VLSI design. However, traditional simulation-based\nsolvers are computationally expensive and difficult to scale. We address this\nchallenge by reformulating IR drop estimation as a pixel-wise regression task\non heterogeneous multi-channel physical maps derived from circuit layouts.\nPrior learning-based methods treat all input layers (e.g., metal, via, and\ncurrent maps) equally, ignoring their varying importance to prediction\naccuracy. To tackle this, we propose a novel Weakness-Aware Channel Attention\n(WACA) mechanism, which recursively enhances weak feature channels while\nsuppressing over-dominant ones through a two-stage gating strategy. Integrated\ninto a ConvNeXtV2-based attention U-Net, our approach enables adaptive and\nbalanced feature representation. On the public ICCAD-2023 benchmark, our method\noutperforms the ICCAD-2023 contest winner by reducing mean absolute error by\n61.1% and improving F1-score by 71.0%. These results demonstrate that\nchannel-wise heterogeneity is a key inductive bias in physical layout analysis\nfor VLSI.",
"categories": [
"cs.LG",
"cs.AI",
"cs.CV",
"B.7.2; I.5.1; I.2.10; I.5.4"
],
"published": "2025-07-25T12:07:16+00:00",
"url": "http://arxiv.org/pdf/2507.19197v1",
"resource_uri": "arxiv://2507.19197v1",
"citation_count": 0
},
{
"id": "2507.19195v1",
"title": "Can Small-Scale Data Poisoning Exacerbate Dialect-Linked Biases in Large Language Models?",
"authors": [
"Chaymaa Abbas",
"Mariette Awad",
"Razane Tajeddine"
],
"abstract": "Despite the ongoing improvements in the design of large language models\n(LLMs) to foster inclusion and balanced responses, these systems remain\nsusceptible to encoding and amplifying social biases. This study examines how\ndialectal variation, specifically African American Vernacular English (AAVE)\nversus Standard American English (SAE), interacts with data poisoning to\ninfluence toxicity in outputs. Using both small- and medium-scale LLaMA models,\nwe show that even minimal exposure to poisoned data significantly increases\ntoxicity for AAVE inputs, while it remains comparatively unaffected for SAE.\nLarger models exhibit a more significant amplification effect which suggests\nheightened susceptibility with scale. To further assess these disparities, we\nemployed GPT-4o as a fairness auditor, which identified harmful stereotypical\npatterns disproportionately tied to AAVE inputs, including portrayals of\naggression, criminality, and intellectual inferiority. These findings\nunderscore the compounding impact of data poisoning and dialectal bias and\nemphasize the need for dialect-aware evaluation, targeted debiasing\ninterventions, and socially responsible training protocols during development.",
"categories": [
"cs.CL",
"cs.AI",
"cs.LG"
],
"published": "2025-07-25T12:05:47+00:00",
"url": "http://arxiv.org/pdf/2507.19195v1",
"resource_uri": "arxiv://2507.19195v1",
"citation_count": 0
},
{
"id": "2507.19189v1",
"title": "Machine Learning Band Gap Predictions: Linking Quasiparticle Self-Consistent GW and LDA-Derived Partial Density of States",
"authors": [
"Shota Tankano",
"Takao Kotani",
"Masao Obata",
"Kazunori Sato",
"Harutaka Saito",
"Tatsuki Oda"
],
"abstract": "Accurately calculating band gaps for given crystal structures is highly\ndesirable. However, conventional first-principles calculations based on density\nfunctional theory (DFT) within the local density approximation (LDA) fail to\npredict band gaps accurately. To address this issue, the quasi-particle\nself-consistent GW (QSGW) method is often employed as it is one of the most\nreliable theoretical approaches for predicting band gaps. Despite its accuracy,\nQSGW requires significant computational resources. To overcome this limitation,\nwe propose combining QSGW with machine learning. In this study, we applied QSGW\nto 1,516 materials from the Materials Project [https://materialsproject.org/]\nand used machine learning to predict QSGW band gaps as a function of the\npartial density of states (PDOS) in LDA. Our results demonstrate that the\nproposed model significantly outperforms linear regression approaches with\nlinearly-independent descriptor generation\n[https://github.com/Hitoshi-FUJII/LIDG]. This model is a prototype for\npredicting material properties based on PDOS.",
"categories": [
"cond-mat.mtrl-sci"
],
"published": "2025-07-25T11:58:54+00:00",
"url": "http://arxiv.org/pdf/2507.19189v1",
"resource_uri": "arxiv://2507.19189v1",
"citation_count": 0
},
{
"id": "2507.19174v1",
"title": "Automatic Cough Analysis for Non-Small Cell Lung Cancer Detection",
"authors": [
"Chiara Giangregorio",
"Cristina Maria Licciardello",
"Vanja Miskovic",
"Leonardo Provenzano",
"Alessandra Laura Giulia Pedrocchi",
"Andra Diana Dumitrascu",
"Arsela Prelaj",
"Marina Chiara Garassino",
"Emilia Ambrosini",
"Simona Ferrante"
],
"abstract": "Early detection of non-small cell lung cancer (NSCLC) is critical for\nimproving patient outcomes, and novel approaches are needed to facilitate early\ndiagnosis. In this study, we explore the use of automatic cough analysis as a\npre-screening tool for distinguishing between NSCLC patients and healthy\ncontrols. Cough audio recordings were prospectively acquired from a total of\n227 subjects, divided into NSCLC patients and healthy controls. The recordings\nwere analyzed using machine learning techniques, such as support vector machine\n(SVM) and XGBoost, as well as deep learning approaches, specifically\nconvolutional neural networks (CNN) and transfer learning with VGG16. To\nenhance the interpretability of the machine learning model, we utilized Shapley\nAdditive Explanations (SHAP). The fairness of the models across demographic\ngroups was assessed by comparing the performance of the best model across\ndifferent age groups (less than or equal to 58y and higher than 58y) and gender\nusing the equalized odds difference on the test set. The results demonstrate\nthat CNN achieves the best performance, with an accuracy of 0.83 on the test\nset. Nevertheless, SVM achieves slightly lower performances (accuracy of 0.76\nin validation and 0.78 in the test set), making it suitable in contexts with\nlow computational power. The use of SHAP for SVM interpretation further\nenhances model transparency, making it more trustworthy for clinical\napplications. Fairness analysis shows slightly higher disparity across age\n(0.15) than gender (0.09) on the test set. Therefore, to strengthen our\nfindings' reliability, a larger, more diverse, and unbiased dataset is needed\n-- particularly including individuals at risk of NSCLC and those in early\ndisease stages.",
"categories": [
"cs.LG"
],
"published": "2025-07-25T11:30:22+00:00",
"url": "http://arxiv.org/pdf/2507.19174v1",
"resource_uri": "arxiv://2507.19174v1",
"citation_count": 0
},
{
"id": "2507.19168v1",
"title": "Explainable AI guided unsupervised fault diagnostics for high-voltage circuit breakers",
"authors": [
"Chi-Ching Hsu",
"Gaëtan Frusque",
"Florent Forest",
"Felipe Macedo",
"Christian M. Franck",
"Olga Fink"
],
"abstract": "Commercial high-voltage circuit breaker (CB) condition monitoring systems\nrely on directly observable physical parameters such as gas filling pressure\nwith pre-defined thresholds. While these parameters are crucial, they only\ncover a small subset of malfunctioning mechanisms and usually can be monitored\nonly if the CB is disconnected from the grid. To facilitate online condition\nmonitoring while CBs remain connected, non-intrusive measurement techniques\nsuch as vibration or acoustic signals are necessary. Currently, CB condition\nmonitoring studies using these signals typically utilize supervised methods for\nfault diagnostics, where ground-truth fault types are known due to artificially\nintroduced faults in laboratory settings. This supervised approach is however\nnot feasible in real-world applications, where fault labels are unavailable. In\nthis work, we propose a novel unsupervised fault detection and segmentation\nframework for CBs based on vibration and acoustic signals. This framework can\ndetect deviations from the healthy state. The explainable artificial\nintelligence (XAI) approach is applied to the detected faults for fault\ndiagnostics. The specific contributions are: (1) we propose an integrated\nunsupervised fault detection and segmentation framework that is capable of\ndetecting faults and clustering different faults with only healthy data\nrequired during training (2) we provide an unsupervised explainability-guided\nfault diagnostics approach using XAI to offer domain experts potential\nindications of the aged or faulty components, achieving fault diagnostics\nwithout the prerequisite of ground-truth fault labels. These contributions are\nvalidated using an experimental dataset from a high-voltage CB under healthy\nand artificially introduced fault conditions, contributing to more reliable CB\nsystem operation.",
"categories": [
"cs.LG",
"eess.SP"
],
"published": "2025-07-25T11:14:56+00:00",
"url": "http://arxiv.org/pdf/2507.19168v1",
"resource_uri": "arxiv://2507.19168v1",
"citation_count": 0
},
{
"id": "2507.19156v1",
"title": "An Empirical Investigation of Gender Stereotype Representation in Large Language Models: The Italian Case",
"authors": [
"Gioele Giachino",
"Marco Rondina",
"Antonio Vetrò",
"Riccardo Coppola",
"Juan Carlos De Martin"
],
"abstract": "The increasing use of Large Language Models (LLMs) in a large variety of\ndomains has sparked worries about how easily they can perpetuate stereotypes\nand contribute to the generation of biased content. With a focus on gender and\nprofessional bias, this work examines in which manner LLMs shape responses to\nungendered prompts, contributing to biased outputs. This analysis uses a\nstructured experimental method, giving different prompts involving three\ndifferent professional job combinations, which are also characterized by a\nhierarchical relationship. This study uses Italian, a language with extensive\ngrammatical gender differences, to highlight potential limitations in current\nLLMs' ability to generate objective text in non-English languages. Two popular\nLLM-based chatbots are examined, namely OpenAI ChatGPT (gpt-4o-mini) and Google\nGemini (gemini-1.5-flash). Through APIs, we collected a range of 3600\nresponses. The results highlight how content generated by LLMs can perpetuate\nstereotypes. For example, Gemini associated 100% (ChatGPT 97%) of 'she'\npronouns to the 'assistant' rather than the 'manager'. The presence of bias in\nAI-generated text can have significant implications in many fields, such as in\nthe workplaces or in job selections, raising ethical concerns about its use.\nUnderstanding these risks is pivotal to developing mitigation strategies and\nassuring that AI-based systems do not increase social inequalities, but rather\ncontribute to more equitable outcomes. Future research directions include\nexpanding the study to additional chatbots or languages, refining prompt\nengineering methods or further exploiting a larger experimental base.",
"categories": [
"cs.CL",
"cs.AI",
"cs.CY",
"cs.HC"
],
"published": "2025-07-25T10:57:29+00:00",
"url": "http://arxiv.org/pdf/2507.19156v1",
"resource_uri": "arxiv://2507.19156v1",
"citation_count": 0
},
{
"id": "2507.19151v1",
"title": "ReCoDe: Reinforcement Learning-based Dynamic Constraint Design for Multi-Agent Coordination",
"authors": [
"Michael Amir",
"Guang Yang",
"Zhan Gao",
"Keisuke Okumura",
"Heedo Woo",
"Amanda Prorok"
],
"abstract": "Constraint-based optimization is a cornerstone of robotics, enabling the\ndesign of controllers that reliably encode task and safety requirements such as\ncollision avoidance or formation adherence. However, handcrafted constraints\ncan fail in multi-agent settings that demand complex coordination. We introduce\nReCoDe--Reinforcement-based Constraint Design--a decentralized, hybrid\nframework that merges the reliability of optimization-based controllers with\nthe adaptability of multi-agent reinforcement learning. Rather than discarding\nexpert controllers, ReCoDe improves them by learning additional, dynamic\nconstraints that capture subtler behaviors, for example, by constraining agent\nmovements to prevent congestion in cluttered scenarios. Through local\ncommunication, agents collectively constrain their allowed actions to\ncoordinate more effectively under changing conditions. In this work, we focus\non applications of ReCoDe to multi-agent navigation tasks requiring intricate,\ncontext-based movements and consensus, where we show that it outperforms purely\nhandcrafted controllers, other hybrid approaches, and standard MARL baselines.\nWe give empirical (real robot) and theoretical evidence that retaining a\nuser-defined controller, even when it is imperfect, is more efficient than\nlearning from scratch, especially because ReCoDe can dynamically change the\ndegree to which it relies on this controller.",
"categories": [
"cs.RO",
"cs.AI",
"cs.LG",
"cs.MA",
"I.2.9"
],
"published": "2025-07-25T10:47:39+00:00",
"url": "http://arxiv.org/pdf/2507.19151v1",
"resource_uri": "arxiv://2507.19151v1",
"citation_count": 0
},
{
"id": "2507.19149v1",
"title": "Machine Learning based Radio Environment Map Estimation for Indoor Visible Light Communication",
"authors": [
"Helena Serpi",
"Christina",
"Politi"
],
"abstract": "An innovative method for radio map estimation in optical wireless\ncommunications is proposed that is based on Machine Learning rather than\nsimulation techniques. Multi-Layer Perceptron (MLP) representation of indoor\nVisible Light Communication (VLC) systems is suggested, and signal propagation\nis estimated. The simulation and performance predictions are accurate, fast and\nrequire a reduced set of training sample size with respect to other\ncounterparts, making this solution very suitable for real time estimation of an\nindoor VLC system. It is shown that by tweaking MLP parameters, such as sample\nsize, number of epochs and batch size, one can balance the desired level of\ninference accuracy with training time and optimize the model's performance to\nmeet real-time requirements.",
"categories": [
"eess.SP",
"I.2.6; C.2.1; C.2.3; C.4"
],
"published": "2025-07-25T10:40:03+00:00",
"url": "http://arxiv.org/pdf/2507.19149v1",
"resource_uri": "arxiv://2507.19149v1",
"citation_count": 0
},
{
"id": "2507.19144v1",
"title": "Solar Photovoltaic Assessment with Large Language Model",
"authors": [
"Muhao Guo",
"Yang Weng"
],
"abstract": "Accurate detection and localization of solar photovoltaic (PV) panels in\nsatellite imagery is essential for optimizing microgrids and active\ndistribution networks (ADNs), which are critical components of renewable energy\nsystems. Existing methods lack transparency regarding their underlying\nalgorithms or training datasets, rely on large, high-quality PV training data,\nand struggle to generalize to new geographic regions or varied environmental\nconditions without extensive re-training. These limitations lead to\ninconsistent detection outcomes, hindering large-scale deployment and\ndata-driven grid optimization. In this paper, we investigate how large language\nmodels (LLMs) can be leveraged to overcome these challenges. Despite their\npromise, LLMs face several challenges in solar panel detection, including\ndifficulties with multi-step logical processes, inconsistent output formatting,\nfrequent misclassification of visually similar objects (e.g., shadows, parking\nlots), and low accuracy in complex tasks such as spatial localization and\nquantification. To overcome these issues, we propose the PV Assessment with\nLLMs (PVAL) framework, which incorporates task decomposition for more efficient\nworkflows, output standardization for consistent and scalable formatting,\nfew-shot prompting to enhance classification accuracy, and fine-tuning using\ncurated PV datasets with detailed annotations. PVAL ensures transparency,\nscalability, and adaptability across heterogeneous datasets while minimizing\ncomputational overhead. By combining open-source accessibility with robust\nmethodologies, PVAL establishes an automated and reproducible pipeline for\nsolar panel detection, paving the way for large-scale renewable energy\nintegration and optimized grid management.",
"categories": [
"cs.LG",
"cs.AI"
],
"published": "2025-07-25T10:26:29+00:00",
"url": "http://arxiv.org/pdf/2507.19144v1",
"resource_uri": "arxiv://2507.19144v1",
"citation_count": 0
},
{
"id": "2507.19143v1",
"title": "Game-Theoretic Gradient Control for Robust Neural Network Training",
"authors": [
"Maria Zaitseva",
"Ivan Tomilov",
"Natalia Gusarova"
],
"abstract": "Feed-forward neural networks (FFNNs) are vulnerable to input noise, reducing\nprediction performance. Existing regularization methods like dropout often\nalter network architecture or overlook neuron interactions. This study aims to\nenhance FFNN noise robustness by modifying backpropagation, interpreted as a\nmulti-agent game, and exploring controlled target variable noising. Our\n\"gradient dropout\" selectively nullifies hidden layer neuron gradients with\nprobability 1 - p during backpropagation, while keeping forward passes active.\nThis is framed within compositional game theory. Additionally, target variables\nwere perturbed with white noise or stable distributions. Experiments on ten\ndiverse tabular datasets show varying impacts: improvement or diminishing of\nrobustness and accuracy, depending on dataset and hyperparameters. Notably, on\nregression tasks, gradient dropout (p = 0.9) combined with stable distribution\ntarget noising significantly increased input noise robustness, evidenced by\nflatter MSE curves and more stable SMAPE values. These results highlight the\nmethod's potential, underscore the critical role of adaptive parameter tuning,\nand open new avenues for analyzing neural networks as complex adaptive systems\nexhibiting emergent behavior within a game-theoretic framework.",
"categories": [
"cs.NE",
"cs.LG",
"68T07",
"I.2.6"
],
"published": "2025-07-25T10:26:25+00:00",
"url": "http://arxiv.org/pdf/2507.19143v1",
"resource_uri": "arxiv://2507.19143v1",
"citation_count": 0
},
{
"id": "2507.19120v1",
"title": "Boundary-layer transition in the age of data: from a comprehensive dataset to fine-grained prediction",
"authors": [
"Wenhui Chang",
"Hongyuan Hu",
"Youcheng Xi",
"Markus Kloker",
"Honghui Teng",
"Jie Ren"
],
"abstract": "The laminar-to-turbulent transition remains a fundamental and enduring\nchallenge in fluid mechanics. Its complexity arises from the intrinsic\nnonlinearity and extreme sensitivity to external disturbances. This transition\nis critical in a wide range of applications, including aerospace, marine\nengineering, geophysical flows, and energy systems. While the governing physics\ncan be well described by the Navier-Stokes equations, practical prediction\nefforts often fall short due to the lack of comprehensive models for\nperturbation initialization and turbulence generation in numerical simulations.\nTo address the uncertainty introduced by unforeseeable environmental\nperturbations, we propose a fine-grained predictive framework that accurately\npredicts the transition location. The framework generates an extensive dataset\nusing nonlinear parabolized stability equations (NPSE). NPSE simulations are\nperformed over a wide range of randomly prescribed initial conditions for the\ngeneric zero-pressure-gradient flat-plate boundary-layer flow, resulting in a\nlarge dataset that captures the nonlinear evolution of instability waves across\nthree canonical transition pathways (Type-K, -H, and -O). From a database of\n3000 simulation cases, we extract diagnostic quantities (e.g., wall pressure\nsignals and skin-friction coefficients) from each simulation to construct a\nfeature set that links pre-transition flow characteristics to transition onset\nlocations. Machine learning models are systematically evaluated, with ensemble\nmethods-particularly XGBoost-demonstrating exceptional predictive accuracy\n(mean relative error of approximately 0.001). Compared to methods currently\navailable (e.g., N-factor, transitional turbulence model), this approach\naccounts for the physical process and achieves transition prediction without\nrelying on any empirical parameters.",
"categories": [
"physics.flu-dyn",
"nlin.CD"
],
"published": "2025-07-25T09:57:06+00:00",
"url": "http://arxiv.org/pdf/2507.19120v1",
"resource_uri": "arxiv://2507.19120v1",
"citation_count": 0
},
{
"id": "2507.19116v1",
"title": "Graph Structure Learning with Privacy Guarantees for Open Graph Data",
"authors": [
"Muhao Guo",
"Jiaqi Wu",
"Yang Weng",
"Yizheng Liao",
"Shengzhe Chen"
],
"abstract": "Ensuring privacy in large-scale open datasets is increasingly challenging\nunder regulations such as the General Data Protection Regulation (GDPR). While\ndifferential privacy (DP) provides strong theoretical guarantees, it primarily\nfocuses on noise injection during model training, neglecting privacy\npreservation at the data publishing stage. Existing privacy-preserving data\npublishing (PPDP) approaches struggle to balance privacy and utility,\nparticularly when data publishers and users are distinct entities. To address\nthis gap, we focus on the graph recovery problem and propose a novel\nprivacy-preserving estimation framework for open graph data, leveraging\nGaussian DP (GDP) with a structured noise-injection mechanism. Unlike\ntraditional methods that perturb gradients or model updates, our approach\nensures unbiased graph structure recovery while enforcing DP at the data\npublishing stage. Moreover, we provide theoretical guarantees on estimation\naccuracy and extend our method to discrete-variable graphs, a setting often\noverlooked in DP research. Experimental results in graph learning demonstrate\nrobust performance, offering a viable solution for privacy-conscious graph\nanalysis.",
"categories": [
"cs.LG",
"cs.AI"
],
"published": "2025-07-25T09:51:12+00:00",
"url": "http://arxiv.org/pdf/2507.19116v1",
"resource_uri": "arxiv://2507.19116v1",
"citation_count": 0
},
{
"id": "2507.19102v1",
"title": "Distilling a Small Utility-Based Passage Selector to Enhance Retrieval-Augmented Generation",
"authors": [
"Hengran Zhang",
"Keping Bi",
"Jiafeng Guo",
"Jiaming Zhang",
"Shuaiqiang Wang",
"Dawei Yin",
"Xueqi Cheng"
],
"abstract": "Retrieval-augmented generation (RAG) enhances large language models (LLMs) by\nincorporating retrieved information. Standard retrieval process prioritized\nrelevance, focusing on topical alignment between queries and passages. In\ncontrast, in RAG, the emphasis has shifted to utility, which considers the\nusefulness of passages for generating accurate answers. Despite empirical\nevidence showing the benefits of utility-based retrieval in RAG, the high\ncomputational cost of using LLMs for utility judgments limits the number of\npassages evaluated. This restriction is problematic for complex queries\nrequiring extensive information. To address this, we propose a method to\ndistill the utility judgment capabilities of LLMs into smaller, more efficient\nmodels. Our approach focuses on utility-based selection rather than ranking,\nenabling dynamic passage selection tailored to specific queries without the\nneed for fixed thresholds. We train student models to learn pseudo-answer\ngeneration and utility judgments from teacher LLMs, using a sliding window\nmethod that dynamically selects useful passages. Our experiments demonstrate\nthat utility-based selection provides a flexible and cost-effective solution\nfor RAG, significantly reducing computational costs while improving answer\nquality. We present the distillation results using Qwen3-32B as the teacher\nmodel for both relevance ranking and utility-based selection, distilled into\nRankQwen1.7B and UtilityQwen1.7B. Our findings indicate that for complex\nquestions, utility-based selection is more effective than relevance ranking in\nenhancing answer generation performance. We will release the relevance ranking\nand utility-based selection annotations for the MS MARCO dataset, supporting\nfurther research in this area.",
"categories": [
"cs.IR",
"cs.AI",
"cs.CL",
"cs.LG"
],
"published": "2025-07-25T09:32:29+00:00",
"url": "http://arxiv.org/pdf/2507.19102v1",
"resource_uri": "arxiv://2507.19102v1",
"citation_count": 0
},
{
"id": "2507.19095v1",
"title": "GCL-GCN: Graphormer and Contrastive Learning Enhanced Attributed Graph Clustering Network",
"authors": [
"Binxiong Li",
"Xu Xiang",
"Xue Li",
"Binyu Zhao",
"Yujie Liu",
"Huijie Tang",
"Benhan Yang",
"Zhixuan Chen"
],
"abstract": "Attributed graph clustering holds significant importance in modern data\nanalysis. However, due to the complexity of graph data and the heterogeneity of\nnode attributes, leveraging graph information for clustering remains\nchallenging. To address this, we propose a novel deep graph clustering model,\nGCL-GCN, specifically designed to address the limitations of existing models in\ncapturing local dependencies and complex structures when dealing with sparse\nand heterogeneous graph data. GCL-GCN introduces an innovative Graphormer\nmodule that combines centrality encoding and spatial relationships, effectively\ncapturing both global and local information between nodes, thereby enhancing\nthe quality of node representations. Additionally, we propose a novel\ncontrastive learning module that significantly enhances the discriminative\npower of feature representations. In the pre-training phase, this module\nincreases feature distinction through contrastive learning on the original\nfeature matrix, ensuring more identifiable initial representations for\nsubsequent graph convolution and clustering tasks. Extensive experimental\nresults on six datasets demonstrate that GCL-GCN outperforms 14 advanced\nmethods in terms of clustering quality and robustness. Specifically, on the\nCora dataset, it improves ACC, NMI, and ARI by 4.94%, 13.01%, and 10.97%,\nrespectively, compared to the primary comparison method MBN.",
"categories": [
"cs.LG"
],
"published": "2025-07-25T09:25:55+00:00",
"url": "http://arxiv.org/pdf/2507.19095v1",
"resource_uri": "arxiv://2507.19095v1",
"citation_count": 0
},
{
"id": "2507.19093v1",
"title": "Graph Neural Network-Based Predictor for Optimal Quantum Hardware Selection",
"authors": [
"Antonio Tudisco",
"Deborah Volpe",
"Giacomo Orlandi",
"Giovanna Turvani"
],
"abstract": "The growing variety of quantum hardware technologies, each with unique\npeculiarities such as connectivity and native gate sets, creates challenges\nwhen selecting the best platform for executing a specific quantum circuit. This\nselection process usually involves a brute-force approach: compiling the\ncircuit on various devices and evaluating performance based on factors such as\ncircuit depth and gate fidelity. However, this method is computationally\nexpensive and does not scale well as the number of available quantum processors\nincreases. In this work, we propose a Graph Neural Network (GNN)-based\npredictor that automates hardware selection by analyzing the Directed Acyclic\nGraph (DAG) representation of a quantum circuit. Our study evaluates 498\nquantum circuits (up to 27 qubits) from the MQT Bench dataset, compiled using\nQiskit on four devices: three superconducting quantum processors (IBM-Kyiv,\nIBM-Brisbane, IBM-Sherbrooke) and one trapped-ion processor (IONQ-Forte).\nPerformance is estimated using a metric that integrates circuit depth and gate\nfidelity, resulting in a dataset where 93 circuits are optimally compiled on\nthe trapped-ion device, while the remaining circuits prefer superconducting\nplatforms. By exploiting graph-based machine learning, our approach avoids\nextracting the circuit features for the model evaluation but directly embeds it\nas a graph, significantly accelerating the optimal target decision-making\nprocess and maintaining all the information. Experimental results prove 94.4%\naccuracy and an 85.5% F1 score for the minority class, effectively predicting\nthe best compilation target. The developed code is publicly available on GitHub\n(https://github.com/antotu/GNN-Model-Quantum-Predictor).",
"categories": [
"quant-ph",
"cs.LG"
],
"published": "2025-07-25T09:23:04+00:00",
"url": "http://arxiv.org/pdf/2507.19093v1",
"resource_uri": "arxiv://2507.19093v1",
"citation_count": 0
},
{
"id": "2507.19092v1",
"title": "Comparing OCR Pipelines for Folkloristic Text Digitization",
"authors": [
"Octavian M. Machidon",
"Alina L. Machidon"
],
"abstract": "The digitization of historical folkloristic materials presents unique\nchallenges due to diverse text layouts, varying print and handwriting styles,\nand linguistic variations. This study explores different optical character\nrecognition (OCR) approaches for Slovene folkloristic and historical text\ndigitization, integrating both traditional methods and large language models\n(LLMs) to improve text transcription accuracy while maintaining linguistic and\nstructural integrity. We compare single-stage OCR techniques with multi-stage\npipelines that incorporate machine learning-driven post-processing for text\nnormalization and layout reconstruction. While LLM-enhanced methods show\npromise in refining recognition outputs and improving readability, they also\nintroduce challenges related to unintended modifications, particularly in the\npreservation of dialectal expressions and historical structures. Our findings\nprovide insights into selecting optimal digitization strategies for large-scale\nfolklore archives and outline recommendations for developing robust OCR\npipelines that balance automation with the need for textual authenticity in\ndigital humanities research.",
"categories": [
"cs.DL",
"cs.MM"
],
"published": "2025-07-25T09:22:41+00:00",
"url": "http://arxiv.org/pdf/2507.19092v1",
"resource_uri": "arxiv://2507.19092v1",
"citation_count": 0
},
{
"id": "2507.19060v1",
"title": "PurpCode: Reasoning for Safer Code Generation",
"authors": [
"Jiawei Liu",
"Nirav Diwan",
"Zhe Wang",
"Haoyu Zhai",
"Xiaona Zhou",
"Kiet A. Nguyen",
"Tianjiao Yu",
"Muntasir Wahed",
"Yinlin Deng",
"Hadjer Benkraouda",
"Yuxiang Wei",
"Lingming Zhang",
"Ismini Lourentzou",
"Gang Wang"
],
"abstract": "We introduce PurpCode, the first post-training recipe for training safe code\nreasoning models towards generating secure code and defending against malicious\ncyberactivities. PurpCode trains a reasoning model in two stages: (i) Rule\nLearning, which explicitly teaches the model to reference cybersafety rules to\ngenerate vulnerability-free code and to avoid facilitating malicious\ncyberactivities; and (ii) Reinforcement Learning, which optimizes model safety\nand preserves model utility through diverse, multi-objective reward mechanisms.\nTo empower the training pipelines with comprehensive cybersafety data, we\nconduct internal red-teaming to synthesize comprehensive and high-coverage\nprompts based on real-world tasks for inducing unsafe cyberactivities in the\nmodel. Based on PurpCode, we develop a reasoning-based coding model, namely\nPurpCode-32B, which demonstrates state-of-the-art cybersafety, outperforming\nvarious frontier models. Meanwhile, our alignment method decreases the model\noverrefusal rates in both general and cybersafety-specific scenarios, while\npreserving model utility in both code generation and common security knowledge.",
"categories": [
"cs.CR",
"cs.CL",
"cs.LG",
"cs.SE"
],
"published": "2025-07-25T08:23:00+00:00",
"url": "http://arxiv.org/pdf/2507.19060v1",
"resource_uri": "arxiv://2507.19060v1",
"citation_count": 0
},
{
"id": "2507.19057v1",
"title": "Exploring molecular assembly as a biosignature using mass spectrometry and machine learning",
"authors": [
"Lindsay A. Rutter",
"Abhishek Sharma",
"Ian Seet",
"David Obeh Alobo",
"An Goto",
"Leroy Cronin"
],
"abstract": "Molecular assembly offers a promising path to detect life beyond Earth, while\nminimizing assumptions based on terrestrial life. As mass spectrometers will be\ncentral to upcoming Solar System missions, predicting molecular assembly from\ntheir data without needing to elucidate unknown structures will be essential\nfor unbiased life detection. An ideal agnostic biosignature must be\ninterpretable and experimentally measurable. Here, we show that molecular\nassembly, a recently developed approach to measure objects that have been\nproduced by evolution, satisfies both criteria. First, it is interpretable for\nlife detection, as it reflects the assembly of molecules with their bonds as\nbuilding blocks, in contrast to approaches that discount construction history.\nSecond, it can be determined without structural elucidation, as it can be\nphysically measured by mass spectrometry, a property that distinguishes it from\nother approaches that use structure-based information measures for molecular\ncomplexity. Whilst molecular assembly is directly measurable using mass\nspectrometry data, there are limits imposed by mission constraints. To address\nthis, we developed a machine learning model that predicts molecular assembly\nwith high accuracy, reducing error by three-fold compared to baseline models.\nSimulated data shows that even small instrumental inconsistencies can double\nmodel error, emphasizing the need for standardization. These results suggest\nthat standardized mass spectrometry databases could enable accurate molecular\nassembly prediction, without structural elucidation, providing a\nproof-of-concept for future astrobiology missions.",
"categories": [
"cs.LG"
],
"published": "2025-07-25T08:19:15+00:00",
"url": "http://arxiv.org/pdf/2507.19057v1",
"resource_uri": "arxiv://2507.19057v1",
"citation_count": 0
},
{
"id": "2507.19046v1",
"title": "Dynamics-Informed Reservoir Computing with Visibility Graphs",
"authors": [
"Charlotte Geier",
"Merten Stender"
],
"abstract": "Accurate prediction of complex and nonlinear time series remains a\nchallenging problem across engineering and scientific disciplines. Reservoir\ncomputing (RC) offers a computationally efficient alternative to traditional\ndeep learning by training only the read-out layer while employing a randomly\nstructured and fixed reservoir network. Despite its advantages, the largely\nrandom reservoir graph architecture often results in suboptimal and oversized\nnetworks with poorly understood dynamics. Addressing this issue, we propose a\nnovel Dynamics-Informed Reservoir Computing (DyRC) framework that\nsystematically infers the reservoir network structure directly from the input\ntraining sequence. This work proposes to employ the visibility graph (VG)\ntechnique, which converts time series data into networks by representing\nmeasurement points as nodes linked by mutual visibility. The reservoir network\nis constructed by directly adopting the VG network from a training data\nsequence, leveraging the parameter-free visibility graph approach to avoid\nexpensive hyperparameter tuning. This process results in a reservoir that is\ndirectly informed by the specific dynamics of the prediction task under study.\nWe assess the DyRC-VG method through prediction tasks involving the canonical\nnonlinear Duffing oscillator, evaluating prediction accuracy and consistency.\nCompared to an Erd\\H{o}s-R\\'enyi graph of the same size, spectral radius, and\ncomparable density, we observe higher prediction quality and more consistent\nperformance over repeated implementations in the DyRC-VG.",
"categories": [
"cs.LG"
],
"published": "2025-07-25T08:07:17+00:00",
"url": "http://arxiv.org/pdf/2507.19046v1",
"resource_uri": "arxiv://2507.19046v1",
"citation_count": 0
},
{
"id": "2507.19041v1",
"title": "PGKET: A Photonic Gaussian Kernel Enhanced Transformer",
"authors": [
"Ren-Xin Zhao"
],
"abstract": "Self-Attention Mechanisms (SAMs) enhance model performance by extracting key\ninformation but are inefficient when dealing with long sequences. To this end,\na photonic Gaussian Kernel Enhanced Transformer (PGKET) is proposed, based on\nthe Photonic Gaussian Kernel Self-Attention Mechanism (PGKSAM). The PGKSAM\ncalculates the Photonic Gaussian Kernel Self-Attention Score (PGKSAS) using\nphoton interferometry and superposition to process multiple inputs in parallel.\nExperimental results show that PGKET outperforms some state-of-the-art\ntransformers in multi-classification tasks on MedMNIST v2 and CIFAR-10, and is\nexpected to improve performance in complex tasks and accelerate the convergence\nof Photonic Computing (PC) and machine learning.",
"categories": [
"quant-ph",
"cs.CV"
],
"published": "2025-07-25T07:52:24+00:00",
"url": "http://arxiv.org/pdf/2507.19041v1",
"resource_uri": "arxiv://2507.19041v1",
"citation_count": 0
},
{
"id": "2507.19036v1",
"title": "Neural Ordinary Differential Equations for Learning and Extrapolating System Dynamics Across Bifurcations",
"authors": [
"Eva van Tegelen",
"George van Voorn",
"Ioannis Athanasiadis",
"Peter van Heijster"
],
"abstract": "Forecasting system behaviour near and across bifurcations is crucial for\nidentifying potential shifts in dynamical systems. While machine learning has\nrecently been used to learn critical transitions and bifurcation structures\nfrom data, most studies remain limited as they exclusively focus on\ndiscrete-time methods and local bifurcations. To address these limitations, we\nuse Neural Ordinary Differential Equations which provide a continuous,\ndata-driven framework for learning system dynamics. We apply our approach to a\npredator-prey system that features both local and global bifurcations,\npresenting a challenging test case. Our results show that Neural Ordinary\nDifferential Equations can recover underlying bifurcation structures directly\nfrom timeseries data by learning parameter-dependent vector fields. Notably, we\ndemonstrate that Neural Ordinary Differential Equations can forecast\nbifurcations even beyond the parameter regions represented in the training\ndata. We also assess the method's performance under limited and noisy data\nconditions, finding that model accuracy depends more on the quality of\ninformation that can be inferred from the training data, than on the amount of\ndata available.",
"categories": [
"cs.LG",
"math.DS"
],
"published": "2025-07-25T07:44:34+00:00",
"url": "http://arxiv.org/pdf/2507.19036v1",
"resource_uri": "arxiv://2507.19036v1",
"citation_count": 0
},
{
"id": "2507.19031v1",
"title": "ProGMLP: A Progressive Framework for GNN-to-MLP Knowledge Distillation with Efficient Trade-offs",
"authors": [
"Weigang Lu",
"Ziyu Guan",
"Wei Zhao",
"Yaming Yang",
"Yujie Sun",
"Zheng Liang",
"Yibing Zhan",
"Dapeng Tao"
],
"abstract": "GNN-to-MLP (G2M) methods have emerged as a promising approach to accelerate\nGraph Neural Networks (GNNs) by distilling their knowledge into simpler\nMulti-Layer Perceptrons (MLPs). These methods bridge the gap between the\nexpressive power of GNNs and the computational efficiency of MLPs, making them\nwell-suited for resource-constrained environments. However, existing G2M\nmethods are limited by their inability to flexibly adjust inference cost and\naccuracy dynamically, a critical requirement for real-world applications where\ncomputational resources and time constraints can vary significantly. To address\nthis, we introduce a Progressive framework designed to offer flexible and\non-demand trade-offs between inference cost and accuracy for GNN-to-MLP\nknowledge distillation (ProGMLP). ProGMLP employs a Progressive Training\nStructure (PTS), where multiple MLP students are trained in sequence, each\nbuilding on the previous one. Furthermore, ProGMLP incorporates Progressive\nKnowledge Distillation (PKD) to iteratively refine the distillation process\nfrom GNNs to MLPs, and Progressive Mixup Augmentation (PMA) to enhance\ngeneralization by progressively generating harder mixed samples. Our approach\nis validated through comprehensive experiments on eight real-world graph\ndatasets, demonstrating that ProGMLP maintains high accuracy while dynamically\nadapting to varying runtime scenarios, making it highly effective for\ndeployment in diverse application settings.",
"categories": [
"cs.LG"
],
"published": "2025-07-25T07:35:09+00:00",
"url": "http://arxiv.org/pdf/2507.19031v1",
"resource_uri": "arxiv://2507.19031v1",
"citation_count": 0
},
{
"id": "2507.19028v1",
"title": "Nonparametric Linear Discriminant Analysis for High Dimensional Matrix-Valued Data",
"authors": [
"Seungyeon Oh",
"Seongoh Park",
"Hoyoung Park"
],
"abstract": "This paper addresses classification problems with matrix-valued data, which\ncommonly arises in applications such as neuroimaging and signal processing.\nBuilding on the assumption that the data from each class follows a matrix\nnormal distribution, we propose a novel extension of Fisher's Linear\nDiscriminant Analysis (LDA) tailored for matrix-valued observations. To\neffectively capture structural information while maintaining estimation\nflexibility, we adopt a nonparametric empirical Bayes framework based on\nNonparametric Maximum Likelihood Estimation (NPMLE), applied to vectorized and\nscaled matrices. The NPMLE method has been shown to provide robust, flexible,\nand accurate estimates for vector-valued data with various structures in the\nmean vector or covariance matrix. By leveraging its strengths, our method is\neffectively generalized to the matrix setting, thereby improving classification\nperformance. Through extensive simulation studies and real data applications,\nincluding electroencephalography (EEG) and magnetic resonance imaging (MRI)\nanalysis, we demonstrate that the proposed method consistently outperforms\nexisting approaches across a variety of data structures.",
"categories": [
"stat.ME",
"stat.AP",
"stat.ML"
],
"published": "2025-07-25T07:30:24+00:00",
"url": "http://arxiv.org/pdf/2507.19028v1",
"resource_uri": "arxiv://2507.19028v1",
"citation_count": 0
},
{
"id": "2507.19017v1",
"title": "MindSpeed RL: Distributed Dataflow for Scalable and Efficient RL Training on Ascend NPU Cluster",
"authors": [
"Laingjun Feng",
"Chenyi Pan",
"Xinjie Guo",
"Fei Mei",
"Benzhe Ning",
"Jianxiang Zhang",
"Xinyang Liu",
"Beirong Zhou",
"Zeng Shu",
"Chang Liu",
"Guang Yang",
"Zhenyu Han",
"Jiangben Wang",
"Bo Wang"
],
"abstract": "Reinforcement learning (RL) is a paradigm increasingly used to align large\nlanguage models. Popular RL algorithms utilize multiple workers and can be\nmodeled as a graph, where each node is the status of a worker and each edge\nrepresents dataflow between nodes. Owing to the heavy cross-node dependencies,\nthe RL training system usually suffers from poor cluster scalability and low\nmemory utilization. In this article, we introduce MindSpeed RL, an effective\nand efficient system for large-scale RL training. Unlike existing centralized\nmethods, MindSpeed RL organizes the essential data dependencies in RL training,\ni.e., sample flow and resharding flow, from a distributed view. On the one\nhand, a distributed transfer dock strategy, which sets controllers and\nwarehouses on the basis of the conventional replay buffer, is designed to\nrelease the dispatch overhead in the sample flow. A practical allgather--swap\nstrategy is presented to eliminate redundant memory usage in resharding flow.\nIn addition, MindSpeed RL further integrates numerous parallelization\nstrategies and acceleration techniques for systematic optimization. Compared\nwith existing state-of-the-art systems, comprehensive experiments on the RL\ntraining of popular Qwen2.5-Dense-7B/32B, Qwen3-MoE-30B, and\nDeepSeek-R1-MoE-671B show that MindSpeed RL increases the throughput by 1.42 ~\n3.97 times. Finally, we open--source MindSpeed RL and perform all the\nexperiments on a super pod of Ascend with 384 neural processing units (NPUs) to\ndemonstrate the powerful performance and reliability of Ascend.",
"categories": [
"cs.LG",
"cs.AI",
"CS"
],
"published": "2025-07-25T07:11:49+00:00",
"url": "http://arxiv.org/pdf/2507.19017v1",
"resource_uri": "arxiv://2507.19017v1",
"citation_count": 0
},
{
"id": "2507.19003v1",
"title": "A diffusion-based generative model for financial time series via geometric Brownian motion",
"authors": [
"Gihun Kim",
"Sun-Yong Choi",
"Yeoneung Kim"
],
"abstract": "We propose a novel diffusion-based generative framework for financial time\nseries that incorporates geometric Brownian motion (GBM), the foundation of the\nBlack--Scholes theory, into the forward noising process. Unlike standard\nscore-based models that treat price trajectories as generic numerical\nsequences, our method injects noise proportionally to asset prices at each time\nstep, reflecting the heteroskedasticity observed in financial time series. By\naccurately balancing the drift and diffusion terms, we show that the resulting\nlog-price process reduces to a variance-exploding stochastic differential\nequation, aligning with the formulation in score-based generative models. The\nreverse-time generative process is trained via denoising score matching using a\nTransformer-based architecture adapted from the Conditional Score-based\nDiffusion Imputation (CSDI) framework. Empirical evaluations on historical\nstock data demonstrate that our model reproduces key stylized facts\nheavy-tailed return distributions, volatility clustering, and the leverage\neffect more realistically than conventional diffusion models.",
"categories": [
"cs.LG",
"cs.AI",
"cs.NA",
"math.NA",
"60H10, 91G80, 91G60"
],
"published": "2025-07-25T07:02:09+00:00",
"url": "http://arxiv.org/pdf/2507.19003v1",
"resource_uri": "arxiv://2507.19003v1",
"citation_count": 0
},
{
"id": "2507.18996v1",
"title": "Adapting to Fragmented and Evolving Data: A Fisher Information Perspective",
"authors": [
"Behraj Khan",
"Tahir Qasim Syed",
"Nouman Muhammad Durrani"
],
"abstract": "Modern machine learning systems operating in dynamic environments often face\n\\textit{sequential covariate shift} (SCS), where input distributions evolve\nover time while the conditional distribution remains stable. We introduce FADE\n(Fisher-based Adaptation to Dynamic Environments), a lightweight and\ntheoretically grounded framework for robust learning under SCS. FADE employs a\nshift-aware regularization mechanism anchored in Fisher information geometry,\nguiding adaptation by modulating parameter updates based on sensitivity and\nstability. To detect significant distribution changes, we propose a\nCramer-Rao-informed shift signal that integrates KL divergence with temporal\nFisher dynamics. Unlike prior methods requiring task boundaries, target\nsupervision, or experience replay, FADE operates online with fixed memory and\nno access to target labels. Evaluated on seven benchmarks spanning vision,\nlanguage, and tabular data, FADE achieves up to 19\\% higher accuracy under\nsevere shifts, outperforming methods such as TENT and DIW. FADE also\ngeneralizes naturally to federated learning by treating heterogeneous clients\nas temporally fragmented environments, enabling scalable and stable adaptation\nin decentralized settings. Theoretical analysis guarantees bounded regret and\nparameter consistency, while empirical results demonstrate FADE's robustness\nacross modalities and shift intensities.",
"categories": [
"cs.LG"
],
"published": "2025-07-25T06:50:09+00:00",
"url": "http://arxiv.org/pdf/2507.18996v1",
"resource_uri": "arxiv://2507.18996v1",
"citation_count": 0
},
{
"id": "2507.18993v1",
"title": "Agent0: Leveraging LLM Agents to Discover Multi-value Features from Text for Enhanced Recommendations",
"authors": [
"Blaž Škrlj",
"Benoît Guilleminot",
"Andraž Tori"
],
"abstract": "Large language models (LLMs) and their associated agent-based frameworks have\nsignificantly advanced automated information extraction, a critical component\nof modern recommender systems. While these multitask frameworks are widely used\nin code generation, their application in data-centric research is still largely\nuntapped. This paper presents Agent0, an LLM-driven, agent-based system\ndesigned to automate information extraction and feature construction from raw,\nunstructured text. Categorical features are crucial for large-scale recommender\nsystems but are often expensive to acquire. Agent0 coordinates a group of\ninteracting LLM agents to automatically identify the most valuable text aspects\nfor subsequent tasks (such as models or AutoML pipelines). Beyond its feature\nengineering capabilities, Agent0 also offers an automated prompt-engineering\ntuning method that utilizes dynamic feedback loops from an oracle. Our findings\ndemonstrate that this closed-loop methodology is both practical and effective\nfor automated feature discovery, which is recognized as one of the most\nchallenging phases in current recommender system development.",
"categories": [
"cs.IR",
"cs.LG"
],
"published": "2025-07-25T06:45:10+00:00",
"url": "http://arxiv.org/pdf/2507.18993v1",
"resource_uri": "arxiv://2507.18993v1",
"citation_count": 0
},
{
"id": "2507.18992v1",
"title": "Reinforcement Learning via Conservative Agent for Environments with Random Delays",
"authors": [
"Jongsoo Lee",
"Jangwon Kim",
"Jiseok Jeong",
"Soohee Han"
],
"abstract": "Real-world reinforcement learning applications are often hindered by delayed\nfeedback from environments, which violates the Markov assumption and introduces\nsignificant challenges. Although numerous delay-compensating methods have been\nproposed for environments with constant delays, environments with random delays\nremain largely unexplored due to their inherent variability and\nunpredictability. In this study, we propose a simple yet robust agent for\ndecision-making under random delays, termed the conservative agent, which\nreformulates the random-delay environment into its constant-delay equivalent.\nThis transformation enables any state-of-the-art constant-delay method to be\ndirectly extended to the random-delay environments without modifying the\nalgorithmic structure or sacrificing performance. We evaluate the conservative\nagent-based algorithm on continuous control tasks, and empirical results\ndemonstrate that it significantly outperforms existing baseline algorithms in\nterms of asymptotic performance and sample efficiency.",
"categories": [
"cs.LG"
],
"published": "2025-07-25T06:41:06+00:00",
"url": "http://arxiv.org/pdf/2507.18992v1",
"resource_uri": "arxiv://2507.18992v1",
"citation_count": 0
},
{
"id": "2507.18989v1",
"title": "GENIAL: Generative Design Space Exploration via Network Inversion for Low Power Algorithmic Logic Units",
"authors": [
"Maxence Bouvier",
"Ryan Amaudruz",
"Felix Arnold",
"Renzo Andri",
"Lukas Cavigelli"
],
"abstract": "As AI workloads proliferate, optimizing arithmetic units is becoming\nincreasingly important to reduce the footprint of digital systems. Conventional\ndesign flows, which often rely on manual or heuristics-based optimization, are\nlimited in their ability to thoroughly explore the vast design space. In this\npaper, we introduce GENIAL, a machine learning-based framework for the\nautomatic generation and optimization of arithmetic units, more specifically\nmultipliers.\n At the core of GENIAL is a Transformer-based surrogate model trained in two\nstages, involving self-supervised pretraining followed by supervised\nfinetuning, to robustly forecast key hardware metrics such as power and area\nfrom abstracted design representations. By inverting the surrogate model,\nGENIAL efficiently searches for new operand encodings that directly minimize\npower consumption in arithmetic units for specific input data distributions.\nExtensive experiments on large datasets demonstrate that GENIAL is consistently\nmore sample efficient than other methods, and converges faster towards\noptimized designs. This enables to deploy a high-effort logic synthesis\noptimization flow in the loop, improving the accuracy of the surrogate model.\nNotably, GENIAL automatically discovers encodings that achieve up to 18%\nswitching activity savings within multipliers on representative AI workloads\ncompared with the conventional two's complement. We also demonstrate the\nversatility of our approach by achieving significant improvements on Finite\nState Machines, highlighting GENIAL's applicability for a wide spectrum of\nlogic functions. Together, these advances mark a significant step toward\nautomated Quality-of-Results-optimized combinational circuit generation for\ndigital systems.",
"categories": [
"cs.LG",
"cs.AI",
"cs.AR"
],
"published": "2025-07-25T06:34:59+00:00",
"url": "http://arxiv.org/pdf/2507.18989v1",
"resource_uri": "arxiv://2507.18989v1",
"citation_count": 0
},
{
"id": "2507.18987v1",
"title": "Differentiated Thyroid Cancer Recurrence Classification Using Machine Learning Models and Bayesian Neural Networks with Varying Priors: A SHAP-Based Interpretation of the Best Performing Model",
"authors": [
"HMNS Kumari",
"HMLS Kumari",
"UMMPK Nawarathne"
],
"abstract": "Differentiated thyroid cancer DTC recurrence is a major public health\nconcern, requiring classification and predictive models that are not only\naccurate but also interpretable and uncertainty aware. This study introduces a\ncomprehensive framework for DTC recurrence classification using a dataset\ncontaining 383 patients and 16 clinical and pathological variables. Initially,\n11 machine learning ML models were employed using the complete dataset, where\nthe Support Vector Machines SVM model achieved the highest accuracy of 0.9481.\nTo reduce complexity and redundancy, feature selection was carried out using\nthe Boruta algorithm, and the same ML models were applied to the reduced\ndataset, where it was observed that the Logistic Regression LR model obtained\nthe maximum accuracy of 0.9611. However, these ML models often lack uncertainty\nquantification, which is critical in clinical decision making. Therefore, to\naddress this limitation, the Bayesian Neural Networks BNN with six varying\nprior distributions, including Normal 0,1, Normal 0,10, Laplace 0,1, Cauchy\n0,1, Cauchy 0,2.5, and Horseshoe 1, were implemented on both the complete and\nreduced datasets. The BNN model with Normal 0,10 prior distribution exhibited\nmaximum accuracies of 0.9740 and 0.9870 before and after feature selection,\nrespectively.",
"categories": [
"cs.LG",
"cs.AI"
],
"published": "2025-07-25T06:31:31+00:00",
"url": "http://arxiv.org/pdf/2507.18987v1",
"resource_uri": "arxiv://2507.18987v1",
"citation_count": 0
},
{
"id": "2507.18983v1",
"title": "KASPER: Kolmogorov Arnold Networks for Stock Prediction and Explainable Regimes",
"authors": [
"Vidhi Oad",
"Param Pathak",
"Nouhaila Innan",
"Shalini D",
"Muhammad Shafique"
],
"abstract": "Forecasting in financial markets remains a significant challenge due to their\nnonlinear and regime-dependent dynamics. Traditional deep learning models, such\nas long short-term memory networks and multilayer perceptrons, often struggle\nto generalize across shifting market conditions, highlighting the need for a\nmore adaptive and interpretable approach. To address this, we introduce\nKolmogorov-Arnold networks for stock prediction and explainable regimes\n(KASPER), a novel framework that integrates regime detection, sparse\nspline-based function modeling, and symbolic rule extraction. The framework\nidentifies hidden market conditions using a Gumbel-Softmax-based mechanism,\nenabling regime-specific forecasting. For each regime, it employs\nKolmogorov-Arnold networks with sparse spline activations to capture intricate\nprice behaviors while maintaining robustness. Interpretability is achieved\nthrough symbolic learning based on Monte Carlo Shapley values, which extracts\nhuman-readable rules tailored to each regime. Applied to real-world financial\ntime series from Yahoo Finance, the model achieves an $R^2$ score of 0.89, a\nSharpe Ratio of 12.02, and a mean squared error as low as 0.0001, outperforming\nexisting methods. This research establishes a new direction for regime-aware,\ntransparent, and robust forecasting in financial markets.",
"categories": [
"cs.LG"
],
"published": "2025-07-25T06:21:24+00:00",
"url": "http://arxiv.org/pdf/2507.18983v1",
"resource_uri": "arxiv://2507.18983v1",
"citation_count": 0
},
{
"id": "2507.18975v1",
"title": "Secure Best Arm Identification in the Presence of a Copycat",
"authors": [
"Asaf Cohen",
"Onur Günlü"
],
"abstract": "Consider the problem of best arm identification with a security constraint.\nSpecifically, assume a setup of stochastic linear bandits with $K$ arms of\ndimension $d$. In each arm pull, the player receives a reward that is the sum\nof the dot product of the arm with an unknown parameter vector and independent\nnoise. The player's goal is to identify the best arm after $T$ arm pulls.\nMoreover, assume a copycat Chloe is observing the arm pulls. The player wishes\nto keep Chloe ignorant of the best arm.\n While a minimax--optimal algorithm identifies the best arm with an\n$\\Omega\\left(\\frac{T}{\\log(d)}\\right)$ error exponent, it easily reveals its\nbest-arm estimate to an outside observer, as the best arms are played more\nfrequently. A naive secure algorithm that plays all arms equally results in an\n$\\Omega\\left(\\frac{T}{d}\\right)$ exponent. In this paper, we propose a secure\nalgorithm that plays with \\emph{coded arms}. The algorithm does not require any\nkey or cryptographic primitives, yet achieves an\n$\\Omega\\left(\\frac{T}{\\log^2(d)}\\right)$ exponent while revealing almost no\ninformation on the best arm.",
"categories": [
"cs.LG"
],
"published": "2025-07-25T06:00:44+00:00",
"url": "http://arxiv.org/pdf/2507.18975v1",
"resource_uri": "arxiv://2507.18975v1",
"citation_count": 0
},
{
"id": "2507.18974v1",
"title": "Neural network ensemble for computing cross sections for rotational transitions in H$_{2}$O + H$_{2}$O collisions",
"authors": [
"Bikramaditya Mandal",
"Dmitri Babikov",
"Phillip C. Stancil",
"Robert C. Forrey",
"Roman V. Krems",
"Naduvalath Balakrishnan"
],
"abstract": "Water (H$_2$O) is one of the most abundant molecules in the universe and is\nfound in a wide variety of astrophysical environments. Rotational transitions\nin H$_2$O + H$_2$O collisions are important in modeling environments rich in\nwater molecules but they are computationally intractable using quantum\nmechanical methods. Here, we present a machine learning (ML) tool using an\nensemble of neural networks (NNs) to predict cross sections to construct a\ndatabase of rate coefficients for rotationally inelastic transitions in\ncollisions of complex molecules such as water. The proposed methodology\nutilizes data computed with a mixed quantum-classical theory (MQCT). We\nillustrate that efficient ML models using NN can be built to accurately\ninterpolate in the space of 12 quantum numbers for rotational transitions in\ntwo asymmetric top molecules, spanning both initial and final states. We\nexamine various architectures of data corresponding to each collision energy,\nsymmetry of water molecule, and excitation/de-excitation rotational\ntransitions, and optimize the training/validation data sets. Using only about\n10\\% of the computed data for training, the NNs predict cross sections of\nstate-to-state rotational transitions of H$_{2}$O + H$_{2}$O collision with\naverage relative root mean square error of 0.409. Thermally averaged cross\nsections, computed using the predicted state-to-state cross sections\n($\\sim$90\\%) and the data used for training and validation ($\\sim$10\\%) were\ncompared against those obtained entirely from MQCT calculations. The agreement\nis found to be excellent with an average percent deviation of about\n$\\sim$13.5\\%. The methodology is robust, and thus, applicable to other complex\nmolecular systems.",
"categories": [
"physics.chem-ph",
"physics.comp-ph",
"physics.space-ph",
"quant-ph"
],
"published": "2025-07-25T05:59:32+00:00",
"url": "http://arxiv.org/pdf/2507.18974v1",
"resource_uri": "arxiv://2507.18974v1",
"citation_count": 0
},
{
"id": "2507.18973v1",
"title": "A Toolbox, Not a Hammer -- Multi-TAG: Scaling Math Reasoning with Multi-Tool Aggregation",
"authors": [
"Bohan Yao",
"Vikas Yadav"
],
"abstract": "Augmenting large language models (LLMs) with external tools is a promising\navenue for developing high-performance mathematical reasoning systems. Prior\ntool-augmented approaches typically finetune an LLM to select and invoke a\nsingle tool at each reasoning step and show promising results on simpler math\nreasoning benchmarks such as GSM8K. However, these approaches struggle with\nmore complex math problems that require precise reasoning over multiple steps.\nTo address this limitation, in this work, we propose Multi-TAG, a Multi-Tool\nAGgregation-based framework. Instead of relying on a single tool, Multi-TAG\nguides an LLM to concurrently invoke multiple tools at each reasoning step. It\nthen aggregates their diverse outputs to verify and refine the reasoning\nprocess, enhancing solution robustness and accuracy. Notably, Multi-TAG is a\nfinetuning-free, inference-only framework, making it readily applicable to any\nLLM backbone, including large open-weight models which are computationally\nexpensive to finetune and proprietary frontier models which cannot be finetuned\nwith custom recipes. We evaluate Multi-TAG on four challenging benchmarks:\nMATH500, AIME, AMC, and OlympiadBench. Across both open-weight and\nclosed-source LLM backbones, Multi-TAG consistently and substantially\noutperforms state-of-the-art baselines, achieving average improvements of 6.0%\nto 7.5% over state-of-the-art baselines.",
"categories": [
"cs.CL",
"cs.AI",
"cs.LG"
],
"published": "2025-07-25T05:57:47+00:00",
"url": "http://arxiv.org/pdf/2507.18973v1",
"resource_uri": "arxiv://2507.18973v1",
"citation_count": 0
},
{
"id": "2507.18972v1",
"title": "TiVy: Time Series Visual Summary for Scalable Visualization",
"authors": [
"Gromit Yeuk-Yin Chan",
"Luis Gustavo Nonato",
"Themis Palpanas",
"Cláudio T. Silva",
"Juliana Freire"
],
"abstract": "Visualizing multiple time series presents fundamental tradeoffs between\nscalability and visual clarity. Time series capture the behavior of many\nlarge-scale real-world processes, from stock market trends to urban activities.\nUsers often gain insights by visualizing them as line charts, juxtaposing or\nsuperposing multiple time series to compare them and identify trends and\npatterns. However, existing representations struggle with scalability: when\ncovering long time spans, leading to visual clutter from too many small\nmultiples or overlapping lines. We propose TiVy, a new algorithm that\nsummarizes time series using sequential patterns. It transforms the series into\na set of symbolic sequences based on subsequence visual similarity using\nDynamic Time Warping (DTW), then constructs a disjoint grouping of similar\nsubsequences based on the frequent sequential patterns. The grouping result, a\nvisual summary of time series, provides uncluttered superposition with fewer\nsmall multiples. Unlike common clustering techniques, TiVy extracts similar\nsubsequences (of varying lengths) aligned in time. We also present an\ninteractive time series visualization that renders large-scale time series in\nreal-time. Our experimental evaluation shows that our algorithm (1) extracts\nclear and accurate patterns when visualizing time series data, (2) achieves a\nsignificant speed-up (1000X) compared to a straightforward DTW clustering. We\nalso demonstrate the efficiency of our approach to explore hidden structures in\nmassive time series data in two usage scenarios.",
"categories": [
"cs.GR",
"cs.LG"
],
"published": "2025-07-25T05:50:01+00:00",
"url": "http://arxiv.org/pdf/2507.18972v1",
"resource_uri": "arxiv://2507.18972v1",
"citation_count": 0
},
{
"id": "2507.18967v1",
"title": "Underwater Waste Detection Using Deep Learning A Performance Comparison of YOLOv7 to 10 and Faster RCNN",
"authors": [
"UMMPK Nawarathne",
"HMNS Kumari",
"HMLS Kumari"
],
"abstract": "Underwater pollution is one of today's most significant environmental\nconcerns, with vast volumes of garbage found in seas, rivers, and landscapes\naround the world. Accurate detection of these waste materials is crucial for\nsuccessful waste management, environmental monitoring, and mitigation\nstrategies. In this study, we investigated the performance of five cutting-edge\nobject recognition algorithms, namely YOLO (You Only Look Once) models,\nincluding YOLOv7, YOLOv8, YOLOv9, YOLOv10, and Faster Region-Convolutional\nNeural Network (R-CNN), to identify which model was most effective at\nrecognizing materials in underwater situations. The models were thoroughly\ntrained and tested on a large dataset containing fifteen different classes\nunder diverse conditions, such as low visibility and variable depths. From the\nabove-mentioned models, YOLOv8 outperformed the others, with a mean Average\nPrecision (mAP) of 80.9%, indicating a significant performance. This increased\nperformance is attributed to YOLOv8's architecture, which incorporates advanced\nfeatures such as improved anchor-free mechanisms and self-supervised learning,\nallowing for more precise and efficient recognition of items in a variety of\nsettings. These findings highlight the YOLOv8 model's potential as an effective\ntool in the global fight against pollution, improving both the detection\ncapabilities and scalability of underwater cleanup operations.",
"categories": [
"cs.CV",
"cs.AI",
"cs.LG"
],
"published": "2025-07-25T05:36:37+00:00",
"url": "http://arxiv.org/pdf/2507.18967v1",
"resource_uri": "arxiv://2507.18967v1",
"citation_count": 0
},
{
"id": "2507.18964v1",
"title": "End-to-End Photodissociation Dynamics of Energized H$_2$COO",
"authors": [
"Cangtao Yin",
"Silvan Käser",
"Meenu Upadhyay",
"Markus Meuwly"
],
"abstract": "The end-to-end dynamics of the smallest energized Criegee intermediate,\nH$_2$COO, was characterized for vibrational excitation close to and a few\nkcal/mol above the barrier for hydrogen transfer. From an aggregate of at least\n5 $\\mu$s of molecular dynamics simulations using a neural\nnetwork-representation of CASPT2/aug-cc-pVTZ reference data, the branching\nratios into molecular products HCO+OH, CO$_2$+H$_2$, or H$_2$O+CO was\nquantitatively determined. Consistent with earlier calculations and recent\nexperiments, decay into HCO+OH was found to be rare $(\\sim 2 \\%)$ whereas the\nother two molecular product channels are accessed with fractions of $\\sim 30\n\\%$ and $\\sim 20 \\%$, respectively. On the 1 ns time scale, which was the\nlength of an individual MD simulation, more than 40 \\% of the systems remain in\nthe reactant state due to partial intramolecular vibrational redistribution\n(IVR). Formation of CO$_2$+H$_2$ occurs through a bifurcating pathway, one of\nwhich passes through formic acid whereas the more probable route connects the\ndi-radical OCH$_2$O with the product through a low-lying transition state.\nNotably, none of the intermediates along the pathway accumulate and their\nmaximum concentration always remains well below 5 \\%. This work demonstrates\nthat atomistic simulations with global reactive machine-learned energy\nfunctions provide a quantitative understanding of the chemistry and reaction\ndynamics for atmospheric reactions in the gas phase.",
"categories": [
"physics.chem-ph"
],
"published": "2025-07-25T05:28:27+00:00",
"url": "http://arxiv.org/pdf/2507.18964v1",
"resource_uri": "arxiv://2507.18964v1",
"citation_count": 0
},
{
"id": "2507.18954v1",
"title": "Almost fault--tolerant quantum machine learning with drastic overhead reduction",
"authors": [
"Haiyue Kang",
"Younghun Kim",
"Eromanga Adermann",
"Martin Sevior",
"Muhammad Usman"
],
"abstract": "Errors in the current generation of quantum processors pose a significant\nchallenge towards practical-scale implementations of quantum machine learning\n(QML) as they lead to trainability issues arising from noise-induced barren\nplateaus, as well as performance degradations due to the noise accumulation in\ndeep circuits even when QML models are free from barren plateaus. Quantum error\ncorrection (QEC) protocols are being developed to overcome hardware noise, but\ntheir extremely high spacetime overheads, mainly due to magic state\ndistillation, make them infeasible for near-term practical implementation. This\nwork proposes the idea of partial quantum error correction (QEC) for quantum\nmachine learning (QML) models and identifies a sweet spot where distillations\nare omitted to significantly reduce overhead. By assuming error-corrected\ntwo-qubit CNOTs (Clifford operations), we demonstrate that the QML models\nremain trainable even when single-qubit gates are subjected to $\\approx0.2\\%$\ndepolarizing noise, corresponding to a gate error rate of $\\approx0.13\\%$ under\nrandomized benchmarking. Further analysis based on various noise models, such\nas phase-damping and thermal-dissipation channels at low temperature, indicates\nthat the QML models are trainable independent of the mean angle of\nover-rotation, or can even be improved by thermal damping that purifies a\nquantum state away from depolarizations. While it may take several years to\nbuild quantum processors capable of fully fault-tolerant QML, our work proposes\na resource-efficient solution for trainable and high-accuracy QML\nimplementations in noisy environments.",
"categories": [
"quant-ph"
],
"published": "2025-07-25T04:43:37+00:00",
"url": "http://arxiv.org/pdf/2507.18954v1",
"resource_uri": "arxiv://2507.18954v1",
"citation_count": 0
},
{
"id": "2507.18952v1",
"title": "Legal Document Summarization: Enhancing Judicial Efficiency through Automation Detection",
"authors": [
"Yongjie Li",
"Ruilin Nong",
"Jianan Liu",
"Lucas Evans"
],
"abstract": "Legal document summarization represents a significant advancement towards\nimproving judicial efficiency through the automation of key information\ndetection. Our approach leverages state-of-the-art natural language processing\ntechniques to meticulously identify and extract essential data from extensive\nlegal texts, which facilitates a more efficient review process. By employing\nadvanced machine learning algorithms, the framework recognizes underlying\npatterns within judicial documents to create precise summaries that encapsulate\nthe crucial elements. This automation alleviates the burden on legal\nprofessionals, concurrently reducing the likelihood of overlooking vital\ninformation that could lead to errors. Through comprehensive experiments\nconducted with actual legal datasets, we demonstrate the capability of our\nmethod to generate high-quality summaries while preserving the integrity of the\noriginal content and enhancing processing times considerably. The results\nreveal marked improvements in operational efficiency, allowing legal\npractitioners to direct their efforts toward critical analytical and\ndecision-making activities instead of manual reviews. This research highlights\npromising technology-driven strategies that can significantly alter workflow\ndynamics within the legal sector, emphasizing the role of automation in\nrefining judicial processes.",
"categories": [
"cs.CL"
],
"published": "2025-07-25T04:39:33+00:00",
"url": "http://arxiv.org/pdf/2507.18952v1",
"resource_uri": "arxiv://2507.18952v1",
"citation_count": 0
},
{
"id": "2507.18949v1",
"title": "Adaptive Learning Systems: Personalized Curriculum Design Using LLM-Powered Analytics",
"authors": [
"Yongjie Li",
"Ruilin Nong",
"Jianan Liu",
"Lucas Evans"
],
"abstract": "Large language models (LLMs) are revolutionizing the field of education by\nenabling personalized learning experiences tailored to individual student\nneeds. In this paper, we introduce a framework for Adaptive Learning Systems\nthat leverages LLM-powered analytics for personalized curriculum design. This\ninnovative approach uses advanced machine learning to analyze real-time data,\nallowing the system to adapt learning pathways and recommend resources that\nalign with each learner's progress. By continuously assessing students, our\nframework enhances instructional strategies, ensuring that the materials\npresented are relevant and engaging. Experimental results indicate a marked\nimprovement in both learner engagement and knowledge retention when using a\ncustomized curriculum. Evaluations conducted across varied educational\nenvironments demonstrate the framework's flexibility and positive influence on\nlearning outcomes, potentially reshaping conventional educational practices\ninto a more adaptive and student-centered model.",
"categories": [
"cs.CY",
"cs.CL"
],
"published": "2025-07-25T04:36:17+00:00",
"url": "http://arxiv.org/pdf/2507.18949v1",
"resource_uri": "arxiv://2507.18949v1",
"citation_count": 0
},
{
"id": "2507.18937v1",
"title": "CNN-based Surface Temperature Forecasts with Ensemble Numerical Weather Prediction over Medium-range Forecast Periods",
"authors": [
"Takuya Inoue",
"Takuya Kawabata"
],
"abstract": "This study proposes a method that integrates convolutional neural networks\n(CNNs) with ensemble numerical weather prediction (NWP) models, enabling\nsurface temperature forecasting at lead times beyond the short-range (five-day)\nforecast period. Owing to limited computational resources, operational\nmedium-range temperature forecasts typically rely on low-resolution NWP models,\nwhich are prone to systematic and random errors. To resolve these limitations,\nthe proposed method first reduces systematic errors through CNN-based\npost-processing (bias correction and spatial super-resolution) on each ensemble\nmember, reconstructing high-resolution temperature fields from low-resolution\nmodel outputs. Second, it reduces random errors through ensemble averaging of\nthe CNN-corrected members. This study also investigates whether the sequence of\nCNN correction and ensemble averaging affects the forecast accuracy. For\ncomparison with the proposed method, we additionally conducted experiments with\nthe CNN trained on ensemble-averaged forecasts. The first approach--CNN\ncorrection before ensemble averaging--consistently achieved higher accuracy\nthan the reverse approach. Although based on low-resolution ensemble forecasts,\nthe proposed method notably outperformed the high-resolution deterministic NWP\nmodels. These findings indicate that combining CNN-based correction with\nensemble averaging effectively reduces both the systematic and random errors in\nNWP model outputs. The proposed approach is a practical and scalable solution\nfor improving medium-range temperature forecasts, and is particularly valuable\nat operational centers with limited computational resources.",
"categories": [
"physics.ao-ph",
"cs.AI",
"cs.LG"
],
"published": "2025-07-25T04:19:05+00:00",
"url": "http://arxiv.org/pdf/2507.18937v1",
"resource_uri": "arxiv://2507.18937v1",
"citation_count": 0
},
{
"id": "2507.18935v1",
"title": "Accuracy and Limitations of Machine-Learned Interatomic Potentials for Magnetic Systems: A Case Study on Fe-Cr-C",
"authors": [
"E. O. Khazieva",
"N. M. Chtchelkatchev",
"R. E. Ryltsev"
],
"abstract": "Machine-learned interatomic potentials (MLIPs) have become the gold standard\nfor atomistic simulations, yet their extension to magnetic materials remains\nchallenging because spin fluctuations must be captured either explicitly or\nimplicitly. We address this problem for the technologically vital Fe-Cr-C\nsystem by constructing two deep machine learning potentials in DeePMD\nrealization: one trained on non-magnetic DFT data (DP-NM) and one on\nspin-polarised DFT data (DP-M). Extensive validation against experiments\nreveals a striking dichotomy. The dynamic, collective properties, viscosity and\nmelting temperatures are reproduced accurately by DP-NM but are incorrectly\nestimated by DP-M. Static, local properties, density, and lattice parameters\nare captured excellently by DP-M, especially in Fe-rich alloys, whereas DP-NM\nfails. This behaviour is explained by general properties of paramagnetic state:\nat high temperature, local magnetic moments self-average in space and time, so\ntheir explicit treatment is unnecessary for transport properties but essential\nfor equilibrium volumes. Exploiting this insight, we show that a\ntransfer-learning protocol, pre-training on non-magnetic DFT and fine-tuning on\na small set of spin-polarised data, reduces the computational cost to develop\nmagnetic MLIPs by more than an order of magnitude. Developing general-purpose\npotentials that capture static and dynamic behaviors throughout the whole\ncomposition space requires proper accounting for temperature-induced spin\nfluctuations in DFT calculations and correctly incorporating spin degrees of\nfreedom into classical force fields.",
"categories": [
"cond-mat.mtrl-sci",
"physics.chem-ph",
"physics.comp-ph"
],
"published": "2025-07-25T04:06:07+00:00",
"url": "http://arxiv.org/pdf/2507.18935v1",
"resource_uri": "arxiv://2507.18935v1",
"citation_count": 0
},
{
"id": "2507.18926v1",
"title": "Geometric Multi-color Message Passing Graph Neural Networks for Blood-brain Barrier Permeability Prediction",
"authors": [
"Trung Nguyen",
"Md Masud Rana",
"Farjana Tasnim Mukta",
"Chang-Guo Zhan",
"Duc Duy Nguyen"
],
"abstract": "Accurate prediction of blood-brain barrier permeability (BBBP) is essential\nfor central nervous system (CNS) drug development. While graph neural networks\n(GNNs) have advanced molecular property prediction, they often rely on\nmolecular topology and neglect the three-dimensional geometric information\ncrucial for modeling transport mechanisms. This paper introduces the geometric\nmulti-color message-passing graph neural network (GMC-MPNN), a novel framework\nthat enhances standard message-passing architectures by explicitly\nincorporating atomic-level geometric features and long-range interactions. Our\nmodel constructs weighted colored subgraphs based on atom types to capture the\nspatial relationships and chemical context that govern BBB permeability. We\nevaluated GMC-MPNN on three benchmark datasets for both classification and\nregression tasks, using rigorous scaffold-based splitting to ensure a robust\nassessment of generalization. The results demonstrate that GMC-MPNN\nconsistently outperforms existing state-of-the-art models, achieving superior\nperformance in both classifying compounds as permeable/non-permeable (AUC-ROC\nof 0.9704 and 0.9685) and in regressing continuous permeability values (RMSE of\n0.4609, Pearson correlation of 0.7759). An ablation study further quantified\nthe impact of specific atom-pair interactions, revealing that the model's\npredictive power derives from its ability to learn from both common and rare,\nbut chemically significant, functional motifs. By integrating spatial geometry\ninto the graph representation, GMC-MPNN sets a new performance benchmark and\noffers a more accurate and generalizable tool for drug discovery pipelines.",
"categories": [
"cs.LG"
],
"published": "2025-07-25T03:38:46+00:00",
"url": "http://arxiv.org/pdf/2507.18926v1",
"resource_uri": "arxiv://2507.18926v1",
"citation_count": 0
},
{
"id": "2507.18910v1",
"title": "A Systematic Review of Key Retrieval-Augmented Generation (RAG) Systems: Progress, Gaps, and Future Directions",
"authors": [
"Agada Joseph Oche",
"Ademola Glory Folashade",
"Tirthankar Ghosal",
"Arpan Biswas"
],
"abstract": "Retrieval-Augmented Generation (RAG) represents a major advancement in\nnatural language processing (NLP), combining large language models (LLMs) with\ninformation retrieval systems to enhance factual grounding, accuracy, and\ncontextual relevance. This paper presents a comprehensive systematic review of\nRAG, tracing its evolution from early developments in open domain question\nanswering to recent state-of-the-art implementations across diverse\napplications. The review begins by outlining the motivations behind RAG,\nparticularly its ability to mitigate hallucinations and outdated knowledge in\nparametric models. Core technical components-retrieval mechanisms,\nsequence-to-sequence generation models, and fusion strategies are examined in\ndetail. A year-by-year analysis highlights key milestones and research trends,\nproviding insight into RAG's rapid growth. The paper further explores the\ndeployment of RAG in enterprise systems, addressing practical challenges\nrelated to retrieval of proprietary data, security, and scalability. A\ncomparative evaluation of RAG implementations is conducted, benchmarking\nperformance on retrieval accuracy, generation fluency, latency, and\ncomputational efficiency. Persistent challenges such as retrieval quality,\nprivacy concerns, and integration overhead are critically assessed. Finally,\nthe review highlights emerging solutions, including hybrid retrieval\napproaches, privacy-preserving techniques, optimized fusion strategies, and\nagentic RAG architectures. These innovations point toward a future of more\nreliable, efficient, and context-aware knowledge-intensive NLP systems.",
"categories": [
"cs.CL",
"cs.LG"
],
"published": "2025-07-25T03:05:46+00:00",
"url": "http://arxiv.org/pdf/2507.18910v1",
"resource_uri": "arxiv://2507.18910v1",
"citation_count": 0
},
{
"id": "2507.18903v1",
"title": "Probably Approximately Correct Causal Discovery",
"authors": [
"Mian Wei",
"Somesh Jha",
"David Page"
],
"abstract": "The discovery of causal relationships is a foundational problem in artificial\nintelligence, statistics, epidemiology, economics, and beyond. While elegant\ntheories exist for accurate causal discovery given infinite data, real-world\napplications are inherently resource-constrained. Effective methods for\ninferring causal relationships from observational data must perform well under\nfinite data and time constraints, where \"performing well\" implies achieving\nhigh, though not perfect accuracy. In his seminal paper A Theory of the\nLearnable, Valiant highlighted the importance of resource constraints in\nsupervised machine learning, introducing the concept of Probably Approximately\nCorrect (PAC) learning as an alternative to exact learning. Inspired by\nValiant's work, we propose the Probably Approximately Correct Causal (PACC)\nDiscovery framework, which extends PAC learning principles to the causal field.\nThis framework emphasizes both computational and sample efficiency for\nestablished causal methods such as propensity score techniques and instrumental\nvariable approaches. Furthermore, we show that it can also provide theoretical\nguarantees for other widely used methods, such as the Self-Controlled Case\nSeries (SCCS) method, which had previously lacked such guarantees.",
"categories": [
"stat.ML",
"cs.LG"
],
"published": "2025-07-25T02:51:15+00:00",
"url": "http://arxiv.org/pdf/2507.18903v1",
"resource_uri": "arxiv://2507.18903v1",
"citation_count": 0
},
{
"id": "2507.18885v1",
"title": "IsaMini: Redesigned Isabelle Proof Lanugage for Machine Learning",
"authors": [
"Qiyuan Xu",
"Renxi Wang",
"Haonan Li",
"David Sanan",
"Conrad Watt"
],
"abstract": "Neural Theorem Proving (NTP) employs deep learning methods, particularly\nLarge Language Models (LLMs), to automate formal proofs in proof assistants.\nThis approach holds promise for reducing the dramatic labor costs or\ncomputation costs required in proof engineering, which is fundamental to formal\nverification and other software engineering methods. The paper explores the\npotential of improving NTP by redesigning the proof language, given that LLMs'\ncapabilities depend highly on representations. We introduce \\emph{MiniLang}, a\nredesigned proof language for Isabelle/HOL incorporating an improved version of\nSledgehammer. Experiments show MiniLang benefits two fine-tuned LLMs by\nimproving the success rate on the PISA benchmark by up to 29\\% in comparison to\ngeneration of Isar proof script. The success rate under one attempt (so-called\n\\emph{pass@1}) reaches 69.1\\%, exceeding the previous Baldur's pass@64\n(65.7\\%); The pass@8 reaches 79.2\\%, exceeding the state-of-the-art on PISA\n(71.0\\%) achieved by Magnushammer.",
"categories": [
"cs.PL"
],
"published": "2025-07-25T02:04:56+00:00",
"url": "http://arxiv.org/pdf/2507.18885v1",
"resource_uri": "arxiv://2507.18885v1",
"citation_count": 0
},
{
"id": "2507.18867v1",
"title": "Learning Individual Intrinsic Reward in Multi-Agent Reinforcement Learning via Incorporating Generalized Human Expertise",
"authors": [
"Xuefei Wu",
"Xiao Yin",
"Yuanyang Zhu",
"Chunlin Chen"
],
"abstract": "Efficient exploration in multi-agent reinforcement learning (MARL) is a\nchallenging problem when receiving only a team reward, especially in\nenvironments with sparse rewards. A powerful method to mitigate this issue\ninvolves crafting dense individual rewards to guide the agents toward efficient\nexploration. However, individual rewards generally rely on manually engineered\nshaping-reward functions that lack high-order intelligence, thus it behaves\nineffectively than humans regarding learning and generalization in complex\nproblems. To tackle these issues, we combine the above two paradigms and\npropose a novel framework, LIGHT (Learning Individual Intrinsic reward via\nIncorporating Generalized Human experTise), which can integrate human knowledge\ninto MARL algorithms in an end-to-end manner. LIGHT guides each agent to avoid\nunnecessary exploration by considering both individual action distribution and\nhuman expertise preference distribution. Then, LIGHT designs individual\nintrinsic rewards for each agent based on actionable representational\ntransformation relevant to Q-learning so that the agents align their action\npreferences with the human expertise while maximizing the joint action value.\nExperimental results demonstrate the superiority of our method over\nrepresentative baselines regarding performance and better knowledge reusability\nacross different sparse-reward tasks on challenging scenarios.",
"categories": [
"cs.LG",
"cs.AI",
"cs.MA"
],
"published": "2025-07-25T00:59:10+00:00",
"url": "http://arxiv.org/pdf/2507.18867v1",
"resource_uri": "arxiv://2507.18867v1",
"citation_count": 0
},
{
"id": "2507.18866v1",
"title": "Early Mortality Prediction in ICU Patients with Hypertensive Kidney Disease Using Interpretable Machine Learning",
"authors": [
"Yong Si",
"Junyi Fan",
"Li Sun",
"Shuheng Chen",
"Minoo Ahmadi",
"Elham Pishgar",
"Kamiar Alaei",
"Greg Placencia",
"Maryam Pishgar"
],
"abstract": "Background: Hypertensive kidney disease (HKD) patients in intensive care\nunits (ICUs) face high short-term mortality, but tailored risk prediction tools\nare lacking. Early identification of high-risk individuals is crucial for\nclinical decision-making. Methods: We developed a machine learning framework to\npredict 30-day in-hospital mortality among ICU patients with HKD using early\nclinical data from the MIMIC-IV v2.2 database. A cohort of 1,366 adults was\ncurated with strict criteria, excluding malignancy cases. Eighteen clinical\nfeatures-including vital signs, labs, comorbidities, and therapies-were\nselected via random forest importance and mutual information filtering. Several\nmodels were trained and compared with stratified five-fold cross-validation;\nCatBoost demonstrated the best performance. Results: CatBoost achieved an AUROC\nof 0.88 on the independent test set, with sensitivity of 0.811 and specificity\nof 0.798. SHAP values and Accumulated Local Effects (ALE) plots showed the\nmodel relied on meaningful predictors such as altered consciousness,\nvasopressor use, and coagulation status. Additionally, the DREAM algorithm was\nintegrated to estimate patient-specific posterior risk distributions, allowing\nclinicians to assess both predicted mortality and its uncertainty. Conclusions:\nWe present an interpretable machine learning pipeline for early, real-time risk\nassessment in ICU patients with HKD. By combining high predictive performance\nwith uncertainty quantification, our model supports individualized triage and\ntransparent clinical decisions. This approach shows promise for clinical\ndeployment and merits external validation in broader critical care populations.",
"categories": [
"cs.LG"
],
"published": "2025-07-25T00:48:23+00:00",
"url": "http://arxiv.org/pdf/2507.18866v1",
"resource_uri": "arxiv://2507.18866v1",
"citation_count": 0
},
{
"id": "2507.18858v1",
"title": "Weak-to-Strong Generalization with Failure Trajectories: A Tree-based Approach to Elicit Optimal Policy in Strong Models",
"authors": [
"Ruimeng Ye",
"Zihan Wang",
"Xiao Yang",
"Zinan Ling",
"Manling Li",
"Bo Hui"
],
"abstract": "Weak-to-Strong generalization (W2SG) is a new trend to elicit the full\ncapabilities of a strong model with supervision from a weak model. While\nexisting W2SG studies focus on simple tasks like binary classification, we\nextend this paradigm to complex interactive decision-making environments.\nSpecifically, we fine-tune a strong model with trajectories of intermediate\nactions generated by a weak model. Motivated by the human learning process, we\npropose to generalize not only success knowledge but also failure experience so\nthat the strong model can learn from failed trajectories accumulated by weak\nmodels. To effectively and efficiently elicit the potential of strong agents,\nwe further construct ``trajectory trees,\" a hierarchical representation that\norganizes weak model-generated action trajectories, coupled with Monte Carlo\nTree Search (MCTS) to optimize the strong model. Through theoretical analysis,\nwe provide formal guarantees for the effectiveness of our method in improving\nW2SG performance. Our empirical evaluations demonstrate substantial\nimprovements in reasoning and decision-making capabilities across diverse task\ndomains, validating the scalability and robustness of our proposed framework.\nOur code is available at: https://github.com/yeruimeng/TraTree",
"categories": [
"cs.LG"
],
"published": "2025-07-25T00:17:09+00:00",
"url": "http://arxiv.org/pdf/2507.18858v1",
"resource_uri": "arxiv://2507.18858v1",
"citation_count": 0
},
{
"id": "2507.18849v1",
"title": "Optimizing Metachronal Paddling with Reinforcement Learning at Low Reynolds Number",
"authors": [
"Alana A. Bailey",
"Robert D. Guy"
],
"abstract": "Metachronal paddling is a swimming strategy in which an organism oscillates\nsets of adjacent limbs with a constant phase lag, propagating a metachronal\nwave through its limbs and propelling it forward. This limb coordination\nstrategy is utilized by swimmers across a wide range of Reynolds numbers, which\nsuggests that this metachronal rhythm was selected for its optimality of\nswimming performance. In this study, we apply reinforcement learning to a\nswimmer at zero Reynolds number and investigate whether the learning algorithm\nselects this metachronal rhythm, or if other coordination patterns emerge. We\ndesign the swimmer agent with an elongated body and pairs of straight,\ninflexible paddles placed along the body for various fixed paddle spacings.\nBased on paddle spacing, the swimmer agent learns qualitatively different\ncoordination patterns. At tight spacings, a back-to-front metachronal wave-like\nstroke emerges which resembles the commonly observed biological rhythm, but at\nwide spacings, different limb coordinations are selected. Across all resulting\nstrokes, the fastest stroke is dependent on the number of paddles, however, the\nmost efficient stroke is a back-to-front wave-like stroke regardless of the\nnumber of paddles.",
"categories": [
"physics.flu-dyn",
"cs.LG",
"stat.ML"
],
"published": "2025-07-24T23:38:06+00:00",
"url": "http://arxiv.org/pdf/2507.18849v1",
"resource_uri": "arxiv://2507.18849v1",
"citation_count": 0
},
{
"id": "2507.18838v1",
"title": "Flow Stochastic Segmentation Networks",
"authors": [
"Fabio De Sousa Ribeiro",
"Omar Todd",
"Charles Jones",
"Avinash Kori",
"Raghav Mehta",
"Ben Glocker"
],
"abstract": "We introduce the Flow Stochastic Segmentation Network (Flow-SSN), a\ngenerative segmentation model family featuring discrete-time autoregressive and\nmodern continuous-time flow variants. We prove fundamental limitations of the\nlow-rank parameterisation of previous methods and show that Flow-SSNs can\nestimate arbitrarily high-rank pixel-wise covariances without assuming the rank\nor storing the distributional parameters. Flow-SSNs are also more efficient to\nsample from than standard diffusion-based segmentation models, thanks to most\nof the model capacity being allocated to learning the base distribution of the\nflow, constituting an expressive prior. We apply Flow-SSNs to challenging\nmedical imaging benchmarks and achieve state-of-the-art results. Code\navailable: https://github.com/biomedia-mira/flow-ssn.",
"categories": [
"cs.CV",
"cs.AI",
"stat.ML"
],
"published": "2025-07-24T22:26:28+00:00",
"url": "http://arxiv.org/pdf/2507.18838v1",
"resource_uri": "arxiv://2507.18838v1",
"citation_count": 0
},
{
"id": "2507.18830v1",
"title": "RealDeal: Enhancing Realism and Details in Brain Image Generation via Image-to-Image Diffusion Models",
"authors": [
"Shen Zhu",
"Yinzhu Jin",
"Tyler Spears",
"Ifrah Zawar",
"P. Thomas Fletcher"
],
"abstract": "We propose image-to-image diffusion models that are designed to enhance the\nrealism and details of generated brain images by introducing sharp edges, fine\ntextures, subtle anatomical features, and imaging noise. Generative models have\nbeen widely adopted in the biomedical domain, especially in image generation\napplications. Latent diffusion models achieve state-of-the-art results in\ngenerating brain MRIs. However, due to latent compression, generated images\nfrom these models are overly smooth, lacking fine anatomical structures and\nscan acquisition noise that are typically seen in real images. This work\nformulates the realism enhancing and detail adding process as image-to-image\ndiffusion models, which refines the quality of LDM-generated images. We employ\ncommonly used metrics like FID and LPIPS for image realism assessment.\nFurthermore, we introduce new metrics to demonstrate the realism of images\ngenerated by RealDeal in terms of image noise distribution, sharpness, and\ntexture.",
"categories": [
"cs.CV",
"cs.LG"
],
"published": "2025-07-24T22:04:39+00:00",
"url": "http://arxiv.org/pdf/2507.18830v1",
"resource_uri": "arxiv://2507.18830v1",
"citation_count": 0
},
{
"id": "2507.18827v1",
"title": "CueBuddy: helping non-native English speakers navigate English-centric STEM education",
"authors": [
"Pranav Gupta"
],
"abstract": "Students across the world in STEM classes, especially in the Global South,\nfall behind their peers who are more fluent in English, despite being at par\nwith them in terms of scientific prerequisites. While many of them are able to\nfollow everyday English at ease, key terms in English stay challenging. In most\ncases, such students have had most of their course prerequisites in a lower\nresource language. Live speech translation to lower resource languages is a\npromising area of research, however, models for speech translation can be too\nexpensive on a large scale and often struggle with technical content. In this\npaper, we describe CueBuddy, which aims to remediate these issues by providing\nreal-time \"lexical cues\" through technical keyword spotting along real-time\nmultilingual glossary lookup to help students stay up to speed with complex\nEnglish jargon without disrupting their concentration on the lecture. We also\ndescribe the limitations and future extensions of our approach.",
"categories": [
"cs.CL",
"cs.LG"
],
"published": "2025-07-24T21:56:47+00:00",
"url": "http://arxiv.org/pdf/2507.18827v1",
"resource_uri": "arxiv://2507.18827v1",
"citation_count": 0
},
{
"id": "2507.18824v1",
"title": "Deep Neural Network Driven Simulation Based Inference Method for Pole Position Estimation under Model Misspecification",
"authors": [
"Daniel Sadasivan",
"Isaac Cordero",
"Andrew Graham",
"Cecilia Marsh",
"Daniel Kupcho",
"Melana Mourad",
"Maxim Mai"
],
"abstract": "Simulation Based Inference (SBI) is shown to yield more accurate resonance\nparameter estimates than traditional chi-squared minimization in certain cases\nof model misspecification, demonstrated through a case study of pi-pi\nscattering and the rho(770) resonance. Models fit to some data sets using\nchi-squared minimization can predict inaccurate pole positions for the\nrho(770), while SBI provides more robust predictions across the same models and\ndata. This result is significant both as a proof of concept that SBI can handle\nmodel misspecification, and because accurate modeling of pi-pi scattering is\nessential in the study of many contemporary physical systems (e.g., a1(1260),\nomega(782)).",
"categories": [
"hep-ph",
"nucl-th",
"stat.AP",
"stat.ML"
],
"published": "2025-07-24T21:49:58+00:00",
"url": "http://arxiv.org/pdf/2507.18824v1",
"resource_uri": "arxiv://2507.18824v1",
"citation_count": 0
}
],
"query_components": [
[
"machine learning"
]
],
"filters": {
"date_from": null,
"min_citations": null,
"ccf_a_only": false,
"ccf_level": "A"
}
}