{"id":1069,"date":"2026-05-20T08:35:19","date_gmt":"2026-05-20T00:35:19","guid":{"rendered":"https:\/\/www.eutaboo.com\/index.php\/2026\/05\/20\/2026-05-20-%e5%8c%bb%e5%ad%a6%e5%9b%be%e5%83%8f%e5%88%86%e5%89%b2%e8%ae%ba%e6%96%87%e7%b2%be%e8%af%bb%ef%bc%9apatch-moe-mamba-%e4%b8%8e-depthpolyp\/"},"modified":"2026-05-20T08:35:19","modified_gmt":"2026-05-20T00:35:19","slug":"2026-05-20-%e5%8c%bb%e5%ad%a6%e5%9b%be%e5%83%8f%e5%88%86%e5%89%b2%e8%ae%ba%e6%96%87%e7%b2%be%e8%af%bb%ef%bc%9apatch-moe-mamba-%e4%b8%8e-depthpolyp","status":"publish","type":"post","link":"https:\/\/www.eutaboo.com\/index.php\/2026\/05\/20\/2026-05-20-%e5%8c%bb%e5%ad%a6%e5%9b%be%e5%83%8f%e5%88%86%e5%89%b2%e8%ae%ba%e6%96%87%e7%b2%be%e8%af%bb%ef%bc%9apatch-moe-mamba-%e4%b8%8e-depthpolyp\/","title":{"rendered":"2026-05-20 \u533b\u5b66\u56fe\u50cf\u5206\u5272\u8bba\u6587\u7cbe\u8bfb\uff1aPatch-MoE Mamba \u4e0e DepthPolyp"},"content":{"rendered":"<h1>\u4eca\u65e5\u533b\u5b66\u56fe\u50cf\u5206\u5272\u6700\u65b0\u8bba\u6587\u7cbe\u8bfb\u8ffd\u8e2a<\/h1>\n<h2>\u4eca\u65e5\u7ed3\u8bba<\/h2>\n<p>\u4eca\u5929\u672a\u68c0\u7d22\u5230\u660e\u786e\u5df2\u6b63\u5f0f\u63a5\u6536 MICCAI \/ CVPR \/ ICCV \/ ECCV \/ NeurIPS \/ ICLR \/ MedIA \/ TMI \u7b49\u9876\u4f1a\u9876\u520a\u3001\u4e14\u5c1a\u672a\u88ab\u672c\u4efb\u52a1\u63a8\u8350\u8fc7\u7684\u5168\u65b0\u533b\u5b66\u56fe\u50cf\u5206\u5272\u8bba\u6587\uff1b\u53bb\u91cd\u540e\uff0c\u6700\u503c\u5f97\u5173\u6ce8\u7684\u662f\u4e24\u7bc7 2026 \u5e74 arXiv \u65b0\u7a3f\uff1a<strong>Patch-MoE Mamba<\/strong> \u4e0e <strong>DepthPolyp<\/strong>\u3002\u524d\u8005\u4ee3\u8868 Mamba\/VM-UNet \u7cfb\u5217\u5728\u626b\u63cf\u987a\u5e8f\u4e0e\u65b9\u5411\u878d\u5408\u4e0a\u7684\u7ed3\u6784\u6539\u9020\uff0c\u540e\u8005\u4ee3\u8868\u9762\u5411\u771f\u5b9e\u5185\u955c\u9000\u5316\u573a\u666f\u7684\u8f7b\u91cf\u7ea7 polyp segmentation 
\u4e0e\u9c81\u68d2\u8bc4\u6d4b\u8d8b\u52bf\uff1b\u4e24\u7bc7\u90fd\u6bd4\u5355\u7eaf\u201c\u5806\u6a21\u5757\u201d\u66f4\u6709\u590d\u73b0\u548c\u6539\u9020\u4ef7\u503c\uff0c\u4f46 Patch-MoE Mamba \u7684\u8ba1\u7b97\u5f00\u9500\u548c DepthPolyp \u7684\u4f2a\u6df1\u5ea6\u4f9d\u8d56\u90fd\u9700\u8981\u8c28\u614e\u770b\u5f85\u3002<\/p>\n<h2>\u68c0\u7d22\u8bf4\u660e<\/h2>\n<p>\u4eca\u65e5\u68c0\u7d22\u8303\u56f4\u8986\u76d6 arXiv 2026-05-18\/2026-05-15 \u6700\u65b0\u63d0\u4ea4\u3001medical image segmentation\u3001polyp segmentation\u3001Mamba medical segmentation\u30013D medical image segmentation\u3001foundation model for medical segmentation \u7b49\u5173\u952e\u8bcd\uff0c\u5e76\u5bf9\u5386\u53f2\u8f93\u51fa\u6587\u4ef6 <code>\/tmp\/medseg_daily_*<\/code> \u4e2d\u7684\u6807\u9898\u3001arXiv ID \u4e0e PDF \u94fe\u63a5\u505a\u4e86\u53bb\u91cd\u68c0\u67e5\u3002\u4eca\u5929\u672a\u53d1\u73b0\u5f53\u5929\u53ef\u786e\u8ba4\u7684\u9876\u4f1a\/\u9876\u520a\u6b63\u5f0f\u63a5\u6536\u533b\u5b66\u56fe\u50cf\u5206\u5272\u65b0\u8bba\u6587\uff0c\u56e0\u6b64\u4ece\u6700\u65b0\u4e14 PDF \u53ef\u83b7\u53d6\u7684 arXiv preprint \u4e2d\u7b5b\u9009\uff1b\u5176\u4e2d <strong>DepthPolyp<\/strong> \u7684 GitHub \u9875\u9762\u6807\u6ce8\u4e3a ICPR 2026 official implementation\uff0c\u4f46 arXiv \u5143\u6570\u636e\u4ecd\u6309 preprint \u5904\u7406\u3002\u6240\u6709\u5165\u9009\u8bba\u6587\u5747\u4e3a 2025 \u5e74\u53ca\u4ee5\u540e\u3002\u5df2\u68c0\u67e5\u5386\u53f2\u63a8\u8350\u8bb0\u5f55\u5e76\u6392\u9664\u4e86\u91cd\u590d\u8bba\u6587\uff1b\u672c\u6b21\u8df3\u8fc7\u7684\u91cd\u590d\u5019\u9009\u5305\u62ec <strong>Semi-MedRef<\/strong>\u3001<strong>Evaluation of Anatomical Shape Priors<\/strong>\u3001<strong>Med-DisSeg<\/strong>\u3001<strong>SpectraFlow<\/strong>\u3001<strong>MedCore<\/strong>\u3001<strong>FEFormer<\/strong>\u3001<strong>USEMA<\/strong>\u3001<strong>CMFDNet<\/strong>\u3001<strong>Topo-VM-UNetV2<\/strong> \u7b49\u3002<\/p>\n<h2>WordPress \u53d1\u5e03<\/h2>\n<ul>\n<li>WordPress 
\u6587\u7ae0\u94fe\u63a5\uff1ahttps:\/\/www.eutaboo.com\/index.php\/2026\/05\/20\/2026-05-20-%e5%8c%bb%e5%ad%a6%e5%9b%be%e5%83%8f%e5%88%86%e5%89%b2%e8%ae%ba%e6%96%87%e7%b2%be%e8%af%bb%ef%bc%9apatch-moe-mamba-%e4%b8%8e-depthpolyp\/<\/li>\n<li>WordPress Post ID\uff1a1069<\/li>\n<\/ul>\n<hr \/>\n<h2>\u8bba\u6587 1\uff1aPatch-MoE Mamba: A Patch-Ordered Mixture-of-Experts State Space Architecture for Medical Image Segmentation<\/h2>\n<h3>\u57fa\u672c\u4fe1\u606f<\/h3>\n<ul>\n<li>\u6807\u9898\uff1aPatch-MoE Mamba: A Patch-Ordered Mixture-of-Experts State Space Architecture for Medical Image Segmentation<\/li>\n<li>\u4f5c\u8005 \/ \u7b2c\u4e00\u4f5c\u8005\uff1aDiego Adame, Fabian Vazquez, Jose A. Nu\u00f1ez, Huimin Li, Jinghao Yang, Erik Enriquez, DongChul Kim, Haoteng Tang, Bin Fu, Pengfei Gu \/ \u7b2c\u4e00\u4f5c\u8005 Diego Adame<\/li>\n<li>\u65f6\u95f4\uff1a2026-05-18 arXiv v1<\/li>\n<li>\u6765\u6e90\uff1aarXiv preprint\uff0carXiv:2605.17719<\/li>\n<li>\u8bba\u6587\u9875\u9762\u94fe\u63a5\uff1ahttps:\/\/arxiv.org\/abs\/2605.17719<\/li>\n<li>PDF \u6587\u4ef6 \/ PDF \u94fe\u63a5\uff1aMEDIA:\/tmp\/medseg_daily_2026-05-20\/patch_moe_mamba_2605.17719.pdf\uff1bhttps:\/\/arxiv.org\/pdf\/2605.17719<\/li>\n<li>\u4ee3\u7801\u94fe\u63a5\uff1a\u672a\u83b7\u53d6\uff1barXiv \u9875\u9762\u4e0e PDF \u6b63\u6587\u672a\u786e\u8ba4\u5b98\u65b9\u4ee3\u7801\u94fe\u63a5<\/li>\n<li>\u4efb\u52a1\uff1a2D medical image segmentation\uff1b\u4e3b\u8981\u4e3a polyp segmentation\uff0c\u5e76\u6269\u5c55\u5230 skin lesion segmentation<\/li>\n<li>\u6570\u636e\u96c6\uff1aKvasir-SEG\u3001CVC-ClinicDB\u3001CVC-ColonDB\u3001ETIS\u3001CVC-300\uff1bISIC 2017\u3001ISIC 2018<\/li>\n<li>\u65b9\u6cd5\u7c7b\u578b\uff1aU-Net-style encoder-decoder\uff1bMamba \/ VM-UNetV2 \u6539\u9020\uff1bpatch-ordered scanning\uff1bMixture-of-Experts directional fusion\uff1bSDI skip\/feature infusion<\/li>\n<\/ul>\n<h3>paper-deep-reader \u7cbe\u8bfb\u7ed3\u679c<\/h3>\n<h4>1. 
\u4e00\u53e5\u8bdd\u7ed3\u8bba<\/h4>\n<p>Patch-MoE Mamba \u6700\u6709\u4ef7\u503c\u7684\u70b9\u4e0d\u662f\u63d0\u51fa\u4e00\u4e2a\u5168\u65b0\u5206\u5272\u8303\u5f0f\uff0c\u800c\u662f\u628a Vision Mamba \u5728\u533b\u5b66\u5206\u5272\u4e2d\u7684\u4e24\u4e2a\u5177\u4f53\u75db\u70b9\u2014\u2014\u50cf\u7d20\u7ea7\u626b\u63cf\u7834\u574f\u4e8c\u7ef4\u90bb\u57df\u3001\u56fa\u5b9a\u65b9\u5411\u6c42\u548c\u7f3a\u4e4f\u81ea\u9002\u5e94\u6027\u2014\u2014\u6539\u6210\u4e86\u201c\u5c40\u90e8 patch \u987a\u5e8f\u626b\u63cf + \u7a7a\u95f4\u4f4d\u7f6e\u76f8\u5173 MoE \u65b9\u5411\u878d\u5408\u201d\uff0c\u9002\u5408\u4f5c\u4e3a DAMamba \/ VM-UNet \u7c7b\u5de5\u4f5c\u7684\u7ed3\u6784\u6539\u9020\u53c2\u8003\u3002<\/p>\n<h4>2. \u7814\u7a76\u80cc\u666f\u4e0e\u6838\u5fc3\u95ee\u9898<\/h4>\n<p>\u8bba\u6587\u7814\u7a76\u7684\u662f Mamba-based medical image segmentation\uff0c\u6838\u5fc3\u573a\u666f\u662f\u606f\u8089\u548c\u76ae\u80a4\u75c5\u7076\u8fd9\u7c7b\u8fb9\u754c\u7ec6\u3001\u5f62\u6001\u53d8\u5316\u5927\u3001\u5c40\u90e8\u7eb9\u7406\u4e0e\u957f\u7a0b\u4e0a\u4e0b\u6587\u90fd\u91cd\u8981\u7684 2D \u5206\u5272\u4efb\u52a1\u3002\u4f5c\u8005\u8ba4\u4e3a CNN \u53d7\u9650\u4e8e\u5c40\u90e8\u611f\u53d7\u91ce\uff0cTransformer \u6709\u4e8c\u6b21\u590d\u6742\u5ea6\u548c\u6570\u636e\u9700\u6c42\u95ee\u9898\uff0c\u800c Mamba\/SSM \u4ee5\u7ebf\u6027\u5e8f\u5217\u590d\u6742\u5ea6\u5efa\u6a21\u957f\u7a0b\u4f9d\u8d56\uff0c\u9002\u5408\u4f5c\u4e3a\u533b\u5b66\u5206\u5272 encoder\u3002\u4f46\u73b0\u6709 Vision Mamba 
\u5206\u5272\u6a21\u578b\u901a\u5e38\u628a\u4e8c\u7ef4\u7279\u5f81\u56fe\u76f4\u63a5\u5c55\u5e73\u6210\u4e00\u7ef4\u5e8f\u5217\uff0c\u518d\u6cbf\u56fa\u5b9a\u65b9\u5411\u626b\u63cf\uff1b\u8fd9\u4f1a\u8ba9\u4e8c\u7ef4\u76f8\u90bb\u50cf\u7d20\u5728\u5e8f\u5217\u4e2d\u76f8\u8ddd\u5f88\u8fdc\uff0c\u7279\u522b\u4e0d\u5229\u4e8e\u5c0f\u75c5\u7076\u3001\u4f4e\u5bf9\u6bd4\u8fb9\u754c\u548c\u5c40\u90e8\u7ed3\u6784\u4fdd\u6301\u3002\u7b2c\u4e8c\u4e2a\u95ee\u9898\u662f\u591a\u65b9\u5411\u626b\u63cf\u7ed3\u679c\u5e38\u7528\u7b80\u5355\u6c42\u548c\u878d\u5408\uff0c\u9ed8\u8ba4\u6bcf\u4e2a\u65b9\u5411\u5728\u6bcf\u4e2a\u7a7a\u95f4\u4f4d\u7f6e\u540c\u7b49\u91cd\u8981\uff0c\u65e0\u6cd5\u9002\u914d\u4e0d\u540c\u5927\u5c0f\u3001\u65b9\u5411\u3001\u8fb9\u754c\u590d\u6742\u5ea6\u7684\u76ee\u6807\u3002<\/p>\n<p>\u5185\u90e8 paper map \u53ef\u6982\u62ec\u4e3a\uff1a\u672c\u6587\u5728 VM-UNetV2 \u5f0f\u533b\u5b66\u5206\u5272\u6846\u67b6\u4e2d\u7814\u7a76 Vision Mamba \u7684\u626b\u63cf\u4e0e\u878d\u5408\u673a\u5236\uff1b\u4e3b\u62db\u662f patch-ordered scanning \u4fdd\u6301\u5c40\u90e8\u4e8c\u7ef4\u8fde\u7eed\u6027\uff0c\u5e76\u7528 spatial-aware MoE \u66ff\u4ee3\u56fa\u5b9a\u65b9\u5411\u6c42\u548c\uff1b\u4e3b\u8981\u58f0\u79f0\u5728\u4e94\u4e2a\u606f\u8089\u6570\u636e\u96c6\u4e0e\u4e24\u4e2a ISIC \u6570\u636e\u96c6\u4e0a\u4f18\u4e8e U-Net\u3001U-Net v2\u3001VM-UNet\u3001VM-UNetV2\uff1b\u5173\u952e\u6280\u672f\u5bf9\u8c61\u662f patch permutation\u3001\u56db\u65b9\u5411 Mamba scanner\u3001\u4e94\u4e13\u5bb6\u878d\u5408\u3001router gating \u4e0e residual directional addition\uff1b\u771f\u6b63\u8d1f\u8f7d\u5728\u201c\u626b\u63cf\u987a\u5e8f\u662f\u5426\u4fdd\u7559\u5c40\u90e8\u6027\u201d\u548c\u201cMoE \u878d\u5408\u6536\u76ca\u662f\u5426\u62b5\u5f97\u4e0a\u5f00\u9500\u201d\uff1b\u4e3b\u8981\u5931\u8d25\u98ce\u9669\u662f\u589e\u76ca\u8f83\u5c0f\u4f46\u53c2\u6570\/FLOPs \u5927\u5e45\u589e\u52a0\u3002<\/p>\n<h4>3. 
\u73b0\u6709\u65b9\u6cd5\u4e0d\u8db3<\/h4>\n<p>\u4f5c\u8005\u9488\u5bf9\u7684\u4e0d\u8db3\u5f88\u660e\u786e\uff1a<\/p>\n<ol>\n<li><strong>CNN \/ U-Net \u7cfb\u5217<\/strong>\uff1a\u5c40\u90e8\u5377\u79ef\u6709\u5229\u4e8e\u8fb9\u754c\uff0c\u4f46\u957f\u7a0b\u4f9d\u8d56\u5efa\u6a21\u5f31\uff0c\u96be\u4ee5\u6355\u6349\u5927\u8303\u56f4\u4e0a\u4e0b\u6587\u3002<\/li>\n<li><strong>Transformer \u5206\u5272\u6a21\u578b<\/strong>\uff1a\u5168\u5c40\u5efa\u6a21\u5f3a\uff0c\u4f46\u8ba1\u7b97\u548c\u663e\u5b58\u6210\u672c\u9ad8\uff0c\u5bf9\u533b\u5b66\u5c0f\u6570\u636e\u96c6\u4e0d\u603b\u662f\u53cb\u597d\u3002<\/li>\n<li><strong>VM-UNet \/ VM-UNetV2 \/ Vision Mamba \u7c7b\u6a21\u578b<\/strong>\uff1a\u867d\u7136\u7ebf\u6027\u590d\u6742\u5ea6\u6709\u5438\u5f15\u529b\uff0c\u4f46\u5e38\u89c1 raster \u6216\u65b9\u5411\u626b\u63cf\u4f1a\u628a\u4e8c\u7ef4\u90bb\u57df\u5173\u7cfb\u538b\u6210\u4e00\u7ef4\u987a\u5e8f\uff0c\u5782\u76f4\u76f8\u90bb\u50cf\u7d20\u5728\u5e8f\u5217\u4e2d\u53ef\u80fd\u76f8\u8ddd\u6574\u884c\u957f\u5ea6\uff1b\u6b64\u5916\u65b9\u5411\u8f93\u51fa\u7b80\u5355\u76f8\u52a0\uff0c\u65e0\u6cd5\u6839\u636e\u5c40\u90e8\u5bf9\u8c61\u5f62\u6001\u9009\u62e9\u66f4\u6709\u7528\u7684\u65b9\u5411\/\u5c3a\u5ea6\u54cd\u5e94\u3002<\/li>\n<\/ol>\n<p>\u8fd9\u4e2a\u6279\u8bc4\u5bf9 Mamba-based segmentation \u6bd4\u8f83\u5207\u4e2d\u8981\u5bb3\uff0c\u56e0\u4e3a\u533b\u5b66\u5206\u5272\u4e0d\u662f\u56fe\u50cf\u5206\u7c7b\uff0c\u8fb9\u754c\u5c40\u90e8\u8fde\u7eed\u6027\u4e0e\u7a7a\u95f4\u6392\u5217\u8bef\u5dee\u4f1a\u76f4\u63a5\u53cd\u6620\u5230 mask \u8d28\u91cf\u3002<\/p>\n<h4>4. 
\u65b9\u6cd5\u603b\u89c8<\/h4>\n<p>\u6574\u4f53\u6846\u67b6\u4ecd\u662f U-Net-style segmentation network\uff1a<\/p>\n<ol>\n<li><strong>Encoder<\/strong>\uff1a\u4ee5 VM-UNetV2 \u7684 Visual State Space block \u4e3a\u57fa\u7840\uff0c\u628a\u539f VSS block \u66ff\u6362\u4e3a Patch-MoE VSS block\u3002<\/li>\n<li><strong>Patch-ordered scanning<\/strong>\uff1a\u7ed9\u5b9a\u7279\u5f81\u56fe <code>X_l \u2208 R^{C_l \u00d7 H_l \u00d7 W_l}<\/code> \u4e0e patch size <code>p<\/code>\uff0c\u5148\u628a\u7a7a\u95f4\u7f51\u683c\u5212\u5206\u4e3a\u975e\u91cd\u53e0 <code>p\u00d7p<\/code> \u5c40\u90e8 patch\uff1b\u6bcf\u4e2a patch \u5185\u6309 row-major \u987a\u5e8f\u679a\u4e3e\u50cf\u7d20\uff0c\u518d\u8fdb\u5165\u4e0b\u4e00\u4e2a patch\u3002\u8fd9\u6837 token \u6570\u4e0d\u53d8\u3001\u5206\u8fa8\u7387\u4e0d\u53d8\uff0c\u53ea\u6539\u53d8 Mamba \u7684\u8bbf\u95ee\u987a\u5e8f\uff0c\u4f7f patch \u5185\u50cf\u7d20\u5728\u5e8f\u5217\u4e2d\u8fde\u7eed\u3002<\/li>\n<li><strong>Hierarchical patch sizes<\/strong>\uff1a\u4e0d\u540c stage \/ \u4e0d\u540c\u65b9\u5411\u53ef\u4f7f\u7528\u4e0d\u540c patch size\uff0c\u4f8b\u5982\u8f83\u5927 patch \u6355\u6349\u7c97\u7ed3\u6784\uff0c\u8f83\u5c0f patch \u4fdd\u7559\u8fb9\u754c\u7ec6\u8282\u3002\u8bba\u6587\u7684 patch-size ablation \u663e\u793a\u67d0\u4e9b\u7ec4\u5408\u6bd4\u56fa\u5b9a 8\/8\/4\/4 \u66f4\u597d\u3002<\/li>\n<li><strong>Four directional scanners<\/strong>\uff1a\u6cbf forward\u3001reverse\u3001WH forward\u3001WH reverse \u56db\u4e2a\u65b9\u5411\u626b\u63cf\uff0c\u6bcf\u4e2a\u65b9\u5411\u8f93\u51fa\u4e00\u4e2a feature map <code>Y_l^{(i)}<\/code>\u3002<\/li>\n<li><strong>MoE-based directional fusion<\/strong>\uff1a\u56db\u4e2a\u65b9\u5411\u8f93\u51fa\u5148\u7ecf GroupNorm \u5f62\u6210\u56db\u4e2a\u65b9\u5411\u4e13\u5bb6\uff1b\u518d\u628a\u56db\u4e2a\u65b9\u5411 concat \u540e\u7ecf <code>1\u00d71 Conv + BN + ReLU<\/code> \u6784\u9020\u7b2c\u4e94\u4e2a concat expert\u3002Router \u7528 raw directional outputs \u7684\u5c40\u90e8 
<code>DWConv3\u00d73<\/code> \u63cf\u8ff0\u548c\u5168\u5c40 GAP \u63cf\u8ff0\uff0c\u7ecf\u53ef\u5b66\u4e60 <code>\u03b1<\/code> \u6df7\u5408\u540e\u4ea7\u751f\u6bcf\u4e2a\u7a7a\u95f4\u4f4d\u7f6e\u7684 5 \u4e2a expert \u6743\u91cd\u3002<\/li>\n<li><strong>Residual directional addition<\/strong>\uff1a\u6700\u7ec8\u8f93\u51fa\u4e0d\u662f\u7eaf MoE \u52a0\u6743\u548c\uff0c\u800c\u662f <code>Z_l = \\tilde{Y}_l + \u03a3_i Y_l^{(i)}<\/code>\uff0c\u7528\u6b8b\u5dee\u65b9\u5411\u548c\u7a33\u5b9a\u8bad\u7ec3\uff0c\u907f\u514d router \u65e9\u671f\u9000\u5316\u3002<\/li>\n<li><strong>Decoder \/ SDI<\/strong>\uff1a\u8bba\u6587\u91c7\u7528 U-Net v2 \u7684 Semantics and Detail Infusion\uff08SDI\uff09\u6a21\u5757\uff0c\u7528 Hadamard product \u5c06\u9ad8\u5c42\u8bed\u4e49\u4e0e\u4f4e\u5c42\u7ec6\u8282\u6ce8\u5165\u4e0d\u540c\u5c3a\u5ea6\u7279\u5f81\uff0cdecoder \u57fa\u672c\u4fdd\u7559 VM-UNetV2 \u8bbe\u8ba1\u3002<\/li>\n<\/ol>\n<h4>5. \u6838\u5fc3\u6a21\u5757\u62c6\u89e3<\/h4>\n<p><strong>\u6a21\u5757 A\uff1aPatch-ordered scanning<\/strong><br \/>\n- \u8f93\u5165\uff1aencoder stage \u7684\u4e8c\u7ef4\u7279\u5f81\u56fe <code>X_l<\/code>\u3002<br \/>\n- \u8f93\u51fa\uff1a\u91cd\u6392\u540e\u7684\u4e00\u7ef4\u5e8f\u5217\uff0c\u968f\u540e\u7ecf Mamba\/SSM \u5904\u7406\u5e76 reshape \u56de\u4e8c\u7ef4\u3002<br \/>\n- \u89e3\u51b3\u95ee\u9898\uff1a\u51cf\u8f7b raster flattening \u5bfc\u81f4\u7684\u4e8c\u7ef4\u90bb\u57df\u65ad\u88c2\uff0c\u5c24\u5176\u4fdd\u62a4\u5c40\u90e8\u8fb9\u754c\u4e0e\u5c0f\u75c5\u7076\u7ed3\u6784\u3002<br \/>\n- \u521b\u65b0\u6027\u5224\u65ad\uff1a\u601d\u8def\u6734\u7d20\u4f46\u6709\u6548\uff0c\u5c5e\u4e8e\u5bf9 Vision Mamba \u5e8f\u5217\u5316\u65b9\u5f0f\u7684\u5408\u7406\u533b\u5b66\u5206\u5272\u9002\u914d\uff1b\u6bd4\u201c\u53ea\u6362 Mamba block \u540d\u5b57\u201d\u66f4\u6709\u9488\u5bf9\u6027\u3002<br \/>\n- \u53ef\u8fc1\u79fb\u6027\uff1a\u5f88\u9002\u5408\u8fc1\u79fb\u5230 DAMamba\u3001VM-UNet\u3001SegMamba\u30012D polyp segmentation 
\u6846\u67b6\uff1b\u5bf9 3D medical image segmentation \u4e5f\u53ef\u6269\u5c55\u6210 block\/patch\/voxel ordering\uff0c\u4f46\u9700\u8981\u91cd\u65b0\u8003\u8651\u4e09\u7ef4\u90bb\u57df\u3001\u663e\u5b58\u548c\u626b\u63cf\u65b9\u5411\u3002<\/p>\n<p><strong>\u6a21\u5757 B\uff1aHierarchical patch sizes<\/strong><br \/>\n- \u8f93\u5165\uff1a\u591a stage feature maps \u4e0e\u4e0d\u540c\u65b9\u5411\u626b\u63cf\u3002<br \/>\n- \u8f93\u51fa\uff1a\u4e0d\u540c\u5c40\u90e8\u5c3a\u5ea6\u7684\u5e8f\u5217\u5316\u8def\u5f84\u3002<br \/>\n- \u4f5c\u7528\uff1a\u7528\u5927 patch \u589e\u5f3a\u533a\u57df\u4e0a\u4e0b\u6587\uff0c\u7528\u5c0f patch \u6355\u6349\u7ec6\u8fb9\u754c\u3002<br \/>\n- \u8bc4\u4ef7\uff1a\u8bba\u6587\u505a\u4e86 patch-size \u8868\u683c\uff0c\u4f46\u914d\u7f6e\u5b57\u7b26\u4e32\u89e3\u91ca\u4e0d\u591f\u76f4\u89c2\uff1b\u4f5c\u4e3a\u8d85\u53c2\u6570\u53ef\u80fd\u6570\u636e\u96c6\u76f8\u5173\uff0c\u590d\u73b0\u65f6\u5e94\u5355\u72ec\u8c03\u53c2\u3002<\/p>\n<p><strong>\u6a21\u5757 C\uff1aMoE directional fusion<\/strong><br \/>\n- \u8f93\u5165\uff1a\u56db\u4e2a\u65b9\u5411 Mamba \u8f93\u51fa <code>Y_l^{(1..4)}<\/code>\u3002<br \/>\n- \u8f93\u51fa\uff1a\u7a7a\u95f4\u4f4d\u7f6e\u76f8\u5173\u7684\u878d\u5408\u7279\u5f81 <code>\\tilde{Y}_l<\/code>\u3002<br \/>\n- \u89e3\u51b3\u95ee\u9898\uff1a\u66ff\u4ee3\u56fa\u5b9a\u6c42\u548c\uff0c\u8ba9\u6a21\u578b\u5728\u5c0f\u606f\u8089\u3001\u8fb9\u754c\u533a\u57df\u3001\u80cc\u666f\u5e72\u6270\u533a\u57df\u9009\u62e9\u4e0d\u540c\u65b9\u5411\/concat expert\u3002<br \/>\n- \u521b\u65b0\u6027\u5224\u65ad\uff1a\u628a MoE \u7528\u4e8e\u65b9\u5411\u878d\u5408\u662f\u5408\u7406\u6269\u5c55\uff0c\u4f46\u4e0d\u662f\u7a00\u758f\u5927\u6a21\u578b\u610f\u4e49\u4e0a\u7684 MoE\uff1b\u66f4\u50cf spatial attention \/ dynamic fusion\u3002<br \/>\n- \u53ef\u8fc1\u79fb\u6027\uff1a\u9002\u5408\u63d2\u5230 DAMamba \u7684\u591a\u65b9\u5411\u626b\u63cf\u878d\u5408\u5904\uff0c\u4e5f\u9002\u5408\u505a\u201c\u8f7b\u91cf router + 
\u65b9\u5411\u878d\u5408\u201d\u6d88\u878d\uff1b\u4f46\u539f\u7248 concat expert \u4ee3\u4ef7\u5f88\u5927\u3002<\/p>\n<p><strong>\u6a21\u5757 D\uff1aResidual directional addition<\/strong><br \/>\n- \u8f93\u5165\uff1aMoE \u878d\u5408\u8f93\u51fa\u4e0e\u56db\u4e2a raw directional outputs\u3002<br \/>\n- \u8f93\u51fa\uff1a\u7a33\u5b9a\u540e\u7684 block \u8f93\u51fa\u3002<br \/>\n- \u4f5c\u7528\uff1a\u4fdd\u7559 VM-UNetV2 \u56fa\u5b9a\u65b9\u5411\u6c42\u548c\u7684\u5f3a baseline\uff0c\u907f\u514d gating \u5b66\u574f\u3002<br \/>\n- \u5173\u952e caveat\uff1a\u590d\u6742\u5ea6\u8868\u663e\u793a \u201cw\/o Residual Addition\u201d \u4e0e\u5b8c\u6574\u6a21\u578b\u53c2\u6570\/FLOPs \u76f8\u540c\uff0c\u56e0\u6b64\u6b8b\u5dee\u672c\u8eab\u4e0d\u589e\u52a0\u590d\u6742\u5ea6\uff1b\u771f\u6b63\u589e\u52a0\u5f00\u9500\u7684\u662f concat expert \/ MoE \u7ed3\u6784\u3002<\/p>\n<h4>6. \u5b9e\u9a8c\u8bbe\u8ba1\u4e0e\u7ed3\u679c<\/h4>\n<p>\u5b9e\u9a8c\u8986\u76d6\u4e94\u4e2a\u606f\u8089\u6570\u636e\u96c6\u548c\u4e24\u4e2a\u76ae\u80a4\u75c5\u7076\u6570\u636e\u96c6\u3002\u606f\u8089\u5b9e\u9a8c\u6cbf\u7528 U-Net v2 \u534f\u8bae\uff1aKvasir-SEG 900 \u5f20 + ClinicDB 550 \u5f20\u8bad\u7ec3\uff1b\u6d4b\u8bd5\u5305\u62ec CVC-300 60\u3001ColonDB 380\u3001ETIS 196\u3001Kvasir-SEG 100\u3001ClinicDB 62\u3002ISIC 2017\/2018 \u4f7f\u7528 U-Net v2 \u7684 train\/test split\u3002\u8bad\u7ec3\u8bbe\u7f6e\u5305\u62ec PyTorch\u3001A100 80GB\u3001AdamW\u3001lr <code>1e-3<\/code>\u3001batch size 80\u3001256\u00d7256\u3001300 epochs\u3001cosine annealing\uff0cVMamba-S \u9884\u8bad\u7ec3\u521d\u59cb\u5316\u3002<\/p>\n<p>\u4e3b\u8981\u7ed3\u679c\uff1a<\/p>\n<ul>\n<li><strong>Polyp datasets<\/strong>\uff1a\u76f8\u6bd4 VM-UNetV2\uff0cPatch-MoE Mamba \u5728 Dice \u4e0a\u6574\u4f53\u63d0\u5347\u6709\u9650\u4f46\u8f83\u7a33\u5b9a\u3002Kvasir-SEG 90.82\u219290.90\uff0cClinicDB 90.52\u219291.32\uff0cColonDB 76.62\u219277.94\uff0cETIS 72.56\u219274.04\uff0cCVC-300 
86.80\u219287.31\u3002\u6700\u5927\u6536\u76ca\u5728 ColonDB\/ETIS \u8fd9\u7c7b\u66f4\u96be\u6cdb\u5316\u7684\u6570\u636e\u96c6\u3002<\/li>\n<li><strong>ISIC 2017\/2018<\/strong>\uff1aISIC 2017 Dice 90.23\u219290.85\uff0cISIC 2018 Dice 88.36\u219289.34\uff0c\u76f8\u5bf9 VM-UNetV2 \u4e5f\u6709\u5c0f\u5e45\u63d0\u5347\u3002<\/li>\n<li><strong>Ablation<\/strong>\uff1aVM-UNetV2 \u5e73\u5747 Dice 83.46\uff1b\u52a0\u5165 patch-ordered scanning \u5230 84.02\uff1b\u518d\u52a0 MoE fusion \u5230 84.30\u3002\u8bf4\u660e\u4e3b\u8981\u6536\u76ca\u6765\u81ea patch scanning\uff0cMoE \u7ee7\u7eed\u8d21\u732e\u7ea6 0.28 Dice\u3002<\/li>\n<li><strong>Complexity<\/strong>\uff1a\u8fd9\u662f\u6700\u5927\u95ee\u9898\u3002U-Net v2 \u4e3a 25.15M\/5.58G\uff0cVM-UNetV2 \u4e3a 22.77M\/5.31G\uff0c\u800c Patch-MoE Mamba \u8fbe\u5230 70.06M\/28.18G\u3002\u5373\u7528\u7ea6 3\u00d7 \u53c2\u6570\u30015\u00d7 FLOPs \u6362\u53d6\u7ea6 0.8 Dice \u5e73\u5747\u63d0\u5347\u3002<\/li>\n<\/ul>\n<h4>7. \u5b9e\u9a8c\u53ef\u4fe1\u5ea6\u5224\u65ad<\/h4>\n<p>\u53ef\u4fe1\u4e4b\u5904\uff1a<\/p>\n<ul>\n<li>\u4f7f\u7528\u4e86 5 \u4e2a polyp benchmark \u548c 2 \u4e2a ISIC benchmark\uff0c\u4efb\u52a1\u8986\u76d6\u6bd4\u5355\u4e00\u6570\u636e\u96c6\u66f4\u597d\u3002<\/li>\n<li>\u8868 I \u58f0\u660e\u6bcf\u4e2a\u5b9e\u9a8c\u7528 5 \u4e2a\u968f\u673a\u79cd\u5b50\uff0c\u62a5\u544a mean \u00b1 std\uff0c\u8fd9\u6bd4\u53ea\u62a5\u5355\u6b21\u7ed3\u679c\u66f4\u53ef\u4fe1\u3002<\/li>\n<li>\u6709\u7ec4\u4ef6\u6d88\u878d\u548c patch-size \u6d88\u878d\uff0c\u80fd\u521d\u6b65\u5206\u79bb patch scanning \u4e0e MoE fusion \u7684\u8d21\u732e\u3002<\/li>\n<li>baseline \u9009\u62e9\u5305\u542b U-Net\u3001U-Net v2\u3001VM-UNet\u3001VM-UNetV2\uff0c\u4e0e\u8bba\u6587\u4e3b\u5f20\u76f4\u63a5\u76f8\u5173\u3002<\/li>\n<\/ul>\n<p>\u4e0d\u8db3\u4e4b\u5904\uff1a<\/p>\n<ul>\n<li>\u589e\u76ca\u4e0e\u590d\u6742\u5ea6\u4e0d\u6210\u6bd4\u4f8b\u3002MoE \u5b8c\u6574\u6a21\u578b 70.06M\/28.18G\uff0c\u800c\u63d0\u5347\u591a\u6570\u5728 0.1\u20131.5 
Dice \u8303\u56f4\uff1b\u82e5\u76ee\u6807\u662f\u5b9e\u65f6\u5185\u955c\u6216\u8f7b\u91cf\u90e8\u7f72\uff0c\u4e0d\u5212\u7b97\u3002<\/li>\n<li>\u7f3a\u5c11\u4e0e\u66f4\u5f3a polyp SOTA\uff08\u5982 PraNet\u3001Polyp-PVT\u3001CFFormer\u3001\u8fd1\u671f foundation\/SAM adapter \u7c7b\u65b9\u6cd5\uff09\u7684\u5b8c\u6574\u6bd4\u8f83\uff1b\u4ec5\u4e0e U-Net\/VM-UNet \u7cfb\u5217\u6bd4\u8f83\u4f1a\u8ba9\u201cstate-of-the-art\u201d\u542b\u4e49\u53d8\u7a84\u3002<\/li>\n<li>\u6ca1\u6709\u7edf\u8ba1\u663e\u8457\u6027\u68c0\u9a8c\uff1b\u867d\u7136\u6709\u5747\u503c\u65b9\u5dee\uff0c\u4f46\u672a\u8bf4\u660e test split \u4e0a\u662f\u5426\u663e\u8457\u3002<\/li>\n<li>\u6ca1\u6709\u5916\u90e8\u771f\u5b9e\u89c6\u9891\/\u9000\u5316\u9c81\u68d2\u6027\u8bc4\u4f30\uff0c\u4e5f\u6ca1\u6709\u8fb9\u754c\u6307\u6807\u5982 HD95\u3001Boundary F1\uff1b\u800c\u65b9\u6cd5\u53d9\u4e8b\u5f3a\u8c03\u8fb9\u754c\u4e0e\u5c40\u90e8\u7ed3\u6784\u3002<\/li>\n<li>\u5b98\u65b9\u4ee3\u7801\u672a\u786e\u8ba4\uff0c\u590d\u73b0\u98ce\u9669\u9ad8\u3002<\/li>\n<\/ul>\n<p>\u603b\u4f53\u5224\u65ad\uff1a\u65b9\u6cd5\u673a\u5236\u53ef\u4fe1\uff0c\u5b9e\u9a8c\u80fd\u652f\u6301\u201c\u76f8\u5bf9 VM-UNetV2 \u6709\u7a33\u5b9a\u5c0f\u5e45\u63d0\u5347\u201d\uff0c\u4f46\u4e0d\u8db3\u4ee5\u652f\u6301\u201c\u8ba1\u7b97\u4ee3\u4ef7\u65e0\u5173\u7d27\u8981\u201d\u6216\u201c\u5168\u9762\u4f18\u4e8e\u6240\u6709\u73b0\u4ee3 polyp segmentation \u65b9\u6cd5\u201d\u7684\u5f3a\u8868\u8ff0\u3002<\/p>\n<h4>8. 
\u4e0e\u4e3b\u6d41\u533b\u5b66\u56fe\u50cf\u5206\u5272\u6846\u67b6\u7684\u5173\u7cfb<\/h4>\n<ul>\n<li><strong>U-Net \/ U-Net v2<\/strong>\uff1a\u6574\u4f53\u4ecd\u662f U-Net encoder-decoder \u8303\u5f0f\uff0cSDI \u6a21\u5757\u76f4\u63a5\u6765\u81ea U-Net v2\u3002\u5b83\u4e0d\u662f\u6446\u8131 U-Net\uff0c\u800c\u662f\u5728 U-Net scaffold \u4e2d\u66ff\u6362 encoder block\u3002<\/li>\n<li><strong>nnU-Net<\/strong>\uff1a\u8bba\u6587\u6ca1\u6709\u6309 nnU-Net \u7684 3D\/2D \u81ea\u9002\u5e94 pipeline \u505a\u5b9e\u9a8c\uff0c\u4e5f\u6ca1\u6709\u8ba8\u8bba spacing\u3001patch sampling\u3001loss\/augmentation \u81ea\u52a8\u914d\u7f6e\uff0c\u56e0\u6b64\u4e0d\u80fd\u89c6\u4e3a nnU-Net \u6539\u8fdb\uff1b\u66f4\u9002\u5408\u4f5c\u4e3a nnU-Net \u4e4b\u5916\u7684\u7814\u7a76\u578b backbone\u3002<\/li>\n<li><strong>UNetR \/ Swin-UNet \/ TransUNet \/ TransFuse<\/strong>\uff1a\u8bba\u6587\u628a Transformer \u7684\u4e8c\u6b21\u590d\u6742\u5ea6\u4f5c\u4e3a\u5bf9\u7167\u52a8\u673a\uff0c\u4f46\u5b9e\u9a8c\u6ca1\u6709\u7cfb\u7edf\u6bd4\u8f83\u8fd9\u4e9b\u6a21\u578b\uff1bMamba \u7684\u4f18\u52bf\u4e3b\u8981\u4f53\u73b0\u5728\u5e8f\u5217\u590d\u6742\u5ea6\u7406\u8bba\uff0c\u800c\u5b9e\u9645 FLOPs \u56e0 MoE concat expert \u53d8\u9ad8\u3002<\/li>\n<li><strong>Mamba \/ VMamba \/ SegMamba \/ DAMamba \/ VM-UNetV2<\/strong>\uff1a\u5173\u7cfb\u6700\u76f4\u63a5\u3002\u53ef\u770b\u4f5c VM-UNetV2 \u7684 VSS block \u6539\u9020\uff1a\u626b\u63cf\u987a\u5e8f\u4ece pixel raster \u53d8\u4e3a patch ordered\uff0c\u65b9\u5411\u878d\u5408\u4ece sum \u53d8\u4e3a spatial-aware MoE\u3002<\/li>\n<li><strong>Foundation model for medical segmentation<\/strong>\uff1a\u6ca1\u6709\u4f7f\u7528 SAM\/MedSAM \u6216 foundation model prompt\uff1b\u4e0e foundation model \u5173\u7cfb\u5f31\u3002<\/li>\n<\/ul>\n<h4>9. 
\u5bf9\u6211\u8bfe\u9898\u7684\u4ef7\u503c<\/h4>\n<p>\u5bf9\u7528\u6237\u5173\u6ce8\u7684 polyp segmentation \u548c DAMamba \u6539\u9020\uff0c\u672c\u6587\u503c\u5f97\u91cd\u70b9\u770b\uff1a<\/p>\n<ol>\n<li><strong>DAMamba \u6539\u9020\u4ef7\u503c\u9ad8<\/strong>\uff1apatch-ordered scanning \u53ef\u4ee5\u4f5c\u4e3a\u66ff\u6362\u73b0\u6709 directional scan \u7684\u4f4e\u6982\u5ff5\u6210\u672c\u6a21\u5757\uff0c\u5148\u5355\u72ec\u9a8c\u8bc1\uff0c\u4e0d\u5fc5\u4e00\u5f00\u59cb\u52a0\u5165\u91cd MoE\u3002<\/li>\n<li><strong>polyp segmentation baseline \u4ef7\u503c\u4e2d\u9ad8<\/strong>\uff1a\u4e94\u4e2a\u7ecf\u5178 polyp \u6570\u636e\u96c6\u8986\u76d6\u5b8c\u6574\uff0c\u8bad\u7ec3\/\u6d4b\u8bd5\u534f\u8bae\u660e\u786e\uff0c\u53ef\u4f5c\u4e3a Mamba-polyp \u5206\u652f\u7684 related work \u548c\u5bf9\u6bd4\u5bf9\u8c61\u3002<\/li>\n<li><strong>\u6a21\u5757\u590d\u7528\u5efa\u8bae<\/strong>\uff1a\u4f18\u5148\u590d\u7528 patch-ordered scanning\uff1bMoE fusion \u53ef\u6539\u6210\u8f7b\u91cf\u7248\uff0c\u6bd4\u5982\u53bb\u6389 concat expert\u3001\u7528 depthwise separable 1\u00d71 \u6216 channel grouping\uff0c\u907f\u514d 70M \u53c2\u6570\u3002<\/li>\n<li><strong>3D medical segmentation \u4ef7\u503c\u6709\u9650\u4f46\u53ef\u542f\u53d1<\/strong>\uff1a3D \u626b\u63cf\u987a\u5e8f\u66f4\u590d\u6742\uff0c\u76f4\u63a5\u7167\u642c\u4f1a\u5f15\u8d77\u663e\u5b58\u548c\u5404\u5411\u5f02\u6027 spacing \u95ee\u9898\uff1b\u53ef\u4f5c\u4e3a\u201c\u5c40\u90e8 voxel block ordering\u201d\u601d\u8def\uff0c\u800c\u4e0d\u662f\u76f4\u63a5\u5b9e\u73b0\u3002<\/li>\n<\/ol>\n<h4>10. 
\u9605\u8bfb\u5efa\u8bae<\/h4>\n<p><strong>\u5efa\u8bae\u7cbe\u8bfb\u5168\u6587\uff0c\u4f46\u4ee5\u65b9\u6cd5\u548c\u6d88\u878d\u4e3a\u4e3b\uff0c\u5b9e\u9a8c\u7ed3\u8bba\u8981\u5e26\u7740\u590d\u6742\u5ea6\u6298\u6263\u9605\u8bfb\u3002<\/strong> \u5982\u679c\u7528\u6237\u6b63\u5728\u505a DAMamba \/ VM-UNet \/ polyp segmentation\uff0c\u5efa\u8bae\u5148\u8bfb Section II-B\/II-C \u4e0e Table III\/V\uff0c\u518d\u51b3\u5b9a\u662f\u5426\u590d\u73b0\uff1b\u5982\u679c\u76ee\u6807\u662f\u8f7b\u91cf\u5b9e\u65f6\u6a21\u578b\uff0c\u5219\u4e0d\u5efa\u8bae\u76f4\u63a5\u91c7\u7528\u5b8c\u6574 Patch-MoE Mamba\u3002<\/p>\n<hr \/>\n<h2>\u8bba\u6587 2\uff1aDepthPolyp: Pseudo-Depth Guided Lightweight Segmentation for Real-Time Colonoscopy<\/h2>\n<h3>\u57fa\u672c\u4fe1\u606f<\/h3>\n<ul>\n<li>\u6807\u9898\uff1aDepthPolyp: Pseudo-Depth Guided Lightweight Segmentation for Real-Time Colonoscopy<\/li>\n<li>\u4f5c\u8005 \/ \u7b2c\u4e00\u4f5c\u8005\uff1aZhuoyu Wu, Wenhui Ou, Lexi Zhang, Pei-Sze Tan, Dongjun Wu, Junhe Zhao, Wenqi Fang, Rapha\u00ebl C.-W. 
Phan \/ \u7b2c\u4e00\u4f5c\u8005 Zhuoyu Wu<\/li>\n<li>\u65f6\u95f4\uff1a2026-05-15 arXiv v1<\/li>\n<li>\u6765\u6e90\uff1aarXiv preprint\uff0carXiv:2605.16519\uff1bGitHub \u63cf\u8ff0\u6807\u6ce8 <code>[ICPR'26 Official Implementation]<\/code>\uff0c\u4f46\u672c\u6587\u68c0\u7d22\u4ee5 arXiv preprint \u4e3a\u51c6<\/li>\n<li>\u8bba\u6587\u9875\u9762\u94fe\u63a5\uff1ahttps:\/\/arxiv.org\/abs\/2605.16519<\/li>\n<li>PDF \u6587\u4ef6 \/ PDF \u94fe\u63a5\uff1aMEDIA:\/tmp\/medseg_daily_2026-05-20\/depthpolyp_2605.16519.pdf\uff1bhttps:\/\/arxiv.org\/pdf\/2605.16519<\/li>\n<li>\u4ee3\u7801\u94fe\u63a5\uff1ahttps:\/\/github.com\/ReaganWu\/DepthPolyp\/<\/li>\n<li>\u4efb\u52a1\uff1areal-time colonoscopy polyp segmentation\uff1b\u9c81\u68d2\/\u8f7b\u91cf\/\u9000\u5316\u573a\u666f\u5206\u5272<\/li>\n<li>\u6570\u636e\u96c6\uff1aKvasir-SEG\u3001CVC-ClinicDB\u3001CVC-ColonDB\u3001PolypGen sequences 18\u201322\uff1b\u5408\u6210\u9000\u5316 clean\/noisy \u8bc4\u6d4b<\/li>\n<li>\u65b9\u6cd5\u7c7b\u578b\uff1aMiT-B0 encoder + lightweight decoder\uff1bpseudo-depth-guided multi-task learning\uff1bGhost Factorization Module\uff1bInterleaved Shuffle Fusion\uff1bDynamic Group Gating\uff1buncertainty-weighted loss<\/li>\n<\/ul>\n<h3>paper-deep-reader \u7cbe\u8bfb\u7ed3\u679c<\/h3>\n<h4>1. \u4e00\u53e5\u8bdd\u7ed3\u8bba<\/h4>\n<p>DepthPolyp \u7684\u6700\u5927\u4ef7\u503c\u5728\u4e8e\u628a polyp segmentation \u4ece\u201c\u5e72\u51c0 benchmark \u4e0a\u5237 Dice\u201d\u63a8\u5411\u201c\u771f\u5b9e\u5185\u955c\u9000\u5316 + \u8f7b\u91cf\u90e8\u7f72 + \u4f2a\u6df1\u5ea6\u8bad\u7ec3\u6b63\u5219\u201d\u7684\u7ec4\u5408\u8bc4\u6d4b\uff0c\u5c24\u5176\u9002\u5408\u4f5c\u4e3a\u7528\u6237\u505a\u606f\u8089\u5206\u5272\u9c81\u68d2\u6027\u5b9e\u9a8c\u548c\u90e8\u7f72\u578b baseline \u7684\u53c2\u8003\u3002<\/p>\n<h4>2. 
\u7814\u7a76\u80cc\u666f\u4e0e\u6838\u5fc3\u95ee\u9898<\/h4>\n<p>\u672c\u6587\u805a\u7126\u5b9e\u65f6\u7ed3\u80a0\u955c\u606f\u8089\u5206\u5272\u3002\u4e34\u5e8a\u5185\u955c\u89c6\u9891\u4e2d\u5e38\u89c1 motion blur\u3001specular reflections\u3001illumination instability\u3001defocus\u3001fog\/JPEG artifacts \u7b49\u9000\u5316\uff0c\u800c\u5f88\u591a polyp segmentation \u65b9\u6cd5\u53ea\u5728 Kvasir\u3001ClinicDB \u7b49\u5e72\u51c0\u56fe\u7247\u4e0a\u8bc4\u6d4b\uff0c\u5bfc\u81f4 clean benchmark \u6210\u7ee9\u9ad8\u4f46\u771f\u5b9e\u624b\u672f\u573a\u666f\u9884\u6d4b\u4e0d\u7a33\u5b9a\u3002\u4f5c\u8005\u7684\u6838\u5fc3\u95ee\u9898\u662f\uff1a\u80fd\u5426\u8bbe\u8ba1\u4e00\u4e2a\u53c2\u6570\u91cf\u548c\u8ba1\u7b97\u91cf\u8db3\u591f\u5c0f\u3001\u5728\u79fb\u52a8\u7aef\/\u5d4c\u5165\u5f0f\u8bbe\u5907\u4e0a\u5b9e\u65f6\u8fd0\u884c\uff0c\u540c\u65f6\u5728\u9000\u5316\u5185\u955c\u56fe\u50cf\u548c\u771f\u5b9e PolypGen \u5e8f\u5217\u4e0a\u4fdd\u6301\u9c81\u68d2\u7684\u5206\u5272\u6846\u67b6\uff1f<\/p>\n<p>\u5185\u90e8 paper map \u53ef\u6982\u62ec\u4e3a\uff1a\u8bba\u6587\u7814\u7a76\u771f\u5b9e\u9000\u5316\u5185\u955c\u573a\u666f\u4e0b\u7684\u8f7b\u91cf\u606f\u8089\u5206\u5272\uff1b\u4e3b\u62db\u662f\u7528 Depth-Anything v2 \u751f\u6210 pseudo-depth\uff0c\u5728\u8bad\u7ec3\u65f6\u4f5c\u4e3a\u8f85\u52a9\u4efb\u52a1\u6b63\u5219\uff0c\u5e76\u914d\u5408 GFM\/ISF\/DGG \u8f7b\u91cf decoder\uff1b\u4e3b\u8981\u58f0\u79f0 DepthPolyp \u5728 clean\/noisy \u56db\u8c61\u9650\u8bc4\u6d4b\u3001\u8de8\u6570\u636e\u96c6\u6cdb\u5316\u3001PolypGen \u771f\u5b9e\u9000\u5316\u548c\u79fb\u52a8\u7aef\u901f\u5ea6\u4e0a\u4f18\u4e8e\u591a\u7c7b baseline\uff1b\u5173\u952e\u5bf9\u8c61\u662f MiT-B0 \u591a\u5c3a\u5ea6\u7279\u5f81\u3001pseudo-depth target\u3001Dice loss\u3001Smooth-L1 depth loss\u3001uncertainty 
weighting\u3001GFM\u3001ISF\u3001DGG\uff1b\u771f\u6b63\u8d1f\u8f7d\u5728\u201c\u4f2a\u6df1\u5ea6\u662f\u5426\u771f\u80fd\u63d0\u4f9b\u6bd4\u5916\u89c2\u66f4\u7a33\u7684\u7ed3\u6784\u76d1\u7763\u201d\u548c\u201c\u9000\u5316\u8bc4\u6d4b\u662f\u5426\u8d34\u8fd1\u4e34\u5e8a\u201d\uff1b\u4e3b\u8981\u5931\u8d25\u98ce\u9669\u662f synthetic degradation \u4e0e\u771f\u5b9e\u4e34\u5e8a\u57df\u4ecd\u6709\u5dee\u8ddd\uff0c\u4ee5\u53ca\u4f2a\u6df1\u5ea6\u6559\u5e08\u6a21\u578b\u53ef\u80fd\u5f15\u5165\u4e0d\u53ef\u63a7\u504f\u5dee\u3002<\/p>\n<h4>3. \u73b0\u6709\u65b9\u6cd5\u4e0d\u8db3<\/h4>\n<p>\u4f5c\u8005\u8ba4\u4e3a\u5df2\u6709\u65b9\u6cd5\u4e3b\u8981\u6709\u4e09\u7c7b\u4e0d\u8db3\uff1a<\/p>\n<ol>\n<li><strong>Transformer \/ hybrid \u5927\u6a21\u578b<\/strong>\uff1a\u5e72\u51c0\u56fe\u50cf\u4e0a\u6548\u679c\u5f3a\uff0c\u4f46\u53c2\u6570\u5e38\u8d85\u8fc7 30M\uff0c\u5728 blur\/noise \u4e0b Dice \u53ef\u5927\u5e45\u4e0b\u964d\uff0c\u4e14\u4e0d\u9002\u5408\u79fb\u52a8\u7aef\u6216\u5185\u955c\u5b9e\u65f6\u90e8\u7f72\u3002<\/li>\n<li><strong>\u8f7b\u91cf\u6a21\u578b<\/strong>\uff1a\u53c2\u6570\u548c FLOPs \u4f4e\uff0c\u4f46\u8868\u793a\u80fd\u529b\u6709\u9650\uff0c\u5728\u9000\u5316\u8f93\u5165\u4e0b\u9884\u6d4b\u5bb9\u6613\u788e\u88c2\u6216\u6f0f\u68c0\u3002<\/li>\n<li><strong>\u8fb9\u7f18\/\u663e\u8457\u6027\u7b49\u591a\u4efb\u52a1\u8f85\u52a9<\/strong>\uff1aedge\/saliency cue \u672c\u8eab\u4e5f\u53d7\u6a21\u7cca\u3001\u53cd\u5149\u3001\u4f4e\u5bf9\u6bd4\u5f71\u54cd\uff0c\u9c81\u68d2\u6027\u63d0\u5347\u6709\u9650\u3002<\/li>\n<\/ol>\n<p>\u4f5c\u8005\u8fdb\u4e00\u6b65\u6279\u8bc4\u9886\u57df\u8bc4\u6d4b\u4e60\u60ef\uff1a\u591a\u6570\u8bba\u6587\u53ea\u5728 clean test set \u4e0a\u62a5\u544a Dice\/IoU\uff0c\u5ffd\u7565\u771f\u5b9e\u5185\u955c\u89c6\u9891\u9000\u5316\uff0c\u56e0\u6b64\u9ad8\u4f30\u4e86 clinical deployment reliability\u3002<\/p>\n<h4>4. 
\u65b9\u6cd5\u603b\u89c8<\/h4>\n<p>DepthPolyp \u7531\u4e00\u4e2a\u8f7b\u91cf segmentation network \u548c\u8bad\u7ec3\u671f pseudo-depth supervision \u7ec4\u6210\uff1a<\/p>\n<ol>\n<li><strong>Encoder<\/strong>\uff1a\u91c7\u7528 MiT-B0 encoder\uff0c\u8f93\u51fa\u56db\u4e2a\u591a\u5c3a\u5ea6\u7279\u5f81 <code>c1..c4<\/code>\u3002<\/li>\n<li><strong>Feature projection<\/strong>\uff1a\u6bcf\u4e2a\u5c3a\u5ea6\u901a\u8fc7 token-wise linear layer \u6295\u5f71\u5230\u7edf\u4e00 channel\uff0c\u5e76 reshape\/upsample \u5230 <code>H\/4 \u00d7 W\/4<\/code>\uff1a<code>\\tilde{c_i} = Upsample(reshape(MLP_i(c_i)), size=(H\/4,W\/4))<\/code>\u3002<\/li>\n<li><strong>Hierarchical factorized decoder<\/strong>\uff1a\u5c06\u56db\u5c3a\u5ea6\u7279\u5f81\u9001\u5165 GFM\uff0c\u5206\u6210 primary stream \u4e0e auxiliary stream\uff0c\u518d\u7528 ISF \u505a\u4f4e\u6210\u672c\u8de8\u5c3a\u5ea6\/\u8de8\u7ec4\u4fe1\u606f\u4ea4\u6362\uff0c\u6700\u540e concat \u540e\u7531 DGG \u505a group-wise \u81ea\u9002\u5e94\u8c03\u5236\uff0c\u5f97\u5230 <code>F_out<\/code>\u3002<\/li>\n<li><strong>Dual heads during training<\/strong>\uff1asegmentation head \u8f93\u51fa <code>S_logit<\/code>\uff0cdepth head \u8f93\u51fa\u5f52\u4e00\u5316\u6df1\u5ea6 <code>D<\/code>\u3002<\/li>\n<li><strong>Pseudo-depth supervision<\/strong>\uff1a\u7528 frozen Depth-Anything v2-small \u4e3a\u8f93\u5165\u56fe\u50cf\u751f\u6210\u76f8\u5bf9\u6df1\u5ea6 <code>D*<\/code>\uff0c\u53ea\u5728\u8bad\u7ec3\u65f6\u76d1\u7763 depth head\uff1b\u63a8\u7406\u65f6\u4e0d\u9700\u8981 Depth-Anything\uff0c\u56e0\u6b64\u6ca1\u6709\u989d\u5916 inference overhead\u3002<\/li>\n<li><strong>Loss<\/strong>\uff1asegmentation \u7528 Dice loss\uff1bdepth \u7528 Smooth-L1\uff1b\u4e8c\u8005\u901a\u8fc7 Kendall uncertainty weighting \u81ea\u52a8\u5e73\u8861\uff1a<code>L = 1\/(2\u03c3_s^2)L_seg + 1\/(2\u03c3_d^2)L_depth + log \u03c3_s + log \u03c3_d<\/code>\u3002<\/li>\n<li><strong>Robustness protocol<\/strong>\uff1a\u5efa\u7acb 
Clean\u2192Clean\u3001Clean\u2192Noisy\u3001Noisy\u2192Clean\u3001Noisy\u2192Noisy \u56db\u8c61\u9650\u8bc4\u6d4b\uff0c\u5e76\u5728 PolypGen sequences 18\u201322 \u4e0a\u505a\u771f\u5b9e\u9000\u5316\u8bc4\u4f30\u3002<\/li>\n<\/ol>\n<h4>5. \u6838\u5fc3\u6a21\u5757\u62c6\u89e3<\/h4>\n<p><strong>\u6a21\u5757 A\uff1aPseudo-depth-guided multi-task learning<\/strong><br \/>\n- \u8f93\u5165\uff1aRGB colonoscopy image\uff1bDepth-Anything v2-small \u751f\u6210\u7684 normalized pseudo-depth\u3002<br \/>\n- \u8f93\u51fa\uff1a\u8bad\u7ec3\u671f depth prediction \u4e0e segmentation prediction\u3002<br \/>\n- \u89e3\u51b3\u95ee\u9898\uff1a\u5f53\u5916\u89c2\u53d7\u5230 blur\u3001specular reflection\u3001illumination shift \u5e72\u6270\u65f6\uff0c\u6df1\u5ea6\/\u51e0\u4f55\u7ed3\u6784\u76f8\u5bf9\u66f4\u7a33\u5b9a\uff0c\u53ef\u4f5c\u4e3a\u8868\u5f81\u6b63\u5219\u3002<br \/>\n- \u521b\u65b0\u6027\u5224\u65ad\uff1a\u4f2a\u6df1\u5ea6\u7528\u4e8e polyp segmentation \u4e0d\u662f\u5b8c\u5168\u9996\u6b21\uff0c\u4f46\u672c\u6587\u628a\u5b83\u4e0e\u9000\u5316\u9c81\u68d2\u3001\u8f7b\u91cf\u90e8\u7f72\u548c\u56db\u8c61\u9650\u8bc4\u6d4b\u7ed1\u5b9a\uff0c\u8d21\u732e\u66f4\u6e05\u695a\u3002<br \/>\n- \u53ef\u8fc1\u79fb\u6027\uff1a\u975e\u5e38\u9002\u5408\u8fc1\u79fb\u5230 polyp segmentation\uff1b\u4e5f\u53ef\u7528\u4e8e\u5185\u955c surgical scene segmentation\u3002\u5bf9 3D medical image segmentation \u4ef7\u503c\u8f83\u5f31\uff0c\u56e0\u4e3a 3D CT\/MRI \u672c\u8eab\u5df2\u6709\u4f53\u7d20\u51e0\u4f55\uff0c\u4f2a\u5355\u76ee\u6df1\u5ea6\u6982\u5ff5\u4e0d\u76f4\u63a5\u9002\u7528\u3002<\/p>\n<p><strong>\u6a21\u5757 B\uff1aGhost Factorization Module (GFM)<\/strong><br \/>\n- \u8f93\u5165\uff1a\u7edf\u4e00\u5c3a\u5ea6\u540e\u7684 feature map <code>X<\/code>\u3002<br \/>\n- \u8f93\u51fa\uff1aprimary component <code>X_p = PWConv(X)<\/code> \u4e0e auxiliary component <code>X_a = DWConv(X_p)<\/code>\uff0c\u5e76\u6ee1\u8db3 <code>C_p + C_a = C_out<\/code>\u3002<br \/>\n- \u4f5c\u7528\uff1a\u7528 
pointwise + depthwise cheap operation \u8fd1\u4f3c\u66f4\u91cd\u7684 dense convolution\uff0c\u4e3b\u8981\u8d21\u732e\u6548\u7387\u800c\u975e\u8bed\u4e49\u89e3\u8026\u3002<br \/>\n- \u8bc1\u636e\uff1aablation \u4e2d\u53bb\u6389 GFM Dice \u53ea\u4ece 0.784 \u964d\u5230 0.776\uff0c\u4f46 iPhone FPS \u4ece 181.54 \u964d\u5230 131.39\uff0c\u8bf4\u660e GFM \u4e3b\u8981\u63d0\u5347\u901f\u5ea6\u3002<\/p>\n<p><strong>\u6a21\u5757 C\uff1aInterleaved Shuffle Fusion (ISF)<\/strong><br \/>\n- \u8f93\u5165\uff1a\u8de8\u5c3a\u5ea6 concat \u540e\u7684 primary\/auxiliary stream\u3002<br \/>\n- \u64cd\u4f5c\uff1a\u5c06 channel \u5206\u6210 4 \u7ec4\uff0c\u505a deterministic channel shuffle\uff0c\u518d\u7528 depthwise convolution \u8fdb\u884c\u7a7a\u95f4 refinement\uff0c\u6700\u540e\u7528 group-wise learnable scale <code>\u03b3<\/code> \u6b8b\u5dee\u52a0\u56de\u3002<br \/>\n- \u4f5c\u7528\uff1a\u4f4e\u6210\u672c\u589e\u5f3a\u8de8\u7ec4\/\u8de8\u5c3a\u5ea6\u4ea4\u6d41\u3002<br \/>\n- \u8bc1\u636e\uff1a\u53bb\u6389 ISF \u540e Avg Dice 0.784\u21920.760\uff0c\u8bf4\u660e\u5b83\u5bf9\u9c81\u68d2\u5206\u5272\u6709\u5b9e\u8d28\u8d21\u732e\u3002<\/p>\n<p><strong>\u6a21\u5757 D\uff1aDynamic Group Gating (DGG)<\/strong><br \/>\n- \u8f93\u5165\uff1aconcat \u540e\u7684 refined components <code>[SS, SA, AS, AA]<\/code>\u3002<br \/>\n- \u64cd\u4f5c\uff1a\u628a channel reshape \u4e3a <code>B\u00d7G\u00d7C_g\u00d7H\u00d7W<\/code>\uff0c\u5bf9 channel\/spatial \u7ef4\u505a\u5e73\u5747\u6c60\u5316\u5f97\u5230 group descriptor\uff0c\u7ecf\u8f7b\u91cf\u7ebf\u6027\u5c42\u548c sigmoid \u4ea7\u751f group gates\uff0c\u518d\u6b8b\u5dee\u8c03\u5236\u3002<br \/>\n- \u4f5c\u7528\uff1a\u6839\u636e\u56fe\u50cf\u5185\u5bb9\u81ea\u9002\u5e94\u5f3a\u8c03\u4e0d\u540c\u7ec4\u7684\u7ed3\u6784\/\u5916\u89c2\u7279\u5f81\u3002<br \/>\n- \u8bc1\u636e\uff1a\u53bb\u6389 DGG \u540e Avg Dice 0.784\u21920.736\uff0c\u540c\u65f6 iPhone FPS \u4e5f\u4e0b\u964d\u5230 147.87\uff0c\u8bf4\u660e DGG 
\u5728\u7cbe\u5ea6\u548c\u5b9e\u73b0\u8def\u5f84\u4e0a\u90fd\u91cd\u8981\u3002<\/p>\n<p><strong>\u6a21\u5757 E\uff1aUncertainty-weighted loss<\/strong><br \/>\n- \u8f93\u5165\uff1asegmentation Dice loss \u4e0e depth Smooth-L1 loss\u3002<br \/>\n- \u8f93\u51fa\uff1a\u81ea\u52a8\u5e73\u8861\u7684\u8054\u5408\u76ee\u6807\u3002<br \/>\n- \u4f5c\u7528\uff1a\u907f\u514d\u624b\u52a8\u8bbe\u7f6e depth loss \u6743\u91cd\uff0c\u7a33\u5b9a segmentation-depth \u591a\u4efb\u52a1\u4f18\u5316\u3002<br \/>\n- \u8bc1\u636e\uff1a\u53bb\u6389 uncertainty loss \u540e Avg Dice \u4ece 0.784 \u964d\u5230 0.605\uff0c\u662f\u6700\u4e25\u91cd ablation\uff1b\u8fd9\u8bf4\u660e\u8bad\u7ec3\u6743\u91cd\u5e73\u8861\u662f\u65b9\u6cd5\u6210\u8d25\u5173\u952e\uff0c\u4f46\u4e5f\u63d0\u793a\u590d\u73b0\u65f6\u8be5\u6a21\u5757\u975e\u5e38\u654f\u611f\u3002<\/p>\n<h4>6. \u5b9e\u9a8c\u8bbe\u8ba1\u4e0e\u7ed3\u679c<\/h4>\n<p>\u5b9e\u9a8c\u6570\u636e\u5305\u62ec\uff1aKvasir-SEG 1000 \u5f20\u4f5c\u4e3a\u4e3b\u8bad\u7ec3\/\u9a8c\u8bc1\u6570\u636e\uff1bCVC-ClinicDB 612\u3001CVC-ColonDB 380 \u4f5c\u4e3a\u8de8\u57df OOD \u8bc4\u4f30\uff1bPolypGen sequences 18\u201322 \u5171 273 \u5f20\u4f5c\u4e3a\u771f\u5b9e\u624b\u672f\u9000\u5316\u5e8f\u5217\u3002\u5408\u6210\u9000\u5316\u5305\u62ec motion blur\u3001Gaussian blur\u3001brightness\/contrast\u3001JPEG compression\u3001light spots\/reflection\u3001fog\u3001optical distortion \u7b49\u3002<\/p>\n<p>\u5173\u952e\u5b9e\u9a8c\u7ed3\u679c\uff1a<\/p>\n<ul>\n<li><strong>\u56db\u8c61\u9650\u9c81\u68d2\u8bc4\u6d4b\uff08Table 2\uff09<\/strong>\uff1aDepthPolyp Clean\u2192Clean Dice 0.9107\uff0cClean\u2192Noisy 0.8126\uff0cNoisy\u2192Clean 0.8910\uff0cNoisy\u2192Noisy 0.8525\u3002\u6240\u6709\u6a21\u578b\u5728 Clean\u2192Noisy \u90fd\u660e\u663e\u4e0b\u964d\uff0c\u8bf4\u660e\u53ea\u7528 clean training \u4e0d\u53ef\u9760\uff1bDepthPolyp \u5728 Noisy\u2192Noisy \u4e0a\u6700\u9ad8\uff0c\u5e76\u4e14 clean-domain penalty \u7ea6 
-0.0197\u3002<\/li>\n<li><strong>\u8de8\u6570\u636e\u96c6\u6cdb\u5316\uff08Table 4\uff09<\/strong>\uff1a\u5728 noise-aware training \u4e0b\uff0cDepthPolyp \u4ec5 3.57M \u53c2\u6570\u30010.86 GMACs\uff1bN\u2192N Dice \u5728 Kvasir\/ClinicDB\/ColonDB \u5206\u522b\u4e3a 0.853\/0.751\/0.734\u3002\u76f8\u6bd4 SegFormer-B0\uff083.71M\uff0c1.30 GMACs\uff09\u7684 N\u2192N 0.823\/0.698\/0.621\uff0c\u63d0\u5347\u660e\u663e\uff0c\u5c24\u5176 ColonDB +0.113 Dice\u3002<\/li>\n<li><strong>\u771f\u5b9e PolypGen \u4e0e\u901f\u5ea6\uff08Table 5\uff09<\/strong>\uff1aDepthPolyp PolypGen Dice 0.679\u3001IoU 0.620\u3001Recall 0.788\uff1b\u5e73\u5747 N\u2192N Dice 0.779\u3002iPhone 15 \u4e0a 181.54 FPS\uff0cRTX 3090 \u4e0a 79.12 FPS\uff0cRaspberry Pi 4 \u4e0a 4.05 FPS\u3002\u76f8\u6bd4 SegFormer-B0\uff0cPolypGen Dice 0.634\u21920.679\uff0cGMACs 1.30\u21920.86\u3002<\/li>\n<li><strong>Ablation\uff08Table 6\uff09<\/strong>\uff1a\u5b8c\u6574\u6a21\u578b Avg Dice 0.784\uff1bw\/o depth guidance 0.759\uff1bw\/o uncertainty loss 0.605\uff1bw\/o GFM 0.776\uff1bw\/o ISF 0.760\uff1bw\/o DGG 0.736\u3002\u8bf4\u660e depth guidance \u6709\u4e2d\u7b49\u8d21\u732e\uff0cuncertainty weighting \u4e0e DGG \u662f\u5173\u952e\u8d21\u732e\u3002<\/li>\n<li><strong>Qualitative<\/strong>\uff1a\u8bba\u6587\u5c55\u793a motion blur\u3001illumination variation\u3001low contrast\u3001specular highlights \u4e0b\uff0cDepthPolyp \u7684 mask \u66f4\u7d27\u51d1\u3001false positives \u66f4\u5c11\uff1b\u8be5\u8bc1\u636e\u4e0e Table 4\/5 \u65b9\u5411\u4e00\u81f4\u3002<\/li>\n<\/ul>\n<h4>7. 
\u5b9e\u9a8c\u53ef\u4fe1\u5ea6\u5224\u65ad<\/h4>\n<p>\u53ef\u4fe1\u4e4b\u5904\uff1a<\/p>\n<ul>\n<li>\u8bc4\u6d4b\u95ee\u9898\u5b9a\u4e49\u597d\uff1aclean\/noisy train-test \u56db\u8c61\u9650\u76f4\u63a5\u63ed\u793a clean benchmark \u7684\u8fc7\u5ea6\u4e50\u89c2\uff0c\u8fd9\u662f\u6bd4\u5355\u7eaf\u5237\u699c\u66f4\u6709\u4ef7\u503c\u7684\u5b9e\u9a8c\u8bbe\u8ba1\u3002<\/li>\n<li>baseline \u8986\u76d6\u8f83\u5e7f\uff1aheavyweight\u3001mid-size\u3001lightweight \u5171 19 \u4e2a\u6a21\u578b\uff0c\u5305\u542b UNet\u3001PraNet\u3001SegFormer-B0\/B5\u3001CFFormer\u3001CMUNeXt\u3001ULite\u3001MedT \u7b49\u3002<\/li>\n<li>\u62a5\u544a\u4e86\u53c2\u6570\u3001GMACs\u3001GPU\/iPhone\/RPi \u901f\u5ea6\uff0c\u80fd\u652f\u6491\u201c\u8f7b\u91cf\u5b9e\u65f6\u201d\u4e3b\u5f20\u3002<\/li>\n<li>PolypGen sequences 18\u201322 \u63d0\u4f9b\u771f\u5b9e\u9000\u5316\u573a\u666f\uff0c\u4e0d\u53ea\u4f9d\u8d56\u5408\u6210\u566a\u58f0\u3002<\/li>\n<li>\u4ee3\u7801\u94fe\u63a5\u53ef\u8bbf\u95ee\uff0c\u590d\u73b0\u6761\u4ef6\u597d\u4e8e\u591a\u6570 arXiv preprint\u3002<\/li>\n<\/ul>\n<p>\u9700\u8981\u8c28\u614e\u7684\u5730\u65b9\uff1a<\/p>\n<ul>\n<li>synthetic degradation \u7684\u53c2\u6570\u662f\u5426\u771f\u5b9e\u8986\u76d6\u4e34\u5e8a\u5206\u5e03\u4ecd\u4e0d\u786e\u5b9a\uff1bTable 3 \u7ed9\u4e86\u53c2\u6570\uff0c\u4f46\u6ca1\u6709\u7528\u6237\u7814\u7a76\u6216\u8bbe\u5907\u5206\u5e03\u9a8c\u8bc1\u3002<\/li>\n<li>PolypGen \u53ea\u7528 273 \u5f20\u30015 \u4e2a\u5e8f\u5217\uff0c\u771f\u5b9e\u5916\u90e8\u9a8c\u8bc1\u4ecd\u504f\u5c0f\u3002<\/li>\n<li>\u8bba\u6587\u6ca1\u6709\u62a5\u544a\u7edf\u8ba1\u663e\u8457\u6027\u6216\u591a\u968f\u673a\u79cd\u5b50\u65b9\u5dee\uff1b\u901f\u5ea6\u6d4b\u8bd5\u4e5f\u53ef\u80fd\u53d7 CoreML conversion\u3001batch size\u3001\u8f93\u5165\u5206\u8fa8\u7387\u5f71\u54cd\u3002<\/li>\n<li>Depth-Anything v2 \u7684\u4f2a\u6df1\u5ea6\u5728\u5185\u955c\u56fe\u50cf\u4e0a\u662f\u5426\u53ef\u9760\u5e76\u6ca1\u6709\u5355\u72ec\u9a8c\u8bc1\uff1b\u5982\u679c pseudo-depth 
\u672c\u8eab\u5728\u53cd\u5149\/\u51fa\u8840\/\u6db2\u4f53\u573a\u666f\u51fa\u9519\uff0c\u53ef\u80fd\u628a\u9519\u8bef\u7ed3\u6784\u5148\u9a8c\u4f20\u7ed9\u5206\u5272\u6a21\u578b\u3002<\/li>\n<li>\u5206\u5272 loss \u53ea\u7528 Dice\uff0c\u672a\u52a0\u5165 boundary loss \/ focal \/ BCE\uff1b\u5bf9\u5c0f\u606f\u8089\u548c\u6781\u4e0d\u5e73\u8861\u573a\u666f\u7684\u7a33\u5b9a\u6027\u8fd8\u53ef\u8fdb\u4e00\u6b65\u8003\u5bdf\u3002<\/li>\n<\/ul>\n<p>\u603b\u4f53\u5224\u65ad\uff1a\u8bc1\u636e\u76f8\u5bf9\u5145\u5206\uff0c\u5c24\u5176\u9002\u5408\u652f\u6301\u201c\u9c81\u68d2\u8bc4\u6d4b + \u8f7b\u91cf\u90e8\u7f72\u201d\u7684\u4e3b\u5f20\uff1b\u4f46\u201c\u4e34\u5e8a\u53ef\u9760\u201d\u4ecd\u4e0d\u80fd\u8fc7\u5ea6\u63a8\u65ad\uff0c\u53ea\u80fd\u8bf4\u5728\u5408\u6210\u9000\u5316\u548c\u6709\u9650 PolypGen \u5e8f\u5217\u4e0a\u66f4\u7a33\u3002<\/p>\n<h4>8. \u4e0e\u4e3b\u6d41\u533b\u5b66\u56fe\u50cf\u5206\u5272\u6846\u67b6\u7684\u5173\u7cfb<\/h4>\n<ul>\n<li><strong>U-Net \/ nnU-Net<\/strong>\uff1aDepthPolyp \u4e0d\u662f nnU-Net pipeline\uff0c\u4e5f\u6ca1\u6709\u81ea\u52a8\u914d\u7f6e spacing\/patch\/loss\uff1b\u5b83\u66f4\u50cf\u8f7b\u91cf encoder-decoder + specialized decoder\u3002\u4e0e U-Net \u7684\u5171\u540c\u70b9\u662f\u591a\u5c3a\u5ea6 encoder-decoder \u548c skip\/fusion\uff0c\u4f46 backbone\/decoder \u8bbe\u8ba1\u4e0d\u540c\u3002<\/li>\n<li><strong>MedNeXt \/ CNN-based segmentation<\/strong>\uff1aGFM\/ISF\/DGG \u5c5e\u4e8e\u8f7b\u91cf CNN-style decoder \u6a21\u5757\uff0c\u5f3a\u8c03 depthwise\/group\/shuffle \u64cd\u4f5c\uff0c\u4e0e MedNeXt \u8fd9\u7c7b\u5927 kernel\/ConvNeXt \u5316\u8def\u7ebf\u4e0d\u540c\u3002<\/li>\n<li><strong>UNetR \/ Swin-UNet \/ TransUNet \/ TransFuse \/ SegFormer<\/strong>\uff1a\u4f7f\u7528 MiT-B0 encoder\uff0c\u4e0e SegFormer \u7cfb\u5217\u5173\u7cfb\u8f83\u8fd1\uff1b\u4f46\u4e3b\u8981\u521b\u65b0\u4e0d\u5728 Transformer block\uff0c\u800c\u5728\u8f7b\u91cf decoder 
\u548c\u4f2a\u6df1\u5ea6\u591a\u4efb\u52a1\u6b63\u5219\u3002<\/li>\n<li><strong>Mamba \/ VMamba \/ SegMamba \/ DAMamba<\/strong>\uff1a\u6ca1\u6709\u4f7f\u7528 Mamba\uff1b\u4e0e DAMamba \u7684\u5173\u7cfb\u4e3b\u8981\u662f\u53ef\u4f5c\u4e3a polyp robustness baseline\uff0c\u6216\u628a depth-guided auxiliary loss \u52a0\u5230 Mamba segmentation \u6846\u67b6\u91cc\u3002<\/li>\n<li><strong>Foundation model \/ SAM \/ MedSAM<\/strong>\uff1a\u6ca1\u6709\u4f7f\u7528 SAM \u505a promptable segmentation\uff1b\u4f46\u7528 Depth-Anything v2 \u4f5c\u4e3a frozen foundation model \u751f\u6210 pseudo-depth\uff0c\u56e0\u6b64\u5c5e\u4e8e\u201c\u501f\u52a9\u89c6\u89c9 foundation model \u4ea7\u751f\u8bad\u7ec3\u76d1\u7763\u201d\u7684\u8def\u7ebf\u3002<\/li>\n<\/ul>\n<h4>9. \u5bf9\u6211\u8bfe\u9898\u7684\u4ef7\u503c<\/h4>\n<p>\u5bf9\u7528\u6237\u8bfe\u9898\u7684\u4ef7\u503c\u5f88\u9ad8\uff0c\u5c24\u5176\u662f polyp segmentation\uff1a<\/p>\n<ol>\n<li><strong>\u53ef\u4f5c\u4e3a\u9c81\u68d2\u6027\u5b9e\u9a8c\u6a21\u677f<\/strong>\uff1aClean\u2192Clean \/ Clean\u2192Noisy \/ Noisy\u2192Clean \/ Noisy\u2192Noisy \u56db\u8c61\u9650\u975e\u5e38\u9002\u5408\u52a0\u5165\u7528\u6237\u8bba\u6587\u5b9e\u9a8c\u90e8\u5206\uff0c\u6bd4\u53ea\u62a5 Kvasir\/ClinicDB Dice \u66f4\u6709\u8bf4\u670d\u529b\u3002<\/li>\n<li><strong>\u53ef\u4f5c\u4e3a DAMamba \u6539\u9020\u65b9\u5411<\/strong>\uff1a\u4e0d\u9700\u8981\u6539 Mamba block\uff0c\u4e5f\u53ef\u4ee5\u628a Depth-Anything pseudo-depth auxiliary head \u4e0e uncertainty loss \u52a0\u5230 DAMamba \u8bad\u7ec3\u4e2d\uff0c\u9a8c\u8bc1 depth regularization \u662f\u5426\u6539\u5584\u53cd\u5149\/\u6a21\u7cca\u573a\u666f\u3002<\/li>\n<li><strong>\u53ef\u4f5c\u4e3a\u8f7b\u91cf baseline<\/strong>\uff1a3.57M\u30010.86 GMACs\u3001\u79fb\u52a8\u7aef 181 FPS\uff0c\u5bf9\u5b9e\u65f6\u5185\u955c\u90e8\u7f72\u6216 lightweight segmentation \u76f8\u5173 work \u5f88\u6709\u53c2\u8003\u4ef7\u503c\u3002<\/li>\n<li><strong>related work 
\u4ef7\u503c<\/strong>\uff1a\u53ef\u5f52\u5165 robustness-oriented polyp segmentation\u3001depth-guided auxiliary supervision\u3001deployment-aware lightweight segmentation \u4e09\u7c7b\u3002<\/li>\n<li><strong>\u590d\u73b0\u5efa\u8bae<\/strong>\uff1a\u4f18\u5148\u590d\u73b0\u5b9e\u9a8c\u534f\u8bae\u548c loss\uff0c\u800c\u4e0d\u662f\u5b8c\u5168\u590d\u523b\u6240\u6709 decoder \u6a21\u5757\uff1b\u5148\u6bd4\u8f83 w\/ vs w\/o pseudo-depth \u548c uncertainty weighting\uff0c\u518d\u51b3\u5b9a\u662f\u5426\u52a0\u5165 GFM\/ISF\/DGG\u3002<\/li>\n<\/ol>\n<h4>10. \u9605\u8bfb\u5efa\u8bae<\/h4>\n<p><strong>\u5f3a\u70c8\u5efa\u8bae\u7cbe\u8bfb\u5168\u6587\u5e76\u4f18\u5148\u590d\u73b0\u5b9e\u9a8c\u534f\u8bae\u3002<\/strong> \u5982\u679c\u7528\u6237\u8fd1\u671f\u505a polyp segmentation \u6216 DAMamba \u7684\u771f\u5b9e\u573a\u666f\u9c81\u68d2\u6027\uff0cDepthPolyp \u6bd4\u5f88\u591a\u53ea\u5728 clean benchmark \u4e0a\u5237\u5c0f\u5e45 Dice \u7684\u8bba\u6587\u66f4\u503c\u5f97\u6295\u5165\uff1b\u5efa\u8bae\u5148\u8bfb Section 3.6\u30014.2\u30014.4\u3001Table 4\u20136\uff0c\u518d\u770b decoder \u7ec6\u8282\u3002<\/p>\n<hr \/>\n<h2>\u4eca\u65e5\u63a8\u8350\u4f18\u5148\u7ea7<\/h2>\n<ol>\n<li><strong>DepthPolyp<\/strong>\uff1a\u6700\u503c\u5f97\u5148\u8bfb\u548c\u590d\u73b0\u3002\u539f\u56e0\u662f\u95ee\u9898\u5b9a\u4e49\u66f4\u8d34\u8fd1\u771f\u5b9e polyp segmentation\uff0c\u4ee3\u7801\u53ef\u8bbf\u95ee\uff0c\u8bc4\u6d4b\u8bbe\u8ba1\u5b8c\u6574\uff0c\u4e14 pseudo-depth + uncertainty loss \u5f88\u5bb9\u6613\u8fc1\u79fb\u5230\u7528\u6237\u73b0\u6709 DAMamba \/ U-Net \/ Transformer baseline \u4e2d\u3002<\/li>\n<li><strong>Patch-MoE Mamba<\/strong>\uff1a\u9002\u5408\u505a Mamba-based segmentation \u7ed3\u6784\u6539\u9020\u53c2\u8003\uff0c\u5c24\u5176\u662f patch-ordered scanning\uff1b\u4f46\u5b8c\u6574 MoE \u7248\u672c\u6210\u672c\u8fc7\u9ad8\uff0c\u5efa\u8bae\u628a\u5b83\u4f5c\u4e3a DAMamba 
\u626b\u63cf\/\u878d\u5408\u6a21\u5757\u7075\u611f\uff0c\u800c\u4e0d\u662f\u76f4\u63a5\u6574\u4f53\u91c7\u7528\u3002<\/li>\n<\/ol>\n<h2>\u4eca\u65e5 PDF \u83b7\u53d6\u60c5\u51b5<\/h2>\n<ul>\n<li>\u8bba\u6587 1\uff1a\u5df2\u9644 PDF\u3002MEDIA:\/tmp\/medseg_daily_2026-05-20\/patch_moe_mamba_2605.17719.pdf\uff1bPDF \u94fe\u63a5\uff1ahttps:\/\/arxiv.org\/pdf\/2605.17719<\/li>\n<li>\u8bba\u6587 2\uff1a\u5df2\u9644 PDF\u3002MEDIA:\/tmp\/medseg_daily_2026-05-20\/depthpolyp_2605.16519.pdf\uff1bPDF \u94fe\u63a5\uff1ahttps:\/\/arxiv.org\/pdf\/2605.16519<\/li>\n<\/ul>\n<h2>\u4eca\u65e5\u53ef\u6267\u884c\u5efa\u8bae<\/h2>\n<ol>\n<li><strong>\u5148\u590d\u73b0 DepthPolyp \u7684\u56db\u8c61\u9650\u9c81\u68d2\u8bc4\u6d4b\u534f\u8bae<\/strong>\uff1a\u5728 Kvasir-SEG \/ ClinicDB \/ ColonDB \u4e0a\u6784\u9020 clean\/noisy train-test\uff0c\u7ed9\u73b0\u6709 U-Net\u3001TransFuse\u3001DAMamba \u52a0\u540c\u6837\u9000\u5316\u8bc4\u6d4b\uff0c\u8fd9\u4f1a\u663e\u8457\u589e\u5f3a\u8bba\u6587\u5b9e\u9a8c\u8bf4\u670d\u529b\u3002<\/li>\n<li><strong>\u628a DepthPolyp \u7684 pseudo-depth auxiliary loss \u79fb\u690d\u5230 DAMamba<\/strong>\uff1a\u5148\u4e0d\u6539\u7f51\u7edc\u7ed3\u6784\uff0c\u53ea\u52a0 depth head + Depth-Anything pseudo-depth + uncertainty weighting\uff0c\u770b\u5728 blur\/reflection\/noisy polyp \u4e0a Dice\/IoU\/Recall \u662f\u5426\u7a33\u5b9a\u63d0\u5347\u3002<\/li>\n<li><strong>\u4ece Patch-MoE Mamba \u4e2d\u53ea\u501f\u7528 patch-ordered scanning \u505a\u8f7b\u91cf\u6d88\u878d<\/strong>\uff1a\u5148\u66ff\u6362 DAMamba\/VM-UNet \u7684\u626b\u63cf\u987a\u5e8f\uff0c\u6682\u4e0d\u52a0\u5165\u5b8c\u6574 MoE concat expert\uff1b\u82e5\u6709\u6548\uff0c\u518d\u8bbe\u8ba1\u4f4e\u6210\u672c direction gating\uff0c\u907f\u514d 70M \u53c2\u6570\u548c 28G FLOPs \u7684\u5f00\u9500\u3002<\/li>\n<\/ol>\n<h2>\u53c2\u8003\u94fe\u63a5<\/h2>\n<ul>\n<li>Patch-MoE Mamba arXiv\uff1ahttps:\/\/arxiv.org\/abs\/2605.17719<\/li>\n<li>Patch-MoE Mamba 
PDF\uff1ahttps:\/\/arxiv.org\/pdf\/2605.17719<\/li>\n<li>DepthPolyp arXiv\uff1ahttps:\/\/arxiv.org\/abs\/2605.16519<\/li>\n<li>DepthPolyp PDF\uff1ahttps:\/\/arxiv.org\/pdf\/2605.16519<\/li>\n<li>DepthPolyp code\uff1ahttps:\/\/github.com\/ReaganWu\/DepthPolyp\/<\/li>\n<\/ul>\n","protected":false},"excerpt":{"rendered":"<p>\u4eca\u65e5\u533b\u5b66\u56fe\u50cf\u5206\u5272\u6700\u65b0\u8bba\u6587\u7cbe\u8bfb\u8ffd\u8e2a \u4eca\u65e5\u7ed3\u8bba \u4eca\u5929\u672a\u68c0\u7d22\u5230\u660e\u786e\u5df2\u6b63\u5f0f\u63a5\u6536 MICCAI \/ CVPR \/ ICCV \/ ECCV \/ &#8230;<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"emotion":"","emotion_color":"","title_style":"","license":"","footnotes":""},"categories":[85],"tags":[],"class_list":["post-1069","post","type-post","status-publish","format-standard","hentry","category-85"],"views":8,"_links":{"self":[{"href":"https:\/\/www.eutaboo.com\/index.php\/wp-json\/wp\/v2\/posts\/1069","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/www.eutaboo.com\/index.php\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/www.eutaboo.com\/index.php\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/www.eutaboo.com\/index.php\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/www.eutaboo.com\/index.php\/wp-json\/wp\/v2\/comments?post=1069"}],"version-history":[{"count":0,"href":"https:\/\/www.eutaboo.com\/index.php\/wp-json\/wp\/v2\/posts\/1069\/revisions"}],"wp:attachment":[{"href":"https:\/\/www.eutaboo.com\/index.php\/wp-json\/wp\/v2\/media?parent=1069"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/www.eutaboo.com\/index.php\/wp-json\/wp\/v2\/categories?post=1069"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/www.eutaboo.com\/index.php\/wp-json\/wp\/v2\/tags?post=1069"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\
/{rel}","templated":true}]}}