archive.py 70 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619
  1. import hashlib
  2. import json
  3. import logging
  4. import os
  5. import re
  6. import shutil
  7. import zipfile
  8. from datetime import date, datetime, time, timezone
  9. from pathlib import Path
  10. from defusedxml import ElementTree as ET
  11. from sqlalchemy import and_, or_, select, text
  12. from sqlalchemy.ext.asyncio import AsyncSession
  13. from backend.app.core.config import settings
  14. from backend.app.models.archive import PrintArchive
  15. from backend.app.models.filament import Filament
  16. from backend.app.models.printer import Printer
  17. from backend.app.utils.safe_path import PathTraversalError, safe_join_under
  18. logger = logging.getLogger(__name__)
  def _copy_and_fsync(src: Path, dst: Path, chunk_size: int = 1024 * 1024) -> None:
      """Stream ``src`` into ``dst`` chunk by chunk, then fsync ``dst``.

      Stands in for ``shutil.copy2`` in the archive pipeline. ``copy2`` goes
      through Linux ``sendfile()``, which on some kernels/filesystems has
      returned a short count on the first call and truncated the destination
      for larger 3MF uploads (#1032, observed on Raspberry Pi OS bookworm /
      armv7l). A plain read/write loop followed by ``os.fsync`` avoids that
      path, so the destination bytes are on disk before the caller inspects
      them as a ZIP.

      Args:
          src: File to read.
          dst: File to create or overwrite.
          chunk_size: Maximum number of bytes moved per read/write pair.
      """
      with src.open("rb") as reader, dst.open("wb") as writer:
          # read() returns b"" at end of file, which ends the iteration.
          for block in iter(lambda: reader.read(chunk_size), b""):
              writer.write(block)
          writer.flush()
          os.fsync(writer.fileno())
      # Mirror the source's permission bits and timestamps onto the copy.
      shutil.copystat(src, dst)
  def resolve_display_stem(filename: str) -> str:
      """Return a clean, human-readable stem for a 3MF/gcode filename.

      Bambu Studio's "Send to printer" dialog typically writes files like
      ``Plate_1.gcode.3mf`` (a sliced gcode payload wrapped in a 3MF
      container). ``Path(filename).stem`` alone only drops the last suffix
      and leaves ``Plate_1.gcode``, which then shows up in the archive UI
      instead of ``Plate_1`` (#1152 follow-up).

      Any directory part is dropped first. The recognised print-format
      suffixes are then matched case-insensitively, longest first:

      - ``.gcode.3mf`` (Bambu Studio FTP send)
      - ``.3mf``
      - ``.gcode`` (rare standalone gcode upload)

      A name with none of these suffixes falls back to ``Path(name).stem``,
      i.e. it loses at most its final extension.
      """
      base_name = Path(filename).name
      folded = base_name.lower()
      # The compound suffix has to be tried before ".3mf", which it ends with.
      recognised = (".gcode.3mf", ".3mf", ".gcode")
      hit = next((ext for ext in recognised if folded.endswith(ext)), None)
      if hit is None:
          return Path(base_name).stem
      return base_name[: -len(hit)]
  56. def peek_plate_index_in_3mf(file_path: Path) -> int | None:
  57. """Return the plate index recorded inside a Bambu 3MF, or None.
  58. Reads only ``Metadata/slice_info.config`` to keep this cheap — used by
  59. the print-start callback to verify that the 3MF we just downloaded over
  60. FTP actually matches the plate the printer is running (#1204). The full
  61. ThreeMFParser does much more work and runs later inside ArchiveService.
  62. """
  63. try:
  64. with zipfile.ZipFile(file_path, "r") as zf:
  65. if "Metadata/slice_info.config" not in zf.namelist():
  66. return None
  67. content = zf.read("Metadata/slice_info.config").decode()
  68. root = ET.fromstring(content)
  69. plate = root.find(".//plate")
  70. if plate is None:
  71. return None
  72. for meta in plate.findall("metadata"):
  73. if meta.get("key") == "index":
  74. value = meta.get("value")
  75. if value:
  76. try:
  77. return int(value)
  78. except ValueError:
  79. return None
  80. except Exception:
  81. return None
  82. return None
  83. _PLATE_SUFFIX_RE = re.compile(r"^(.*?)(\s*-\s*Plate\s+|_plate_)(\d+)$", re.IGNORECASE)
  84. def swap_plate_suffix(name: str | None, target_plate: int) -> str | None:
  85. """Return ``name`` with its trailing plate number replaced, or None.
  86. Bambu Studio names multi-plate uploads ``"<Project> - Plate <N>"`` (and
  87. a lowercase ``"_plate_<N>"`` variant exists too — see
  88. test_print_start_expected_promotion). When MQTT subtask_name lags
  89. across consecutive plates of the same model (#1204) the suffix points
  90. at the previous plate; swapping it gives us the correct upload to
  91. re-fetch from FTP. Returns None if no recognised suffix is present.
  92. """
  93. if not name:
  94. return None
  95. m = _PLATE_SUFFIX_RE.match(name)
  96. if not m:
  97. return None
  98. base, separator, _ = m.groups()
  99. return f"{base}{separator}{target_plate}"
  100. class ThreeMFParser:
  101. """Parser for Bambu Lab 3MF files."""
  102. def __init__(self, file_path: Path, plate_number: int | None = None):
  103. self.file_path = file_path
  104. self.plate_number = plate_number # Which plate was printed (1, 2, 3, etc.)
  105. self.metadata: dict = {}
  106. def parse(self) -> dict:
  107. """Extract metadata from 3MF file."""
  108. try:
  109. with zipfile.ZipFile(self.file_path, "r") as zf:
  110. self._parse_slice_info(zf) # Now sets self.plate_number from slice_info
  111. self._parse_project_settings(zf)
  112. self._parse_gcode_header(zf)
  113. self._parse_3dmodel(zf)
  114. self._extract_thumbnail(zf) # Uses correct plate_number for thumbnail
  115. # Enhance print_name with plate info if this is a multi-plate export
  116. plate_index = self.metadata.get("_plate_index")
  117. if plate_index and plate_index > 1:
  118. # Append plate number to distinguish from other plates
  119. existing_name = self.metadata.get("print_name", "")
  120. if existing_name and f"Plate {plate_index}" not in existing_name:
  121. self.metadata["print_name"] = f"{existing_name} - Plate {plate_index}"
  122. # ALWAYS prefer slice_info values - they contain ONLY filaments actually used in print
  123. # project_settings contains ALL configured filaments (AMS slots), not just used ones
  124. if self.metadata.get("_slice_filament_type"):
  125. self.metadata["filament_type"] = self.metadata["_slice_filament_type"]
  126. if self.metadata.get("_slice_filament_color"):
  127. self.metadata["filament_color"] = self.metadata["_slice_filament_color"]
  128. # Clean up internal keys
  129. self.metadata.pop("_slice_filament_type", None)
  130. self.metadata.pop("_slice_filament_color", None)
  131. self.metadata.pop("_plate_index", None)
  132. except Exception as e:
  133. # Return whatever metadata was extracted before the error, but
  134. # surface the failure so corrupted / truncated 3MF archives are
  135. # visible in support bundles (#1032).
  136. logger.warning(
  137. "ThreeMFParser: failed to parse %s: %s(%s) — returning partial metadata",
  138. self.file_path,
  139. type(e).__name__,
  140. e,
  141. )
  142. return self.metadata
  143. def _parse_slice_info(self, zf: zipfile.ZipFile):
  144. """Parse slice_info.config for print settings and printable objects."""
  145. try:
  146. if "Metadata/slice_info.config" in zf.namelist():
  147. content = zf.read("Metadata/slice_info.config").decode()
  148. root = ET.fromstring(content)
  149. # Extract printer_model_id from plate metadata
  150. # Format: <plate><metadata key="printer_model_id" value="C11" /></plate>
  151. for meta in root.findall(".//metadata"):
  152. key = meta.get("key")
  153. value = meta.get("value")
  154. if key == "printer_model_id" and value:
  155. from backend.app.utils.printer_models import normalize_printer_model_id
  156. normalized = normalize_printer_model_id(value)
  157. if normalized:
  158. self.metadata["sliced_for_model"] = normalized
  159. break
  160. # Loop every <plate> so multi-plate exports get summed file-level
  161. # totals. Pre-fix, this used `root.find(".//plate")` which
  162. # returned only the first plate — file-level `print_time_seconds`
  163. # / `filament_used_grams` reflected plate 1 alone, and the
  164. # archive card / project rollup under-reported by the number
  165. # of plates (#1593). Per-plate breakdown is still served by
  166. # the dedicated `/plates` endpoint.
  167. plates = root.findall(".//plate")
  168. summed_time = 0
  169. summed_grams = 0.0
  170. any_time_seen = False
  171. any_grams_seen = False
  172. for plate in plates:
  173. # Plate-level fields that only make sense at the file
  174. # level when there's exactly one plate. ``plate_number``
  175. # / ``_plate_index`` describe which plate the export
  176. # represents — meaningless for an all-plates 3MF, so we
  177. # only record them in the single-plate case. ``bed_type``
  178. # is also single-valued; we take the first plate's value
  179. # as a best-effort default for the archive metadata.
  180. plate_index_value: int | None = None
  181. for meta in plate.findall("metadata"):
  182. key = meta.get("key")
  183. value = meta.get("value")
  184. if key == "index" and value:
  185. try:
  186. plate_index_value = int(value)
  187. except ValueError:
  188. pass # Skip non-numeric plate index
  189. elif key == "prediction" and value:
  190. try:
  191. summed_time += int(value)
  192. any_time_seen = True
  193. except ValueError:
  194. pass
  195. elif key == "weight" and value:
  196. try:
  197. summed_grams += float(value)
  198. any_grams_seen = True
  199. except ValueError:
  200. pass
  201. elif key == "curr_bed_type" and value and "bed_type" not in self.metadata:
  202. self.metadata["bed_type"] = value
  203. # Per-plate object lists are only kept at the file level
  204. # when there's one plate — the skip-object affordance
  205. # operates on the plate being printed, which is the
  206. # `/plates` endpoint's job for multi-plate exports.
  207. if len(plates) == 1:
  208. if plate_index_value is not None:
  209. if not self.plate_number:
  210. self.plate_number = plate_index_value
  211. self.metadata["_plate_index"] = plate_index_value
  212. printable_objects: dict[int, str] = {}
  213. for obj in plate.findall("object"):
  214. identify_id = obj.get("identify_id")
  215. name = obj.get("name")
  216. skipped = obj.get("skipped", "false")
  217. if identify_id and name and skipped.lower() != "true":
  218. try:
  219. printable_objects[int(identify_id)] = name
  220. except ValueError:
  221. pass # Skip objects with non-numeric identify_id
  222. if printable_objects:
  223. self.metadata["printable_objects"] = printable_objects
  224. if any_time_seen:
  225. self.metadata["print_time_seconds"] = summed_time
  226. if any_grams_seen:
  227. self.metadata["filament_used_grams"] = round(summed_grams, 2)
  228. # Get filament info from filaments ACTUALLY USED in the print
  229. # slice_info has <filament id="1" type="PLA" color="#FFFFFF" used_g="100" />
  230. # Only include filaments where used_g > 0
  231. filaments = root.findall(".//filament")
  232. if filaments:
  233. # Collect unique filament types and colors for filaments that are actually used
  234. types = []
  235. colors = []
  236. for f in filaments:
  237. # Check if this filament is actually used in the print
  238. used_g = f.get("used_g", "0")
  239. try:
  240. used_amount = float(used_g)
  241. except (ValueError, TypeError):
  242. used_amount = 0
  243. # Only include if used_g > 0 (filament is actually consumed)
  244. if used_amount > 0:
  245. ftype = f.get("type")
  246. fcolor = f.get("color")
  247. if ftype and ftype not in types:
  248. types.append(ftype)
  249. if fcolor and fcolor not in colors:
  250. colors.append(fcolor)
  251. if types:
  252. self.metadata["_slice_filament_type"] = ", ".join(types)
  253. if colors:
  254. self.metadata["_slice_filament_color"] = ",".join(colors)
  255. # Collect per-slot filament usage for tracking & notifications
  256. filament_slots = []
  257. for f in filaments:
  258. slot_id = f.get("id")
  259. used_g_str = f.get("used_g", "0")
  260. try:
  261. used_g = float(used_g_str)
  262. except (ValueError, TypeError):
  263. used_g = 0
  264. if used_g > 0 and slot_id:
  265. filament_slots.append(
  266. {
  267. "slot_id": int(slot_id),
  268. "used_g": round(used_g, 2),
  269. "type": f.get("type", ""),
  270. "color": f.get("color", ""),
  271. }
  272. )
  273. if filament_slots:
  274. self.metadata["filament_slots"] = filament_slots
  275. except Exception:
  276. pass # Skip unparseable slice_info metadata
  277. def _parse_project_settings(self, zf: zipfile.ZipFile):
  278. """Parse project settings for print configuration."""
  279. try:
  280. if "Metadata/project_settings.config" in zf.namelist():
  281. content = zf.read("Metadata/project_settings.config").decode()
  282. try:
  283. data = json.loads(content)
  284. self._extract_filament_info(data)
  285. self._extract_print_settings(data)
  286. except json.JSONDecodeError:
  287. pass # Skip malformed project_settings JSON
  288. except Exception:
  289. pass # Skip unreadable project settings file
  290. def _parse_gcode_header(self, zf: zipfile.ZipFile):
  291. """Parse G-code file header for total layer count and printer model."""
  292. try:
  293. # Look for plate_1.gcode or similar
  294. gcode_files = [f for f in zf.namelist() if f.endswith(".gcode")]
  295. if not gcode_files:
  296. return
  297. # Read first 4KB of G-code (header contains metadata)
  298. gcode_path = gcode_files[0]
  299. with zf.open(gcode_path) as f:
  300. header = f.read(4096).decode("utf-8", errors="ignore")
  301. # Look for "; total layer number: XX" pattern
  302. match = re.search(r";\s*total\s+layer\s+number[:\s]+(\d+)", header, re.IGNORECASE)
  303. if match:
  304. self.metadata["total_layers"] = int(match.group(1))
  305. # Total filament usage. The slicer writes the print's totals into
  306. # the G-code header ("; total filament weight [g] : 126.26"). Only
  307. # a fallback — slice_info.config is more authoritative when present
  308. # — but it covers sliced outputs whose slice_info lacks per-filament
  309. # used_g, and it's the slicer's own figure regardless.
  310. if "filament_used_grams" not in self.metadata:
  311. match = re.search(r";\s*total\s+filament\s+weight\s*\[g\]\s*:\s*([\d.]+)", header, re.IGNORECASE)
  312. if match:
  313. self.metadata["filament_used_grams"] = float(match.group(1))
  314. if "filament_used_mm" not in self.metadata:
  315. match = re.search(r";\s*total\s+filament\s+length\s*\[mm\]\s*:\s*([\d.]+)", header, re.IGNORECASE)
  316. if match:
  317. self.metadata["filament_used_mm"] = float(match.group(1))
  318. # Look for printer_model in gcode header (fallback if not found in slice_info)
  319. # Format: "; printer_model = Bambu Lab X1 Carbon" or "; printer_model = X1C"
  320. if "sliced_for_model" not in self.metadata:
  321. match = re.search(r";\s*printer_model\s*=\s*(.+)", header, re.IGNORECASE)
  322. if match:
  323. from backend.app.utils.printer_models import normalize_printer_model
  324. raw_model = match.group(1).strip()
  325. self.metadata["sliced_for_model"] = normalize_printer_model(raw_model)
  326. except Exception:
  327. pass # G-code header parsing is best-effort; metadata may come from other sources
  328. def _extract_filament_info(self, data: dict):
  329. """Extract filament info, preferring non-support filaments."""
  330. try:
  331. filament_types = data.get("filament_type", [])
  332. filament_colors = data.get("filament_colour", [])
  333. filament_is_support = data.get("filament_is_support", [])
  334. if not filament_types:
  335. return
  336. # Collect all non-support filaments
  337. non_support_types = []
  338. non_support_colors = []
  339. for i, ftype in enumerate(filament_types):
  340. is_support = filament_is_support[i] if i < len(filament_is_support) else "0"
  341. if is_support == "0":
  342. if ftype and ftype not in non_support_types:
  343. non_support_types.append(ftype)
  344. if i < len(filament_colors) and filament_colors[i]:
  345. color = filament_colors[i]
  346. if color not in non_support_colors:
  347. non_support_colors.append(color)
  348. # Fallback to first filament if all are support
  349. if not non_support_types and filament_types:
  350. non_support_types = [filament_types[0]]
  351. if not non_support_colors and filament_colors:
  352. non_support_colors = [filament_colors[0]]
  353. # Store filament type(s)
  354. if non_support_types:
  355. self.metadata["filament_type"] = ", ".join(non_support_types)
  356. # Store all colors as comma-separated (for multi-color display)
  357. if non_support_colors:
  358. self.metadata["filament_color"] = ",".join(non_support_colors)
  359. except Exception:
  360. pass # Filament info is optional; fall back to slice_info values
  361. def _extract_print_settings(self, data: dict):
  362. """Extract print settings from JSON config."""
  363. try:
  364. # Layer height - usually an array, get first value
  365. if "layer_height" in data:
  366. val = data["layer_height"]
  367. if isinstance(val, list) and val:
  368. self.metadata["layer_height"] = float(val[0])
  369. elif isinstance(val, (int, float, str)):
  370. self.metadata["layer_height"] = float(val)
  371. # Nozzle diameter
  372. if "nozzle_diameter" in data:
  373. val = data["nozzle_diameter"]
  374. if isinstance(val, list) and val:
  375. self.metadata["nozzle_diameter"] = float(val[0])
  376. elif isinstance(val, (int, float, str)):
  377. self.metadata["nozzle_diameter"] = float(val)
  378. # Bed temperature - first layer or regular
  379. for key in ["bed_temperature_initial_layer", "bed_temperature"]:
  380. if key in data:
  381. val = data[key]
  382. if isinstance(val, list) and val:
  383. self.metadata["bed_temperature"] = int(float(val[0]))
  384. elif isinstance(val, (int, float, str)):
  385. self.metadata["bed_temperature"] = int(float(val))
  386. break
  387. # Nozzle temperature
  388. for key in ["nozzle_temperature_initial_layer", "nozzle_temperature"]:
  389. if key in data:
  390. val = data[key]
  391. if isinstance(val, list) and val:
  392. self.metadata["nozzle_temperature"] = int(float(val[0]))
  393. elif isinstance(val, (int, float, str)):
  394. self.metadata["nozzle_temperature"] = int(float(val))
  395. break
  396. # Printer model (extract and normalize)
  397. if "printer_model" in data:
  398. from backend.app.utils.printer_models import normalize_printer_model
  399. self.metadata["sliced_for_model"] = normalize_printer_model(data["printer_model"])
  400. # Build plate type — only set from project_settings if slice_info didn't already
  401. # provide it (slice_info is more authoritative as it reflects the exported plate).
  402. if "bed_type" not in self.metadata and "curr_bed_type" in data:
  403. val = data["curr_bed_type"]
  404. if isinstance(val, str) and val.strip():
  405. self.metadata["bed_type"] = val.strip()
  406. except Exception:
  407. pass # Print settings are optional; missing values are left unset
  408. def _extract_settings_from_content(self, content: str):
  409. """Extract print settings from config content."""
  410. settings_map = {
  411. "layer_height": ("layer_height", float),
  412. "nozzle_diameter": ("nozzle_diameter", float),
  413. "bed_temperature": ("bed_temperature", int),
  414. "nozzle_temperature": ("nozzle_temperature", int),
  415. }
  416. for key, (search_key, converter) in settings_map.items():
  417. if key not in self.metadata:
  418. try:
  419. # Try JSON format
  420. if f'"{search_key}"' in content:
  421. start = content.find(f'"{search_key}"')
  422. value_start = content.find(":", start) + 1
  423. value_end = content.find(",", value_start)
  424. if value_end == -1:
  425. value_end = content.find("}", value_start)
  426. value = content[value_start:value_end].strip().strip('"')
  427. self.metadata[key] = converter(value)
  428. except (ValueError, TypeError):
  429. pass # Skip settings with unconvertible values
  430. def _parse_3dmodel(self, zf: zipfile.ZipFile):
  431. """Parse 3D/3dmodel.model for MakerWorld metadata."""
  432. try:
  433. model_path = "3D/3dmodel.model"
  434. if model_path not in zf.namelist():
  435. return
  436. content = zf.read(model_path).decode("utf-8", errors="ignore")
  437. # Parse XML metadata elements
  438. # MakerWorld adds metadata like: <metadata name="Designer">username</metadata>
  439. metadata_pattern = r'<metadata\s+name="([^"]+)"[^>]*>([^<]*)</metadata>'
  440. matches = re.findall(metadata_pattern, content)
  441. makerworld_fields = {}
  442. for name, value in matches:
  443. makerworld_fields[name] = value.strip()
  444. # Check for direct MakerWorld URL in content
  445. url_pattern = r'https?://makerworld\.com/[^\s<>"\']+/models/(\d+)'
  446. url_match = re.search(url_pattern, content)
  447. if url_match:
  448. self.metadata["makerworld_url"] = url_match.group(0)
  449. self.metadata["makerworld_model_id"] = url_match.group(1)
  450. # Extract model ID from DSM reference in image URLs
  451. # Format: https://makerworld.bblmw.com/makerworld/model/DSM00000001275614/...
  452. # The numeric part (1275614) is the MakerWorld model ID
  453. if "makerworld_url" not in self.metadata:
  454. dsm_pattern = r"DSM0+(\d+)"
  455. dsm_match = re.search(dsm_pattern, content)
  456. if dsm_match:
  457. model_id = dsm_match.group(1)
  458. self.metadata["makerworld_url"] = f"https://makerworld.com/en/models/{model_id}"
  459. self.metadata["makerworld_model_id"] = model_id
  460. # Store designer info
  461. if "Designer" in makerworld_fields:
  462. self.metadata["designer"] = makerworld_fields["Designer"]
  463. if "Title" in makerworld_fields:
  464. self.metadata["print_name"] = makerworld_fields["Title"]
  465. except Exception:
  466. pass # MakerWorld/3dmodel metadata is optional
  467. def _extract_thumbnail(self, zf: zipfile.ZipFile):
  468. """Extract thumbnail image from 3MF.
  469. If a plate_number was specified, try to use that plate's thumbnail first.
  470. """
  471. thumbnail_paths = []
  472. # If a specific plate was printed, try that thumbnail first
  473. if self.plate_number:
  474. thumbnail_paths.append(f"Metadata/plate_{self.plate_number}.png")
  475. # Fallback to default paths
  476. thumbnail_paths.extend(
  477. [
  478. "Metadata/plate_1.png",
  479. "Metadata/thumbnail.png",
  480. "Metadata/model_thumbnail.png",
  481. # Project-wide thumbnail BambuStudio embeds at upload time. We
  482. # only reach this when BS hasn't written a per-plate
  483. # ``Metadata/plate_N.png`` — most notably the #1493 cross-class
  484. # re-slice path where ``--arrange`` rearranges objects but the
  485. # CLI then doesn't emit a fresh per-plate preview. The
  486. # ``_middle`` size is the editor-quality variant (~500 KB);
  487. # ``_small`` and ``_3mf`` are smaller alternates if it's not
  488. # present. Without this fallback the re-sliced archive cards
  489. # render without a cover image.
  490. "Auxiliaries/.thumbnails/thumbnail_middle.png",
  491. "Auxiliaries/.thumbnails/thumbnail_small.png",
  492. "Auxiliaries/.thumbnails/thumbnail_3mf.png",
  493. ]
  494. )
  495. for thumb_path in thumbnail_paths:
  496. if thumb_path in zf.namelist():
  497. self.metadata["_thumbnail_data"] = zf.read(thumb_path)
  498. self.metadata["_thumbnail_ext"] = ".png"
  499. break
  500. def extract_printable_objects_from_3mf(
  501. data: bytes, plate_number: int | None = None, include_positions: bool = False
  502. ) -> dict[int, str] | dict[int, dict] | tuple[dict[int, dict], list | None]:
  503. """Extract printable objects from 3MF file bytes.
  504. This is a lightweight function used during print start to get the list
  505. of objects that can be skipped.
  506. Args:
  507. data: Raw bytes of the 3MF file
  508. plate_number: Which plate was printed (1-based), or None for first plate
  509. include_positions: If True, return tuple of (objects dict, bbox_all)
  510. Returns:
  511. If include_positions=False: Dictionary mapping identify_id (int) to object name (str)
  512. If include_positions=True: Tuple of (dict mapping identify_id to {name, x, y}, bbox_all list or None)
  513. """
  514. from io import BytesIO
  515. printable_objects: dict = {}
  516. bbox_all: list | None = None
  517. try:
  518. with zipfile.ZipFile(BytesIO(data), "r") as zf:
  519. if "Metadata/slice_info.config" not in zf.namelist():
  520. return printable_objects
  521. content = zf.read("Metadata/slice_info.config").decode()
  522. root = ET.fromstring(content)
  523. # Find the correct plate
  524. if plate_number:
  525. plate = root.find(f".//plate[@plate_idx='{plate_number}']")
  526. if plate is None:
  527. plate = root.find(".//plate")
  528. else:
  529. plate = root.find(".//plate")
  530. if plate is None:
  531. return printable_objects
  532. # Get actual plate index from metadata (sliced files only have one plate)
  533. plate_idx = plate_number or 1
  534. for meta in plate.findall("metadata"):
  535. if meta.get("key") == "index":
  536. try:
  537. plate_idx = int(meta.get("value", "1"))
  538. except ValueError:
  539. pass # Use default plate_idx if value is non-numeric
  540. break
  541. # Load position data from plate_N.json if we need positions
  542. # Build a lookup by name - use list to handle duplicate names
  543. bbox_by_name: dict[str, list[list]] = {}
  544. if include_positions:
  545. plate_json_path = f"Metadata/plate_{plate_idx}.json"
  546. if plate_json_path in zf.namelist():
  547. try:
  548. plate_json = json.loads(zf.read(plate_json_path).decode())
  549. # Get bbox_all - the bounding box of all objects (used for image bounds)
  550. bbox_all = plate_json.get("bbox_all")
  551. for bbox_obj in plate_json.get("bbox_objects", []):
  552. obj_name = bbox_obj.get("name")
  553. bbox = bbox_obj.get("bbox", [])
  554. if obj_name and len(bbox) >= 4:
  555. if obj_name not in bbox_by_name:
  556. bbox_by_name[obj_name] = []
  557. bbox_by_name[obj_name].append(bbox)
  558. except (json.JSONDecodeError, KeyError):
  559. pass # Position data is optional; objects will lack x/y coordinates
  560. # Extract objects from slice_info.config
  561. for obj in plate.findall("object"):
  562. identify_id = obj.get("identify_id")
  563. name = obj.get("name")
  564. skipped = obj.get("skipped", "false")
  565. if identify_id and name and skipped.lower() != "true":
  566. try:
  567. obj_id = int(identify_id)
  568. if include_positions:
  569. x, y = None, None
  570. # Match by name - pop first bbox to handle duplicates
  571. bboxes = bbox_by_name.get(name)
  572. if bboxes:
  573. bbox = bboxes.pop(0)
  574. # Calculate center from bbox [x_min, y_min, x_max, y_max]
  575. x = (bbox[0] + bbox[2]) / 2
  576. y = (bbox[1] + bbox[3]) / 2
  577. printable_objects[obj_id] = {"name": name, "x": x, "y": y}
  578. else:
  579. printable_objects[obj_id] = name
  580. except ValueError:
  581. pass # Skip objects with non-numeric identify_id
  582. except Exception:
  583. pass # Return empty dict if 3MF is corrupt or unreadable
  584. if include_positions:
  585. return printable_objects, bbox_all
  586. return printable_objects
  587. class ProjectPageParser:
  588. """Parser for extracting project page data from Bambu Lab 3MF files."""
  589. def __init__(self, file_path: Path):
  590. self.file_path = file_path
  591. def parse(self, archive_id: int) -> dict:
  592. """Extract project page metadata and images from 3MF file."""
  593. import html
  594. result = {
  595. "title": None,
  596. "description": None,
  597. "designer": None,
  598. "designer_user_id": None,
  599. "license": None,
  600. "copyright": None,
  601. "creation_date": None,
  602. "modification_date": None,
  603. "origin": None,
  604. "profile_title": None,
  605. "profile_description": None,
  606. "profile_cover": None,
  607. "profile_user_id": None,
  608. "profile_user_name": None,
  609. "design_model_id": None,
  610. "design_profile_id": None,
  611. "design_region": None,
  612. "model_pictures": [],
  613. "profile_pictures": [],
  614. "thumbnails": [],
  615. }
  616. try:
  617. with zipfile.ZipFile(self.file_path, "r") as zf:
  618. # Parse 3D/3dmodel.model for metadata
  619. model_path = "3D/3dmodel.model"
  620. if model_path in zf.namelist():
  621. content = zf.read(model_path).decode("utf-8", errors="ignore")
  622. # Extract metadata elements using regex
  623. # Format: <metadata name="Key">Value</metadata> or <metadata name="Key" />
  624. metadata_pattern = r'<metadata\s+name="([^"]+)"[^>]*>([^<]*)</metadata>'
  625. matches = re.findall(metadata_pattern, content)
  626. field_mapping = {
  627. "Title": "title",
  628. "Description": "description",
  629. "Designer": "designer",
  630. "DesignerUserId": "designer_user_id",
  631. "License": "license",
  632. "Copyright": "copyright",
  633. "CreationDate": "creation_date",
  634. "ModificationDate": "modification_date",
  635. "Origin": "origin",
  636. "ProfileTitle": "profile_title",
  637. "ProfileDescription": "profile_description",
  638. "ProfileCover": "profile_cover",
  639. "ProfileUserId": "profile_user_id",
  640. "ProfileUserName": "profile_user_name",
  641. "DesignModelId": "design_model_id",
  642. "DesignProfileId": "design_profile_id",
  643. "DesignRegion": "design_region",
  644. }
  645. for name, value in matches:
  646. if name in field_mapping:
  647. # Decode HTML entities multiple times (content is often triple-encoded)
  648. decoded = value.strip()
  649. prev = None
  650. while prev != decoded:
  651. prev = decoded
  652. decoded = html.unescape(decoded)
  653. # Normalize non-breaking spaces to regular spaces
  654. decoded = decoded.replace("\xa0", " ")
  655. result[field_mapping[name]] = decoded if decoded else None
  656. # List images in Auxiliaries folder
  657. from urllib.parse import quote
  658. for name in zf.namelist():
  659. if name.startswith("Auxiliaries/Model Pictures/"):
  660. filename = name.split("/")[-1]
  661. if filename:
  662. result["model_pictures"].append(
  663. {
  664. "name": filename,
  665. "path": name,
  666. "url": f"/api/v1/archives/{archive_id}/project-image/{quote(name, safe='')}",
  667. }
  668. )
  669. elif name.startswith("Auxiliaries/Profile Pictures/"):
  670. filename = name.split("/")[-1]
  671. if filename:
  672. result["profile_pictures"].append(
  673. {
  674. "name": filename,
  675. "path": name,
  676. "url": f"/api/v1/archives/{archive_id}/project-image/{quote(name, safe='')}",
  677. }
  678. )
  679. elif name.startswith("Auxiliaries/.thumbnails/"):
  680. filename = name.split("/")[-1]
  681. if filename:
  682. result["thumbnails"].append(
  683. {
  684. "name": filename,
  685. "path": name,
  686. "url": f"/api/v1/archives/{archive_id}/project-image/{quote(name, safe='')}",
  687. }
  688. )
  689. except Exception as e:
  690. result["_error"] = str(e)
  691. return result
  692. def get_image(self, image_path: str) -> tuple[bytes, str] | None:
  693. """Extract an image from the 3MF file.
  694. Returns tuple of (image_data, content_type) or None if not found.
  695. """
  696. try:
  697. with zipfile.ZipFile(self.file_path, "r") as zf:
  698. if image_path in zf.namelist():
  699. data = zf.read(image_path)
  700. # Determine content type from extension
  701. ext = image_path.lower().split(".")[-1]
  702. content_types = {
  703. "png": "image/png",
  704. "jpg": "image/jpeg",
  705. "jpeg": "image/jpeg",
  706. "webp": "image/webp",
  707. "gif": "image/gif",
  708. }
  709. content_type = content_types.get(ext, "application/octet-stream")
  710. return (data, content_type)
  711. except Exception:
  712. pass # Return None if image cannot be extracted from 3MF
  713. return None
  714. def update_metadata(self, updates: dict) -> bool:
  715. """Update project page metadata in the 3MF file.
  716. Args:
  717. updates: Dict with fields to update (title, description, designer, etc.)
  718. Returns:
  719. True if successful, False otherwise.
  720. """
  721. import html
  722. import tempfile
  723. try:
  724. # Read the 3MF file
  725. with zipfile.ZipFile(self.file_path, "r") as zf_read:
  726. # Find and read the 3dmodel.model file
  727. model_path = "3D/3dmodel.model"
  728. if model_path not in zf_read.namelist():
  729. return False
  730. content = zf_read.read(model_path).decode("utf-8")
  731. # Update metadata fields
  732. field_mapping = {
  733. "title": "Title",
  734. "description": "Description",
  735. "designer": "Designer",
  736. "license": "License",
  737. "copyright": "Copyright",
  738. "profile_title": "ProfileTitle",
  739. "profile_description": "ProfileDescription",
  740. }
  741. for field, xml_name in field_mapping.items():
  742. if field in updates and updates[field] is not None:
  743. new_value = html.escape(updates[field])
  744. # Replace existing metadata or we'd need to add it
  745. pattern = rf'(<metadata\s+name="{xml_name}"[^>]*>)[^<]*(</metadata>)'
  746. replacement = rf"\g<1>{new_value}\g<2>"
  747. content = re.sub(pattern, replacement, content)
  748. # Write to a temporary file first
  749. with tempfile.NamedTemporaryFile(delete=False, suffix=".3mf") as tmp:
  750. tmp_path = Path(tmp.name)
  751. # Create new zip with updated content
  752. with zipfile.ZipFile(tmp_path, "w", zipfile.ZIP_DEFLATED) as zf_write:
  753. for item in zf_read.namelist():
  754. if item == model_path:
  755. zf_write.writestr(item, content.encode("utf-8"))
  756. else:
  757. zf_write.writestr(item, zf_read.read(item))
  758. # Replace original file with updated one
  759. shutil.move(tmp_path, self.file_path)
  760. return True
  761. except Exception:
  762. # Clean up temp file if it exists
  763. if "tmp_path" in locals() and tmp_path.exists():
  764. tmp_path.unlink()
  765. return False
  766. async def _null_print_log_thumbnail_paths(db: AsyncSession, archive_id: int) -> None:
  767. """NULL thumbnail_path on PrintLogEntry rows linked to *archive_id*.
  768. Called from both soft- and hard-delete paths before the archive's files
  769. leave disk. The FK on PrintLogEntry.archive_id is ON DELETE SET NULL so
  770. log rows survive the archive — without this clear, their cached
  771. thumbnail_path would still point at a deleted file and the print-log
  772. view would 404-storm on every render (#1348 follow-up). Lazy-NULL on
  773. the GET route self-heals stragglers (e.g. failed prints that never had
  774. a thumbnail written), but eager clear here avoids the one-time storm.
  775. """
  776. from sqlalchemy import update as sa_update
  777. from backend.app.models.print_log import PrintLogEntry
  778. await db.execute(sa_update(PrintLogEntry).where(PrintLogEntry.archive_id == archive_id).values(thumbnail_path=None))
  779. async def _cancel_pending_queue_items(db: AsyncSession, archive_id: int) -> None:
  780. """Cancel pending queue items pointing at *archive_id* (#1348 follow-up).
  781. Called from ``soft_delete_archive`` only — hard-delete is covered by the
  782. ``ON DELETE CASCADE`` on ``print_queue.archive_id``. A queue item
  783. pointing at an archive whose 3MF has been removed from disk can never
  784. actually dispatch, so cancelling at delete time both (a) tells the user
  785. why the item disappeared from the pending list, and (b) stops the queue
  786. page from 404-storming the archive thumbnail / plates / plate-thumbnail
  787. endpoints when the row is rendered. Only ``pending`` items are touched;
  788. ``printing`` is a rare race the printer-side fail-path catches, and
  789. completed / failed / cancelled rows are historical and untouched.
  790. """
  791. from sqlalchemy import update as sa_update
  792. from backend.app.models.print_queue import PrintQueueItem
  793. await db.execute(
  794. sa_update(PrintQueueItem)
  795. .where(PrintQueueItem.archive_id == archive_id, PrintQueueItem.status == "pending")
  796. .values(status="cancelled", waiting_reason="Source archive deleted")
  797. )
  798. class ArchiveService:
  799. """Service for archiving print jobs."""
    def __init__(self, db: AsyncSession) -> None:
        """Bind the service to *db*, the session its methods execute and commit through."""
        self.db = db
  802. @staticmethod
  803. def compute_file_hash(file_path: Path) -> str:
  804. """Compute SHA256 hash of a file for duplicate detection."""
  805. sha256 = hashlib.sha256()
  806. with open(file_path, "rb") as f:
  807. # Read in chunks to handle large files
  808. for chunk in iter(lambda: f.read(8192), b""):
  809. sha256.update(chunk)
  810. return sha256.hexdigest()
  811. async def get_duplicate_hashes_and_names(self) -> tuple[set[str], set[tuple[str, str]]]:
  812. """Get all content hashes and (print name, hash) pairs that appear more than once.
  813. For hashes: returns all hashes with > 1 archive (true duplicates).
  814. For name/hash pairs: returns only pairs that have > 1 archive
  815. (i.e., same file archived multiple times, not different files with same name).
  816. Returns a tuple of (duplicate_hashes, duplicate_name_hash_pairs).
  817. """
  818. from sqlalchemy import func
  819. # Soft-deleted archives don't appear in the listing (#1343), so they
  820. # mustn't influence the duplicate-group counts either — otherwise a
  821. # group with 1 live + 4 soft-deleted would still be flagged as a
  822. # duplicate even though the user only sees one row.
  823. result = await self.db.execute(
  824. select(PrintArchive.content_hash)
  825. .where(PrintArchive.content_hash.isnot(None), PrintArchive.deleted_at.is_(None))
  826. .group_by(PrintArchive.content_hash)
  827. .having(func.count(PrintArchive.id) > 1)
  828. )
  829. duplicate_hashes = {row[0] for row in result.all()}
  830. # Find print names that have multiple archives with the SAME hash
  831. # This avoids marking different files with the same name as duplicates
  832. result = await self.db.execute(
  833. select(func.lower(PrintArchive.print_name), PrintArchive.content_hash)
  834. .where(
  835. PrintArchive.print_name.isnot(None),
  836. PrintArchive.content_hash.isnot(None),
  837. PrintArchive.deleted_at.is_(None),
  838. )
  839. .group_by(func.lower(PrintArchive.print_name), PrintArchive.content_hash)
  840. .having(func.count(PrintArchive.id) > 1)
  841. )
  842. duplicate_name_hash_pairs = {(row[0], row[1]) for row in result.all()}
  843. return duplicate_hashes, duplicate_name_hash_pairs
  844. async def find_duplicates(
  845. self,
  846. archive_id: int,
  847. content_hash: str | None = None,
  848. print_name: str | None = None,
  849. makerworld_model_id: str | None = None,
  850. ) -> list[dict]:
  851. """Find duplicate archives based on hash or name matching.
  852. Returns list of dicts with id, print_name, created_at, match_type.
  853. """
  854. duplicates = []
  855. # First, find exact matches by content hash
  856. if content_hash:
  857. result = await self.db.execute(
  858. select(PrintArchive)
  859. .where(
  860. and_(
  861. PrintArchive.content_hash == content_hash,
  862. PrintArchive.id != archive_id,
  863. PrintArchive.deleted_at.is_(None),
  864. )
  865. )
  866. .order_by(PrintArchive.created_at.desc())
  867. .limit(10)
  868. )
  869. for archive in result.scalars().all():
  870. duplicates.append(
  871. {
  872. "id": archive.id,
  873. "print_name": archive.print_name,
  874. "created_at": archive.created_at,
  875. "match_type": "exact",
  876. }
  877. )
  878. # Then, find similar matches by print name or MakerWorld ID
  879. # Prefer strict name+hash matching when hash exists; fallback to name-only for legacy/manual
  880. # archives that may not have a content_hash.
  881. if print_name or makerworld_model_id:
  882. conditions = [PrintArchive.id != archive_id, PrintArchive.deleted_at.is_(None)]
  883. name_conditions = []
  884. if print_name:
  885. if content_hash:
  886. # Match if print names are similar AND have the same hash (same file)
  887. name_conditions.append(
  888. and_(PrintArchive.print_name.ilike(print_name), PrintArchive.content_hash == content_hash)
  889. )
  890. else:
  891. # Fallback for archives without hash data: match by print name only.
  892. name_conditions.append(PrintArchive.print_name.ilike(print_name))
  893. if makerworld_model_id:
  894. # Match by MakerWorld model ID stored in extra_data
  895. from backend.app.core.db_dialect import is_sqlite
  896. if is_sqlite():
  897. from sqlalchemy import func
  898. name_conditions.append(
  899. func.json_extract(PrintArchive.extra_data, "$.makerworld_model_id") == str(makerworld_model_id)
  900. )
  901. else:
  902. name_conditions.append(
  903. text("(extra_data::jsonb->>'makerworld_model_id') = :mw_id").bindparams(
  904. mw_id=str(makerworld_model_id)
  905. )
  906. )
  907. if name_conditions:
  908. conditions.append(or_(*name_conditions))
  909. result = await self.db.execute(
  910. select(PrintArchive).where(and_(*conditions)).order_by(PrintArchive.created_at.desc()).limit(10)
  911. )
  912. for archive in result.scalars().all():
  913. # Don't add if already in duplicates (exact match)
  914. if not any(d["id"] == archive.id for d in duplicates):
  915. duplicates.append(
  916. {
  917. "id": archive.id,
  918. "print_name": archive.print_name,
  919. "created_at": archive.created_at,
  920. "match_type": "similar",
  921. }
  922. )
  923. return duplicates
  924. async def archive_print(
  925. self,
  926. printer_id: int | None,
  927. source_file: Path,
  928. print_data: dict | None = None,
  929. created_by_id: int | None = None,
  930. original_filename: str | None = None,
  931. project_id: int | None = None,
  932. subtask_id: str | None = None,
  933. prefer_filename_for_name: bool = False,
  934. ) -> PrintArchive | None:
  935. """Archive a 3MF file with metadata.
  936. Args:
  937. printer_id: ID of the printer (optional)
  938. source_file: Path to the 3MF file
  939. print_data: Print data from MQTT (optional)
  940. created_by_id: User ID who created this archive (optional, for user tracking)
  941. original_filename: Original human-readable filename (optional, for library files
  942. stored with UUID names)
  943. project_id: Project to associate this archive with (optional, set when triggered
  944. from the project view)
  945. subtask_id: MQTT-provided task identifier (optional). Used to match an
  946. existing archive across a backend restart mid-print so the
  947. original row can be resumed instead of cancelled (#972).
  948. prefer_filename_for_name: When True, use the uploaded filename stem as the
  949. archive's display name even if the 3MF embeds a `print_name` in its
  950. metadata. Used by virtual-printer flows so users who rename a job in
  951. BambuStudio's "send to printer" dialog see that name instead of the
  952. creator-baked title (#1152).
  953. """
  954. # Verify printer exists if specified
  955. if printer_id is not None:
  956. result = await self.db.execute(select(Printer).where(Printer.id == printer_id))
  957. printer = result.scalar_one_or_none()
  958. if not printer:
  959. return None
  960. # Create archive directory structure
  961. timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
  962. display_stem = resolve_display_stem(original_filename if original_filename else source_file.name)
  963. archive_name = f"{timestamp}_{display_stem}"
  964. # Use "unassigned" folder for archives without a printer
  965. printer_folder = str(printer_id) if printer_id is not None else "unassigned"
  966. archive_dir = (
  967. settings.archive_dir / printer_folder / archive_name
  968. ) # SEC-PATH-OK: printer_folder = str(int|None) → digits or "unassigned"; archive_name = f"{timestamp}_{display_stem}" where resolve_display_stem strips path components via Path(filename).name
  969. archive_dir.mkdir(parents=True, exist_ok=True)
  970. # Copy 3MF file with an explicit fsync'd loop (avoids a sendfile
  971. # short-read quirk that silently truncated 3MF archives on some
  972. # platforms — see _copy_and_fsync and #1032).
  973. dest_file = archive_dir / source_file.name
  974. _copy_and_fsync(source_file, dest_file)
  975. # If we just archived a 3MF, verify the dest is a valid ZIP before
  976. # going any further. Staying quiet here is how #1032 escaped review —
  977. # the archive row was written but every later zipfile.ZipFile() call
  978. # on the dest failed with "File is not a zip file".
  979. if (
  980. source_file.suffix.lower() == ".3mf"
  981. and zipfile.is_zipfile(source_file)
  982. and not zipfile.is_zipfile(dest_file)
  983. ):
  984. try:
  985. src_size = source_file.stat().st_size
  986. dst_size = dest_file.stat().st_size
  987. except OSError:
  988. src_size = dst_size = -1
  989. logger.error(
  990. "Archive copy corrupted 3MF: src=%s (%s bytes, valid ZIP) -> dst=%s (%s bytes, NOT a ZIP). Refusing to create archive row.",
  991. source_file,
  992. src_size,
  993. dest_file,
  994. dst_size,
  995. )
  996. # Narrow cleanup: remove only the truncated file and the archive
  997. # directory if it's now empty. archive_dir was created with
  998. # exist_ok=True so it could in theory pre-date this call (e.g.
  999. # same-second same-filename collision); rmtree would be too broad.
  1000. try:
  1001. dest_file.unlink()
  1002. except OSError:
  1003. pass
  1004. try:
  1005. archive_dir.rmdir()
  1006. except OSError:
  1007. pass # directory not empty — leave untouched
  1008. return None
  1009. # Compute content hash for duplicate detection
  1010. content_hash = self.compute_file_hash(dest_file)
  1011. # Extract plate number from filename (e.g., "plate_5" from "/data/Metadata/plate_5.gcode")
  1012. plate_number = None
  1013. if print_data:
  1014. filename = print_data.get("filename", "")
  1015. match = re.search(r"plate_(\d+)", filename)
  1016. if match:
  1017. plate_number = int(match.group(1))
  1018. # Parse 3MF metadata
  1019. parser = ThreeMFParser(dest_file, plate_number=plate_number)
  1020. metadata = parser.parse()
  1021. # Save thumbnail if present
  1022. thumbnail_path = None
  1023. if "_thumbnail_data" in metadata:
  1024. thumb_file = archive_dir / f"thumbnail{metadata['_thumbnail_ext']}"
  1025. thumb_file.write_bytes(metadata["_thumbnail_data"])
  1026. thumbnail_path = str(thumb_file.relative_to(settings.base_dir))
  1027. del metadata["_thumbnail_data"]
  1028. del metadata["_thumbnail_ext"]
  1029. # Merge with print data from MQTT
  1030. if print_data:
  1031. metadata["_print_data"] = print_data
  1032. # Determine status and timestamps
  1033. status = print_data.get("status", "completed") if print_data else "archived"
  1034. started_at = datetime.now(timezone.utc) if status == "printing" else None
  1035. completed_at = datetime.now(timezone.utc) if status in ("completed", "failed", "archived") else None
  1036. # Calculate cost based on filament usage and type
  1037. cost = None
  1038. filament_grams = metadata.get("filament_used_grams")
  1039. filament_type = metadata.get("filament_type")
  1040. if filament_grams and filament_type:
  1041. # For multi-material prints, use the first filament type for cost calculation
  1042. primary_type = filament_type.split(",")[0].strip()
  1043. # Look up filament cost_per_kg from database
  1044. filament_result = await self.db.execute(select(Filament).where(Filament.type == primary_type).limit(1))
  1045. filament = filament_result.scalar_one_or_none()
  1046. if filament:
  1047. cost = round((filament_grams / 1000) * filament.cost_per_kg, 2)
  1048. else:
  1049. # Use default filament cost from settings
  1050. from backend.app.api.routes.settings import get_setting
  1051. default_cost_setting = await get_setting(self.db, "default_filament_cost")
  1052. default_cost_per_kg = float(default_cost_setting) if default_cost_setting else 25.0
  1053. cost = round((filament_grams / 1000) * default_cost_per_kg, 2)
  1054. # Calculate quantity from printable objects count
  1055. # printable_objects is a dict of {identify_id: name} for non-skipped objects
  1056. quantity = 1 # Default to 1
  1057. printable_objects = metadata.get("printable_objects")
  1058. if printable_objects and isinstance(printable_objects, dict):
  1059. quantity = len(printable_objects)
  1060. logger.debug("Auto-detected %s parts from 3MF printable objects", quantity)
  1061. # Create archive record
  1062. archive = PrintArchive(
  1063. printer_id=printer_id,
  1064. filename=original_filename or source_file.name,
  1065. file_path=str(dest_file.relative_to(settings.base_dir)),
  1066. file_size=dest_file.stat().st_size,
  1067. content_hash=content_hash,
  1068. thumbnail_path=thumbnail_path,
  1069. print_name=display_stem if prefer_filename_for_name else (metadata.get("print_name") or display_stem),
  1070. print_time_seconds=metadata.get("print_time_seconds"),
  1071. filament_used_grams=metadata.get("filament_used_grams"),
  1072. filament_type=metadata.get("filament_type"),
  1073. filament_color=metadata.get("filament_color"),
  1074. layer_height=metadata.get("layer_height"),
  1075. total_layers=metadata.get("total_layers"),
  1076. nozzle_diameter=metadata.get("nozzle_diameter"),
  1077. bed_temperature=metadata.get("bed_temperature"),
  1078. bed_type=metadata.get("bed_type"),
  1079. nozzle_temperature=metadata.get("nozzle_temperature"),
  1080. sliced_for_model=metadata.get("sliced_for_model"),
  1081. makerworld_url=metadata.get("makerworld_url"),
  1082. designer=metadata.get("designer"),
  1083. status=status,
  1084. started_at=started_at,
  1085. completed_at=completed_at,
  1086. cost=cost,
  1087. quantity=quantity,
  1088. extra_data=metadata,
  1089. created_by_id=created_by_id,
  1090. project_id=project_id,
  1091. subtask_id=subtask_id,
  1092. )
  1093. self.db.add(archive)
  1094. await self.db.commit()
  1095. await self.db.refresh(archive)
  1096. return archive
  1097. async def get_archive(self, archive_id: int) -> PrintArchive | None:
  1098. """Get an archive by ID with relationships loaded."""
  1099. from sqlalchemy.orm import selectinload
  1100. result = await self.db.execute(
  1101. select(PrintArchive)
  1102. .options(selectinload(PrintArchive.created_by), selectinload(PrintArchive.project))
  1103. .where(PrintArchive.id == archive_id)
  1104. )
  1105. return result.scalar_one_or_none()
  1106. async def update_archive_status(
  1107. self,
  1108. archive_id: int,
  1109. status: str,
  1110. completed_at: datetime | None = None,
  1111. failure_reason: str | None = None,
  1112. ) -> bool:
  1113. """Update the status of an archive."""
  1114. archive = await self.get_archive(archive_id)
  1115. if not archive:
  1116. return False
  1117. archive.status = status
  1118. if completed_at:
  1119. archive.completed_at = completed_at
  1120. if failure_reason:
  1121. archive.failure_reason = failure_reason
  1122. await self.db.commit()
  1123. return True
  1124. async def list_archives(
  1125. self,
  1126. printer_id: int | None = None,
  1127. project_id: int | None = None,
  1128. date_from: date | None = None,
  1129. date_to: date | None = None,
  1130. limit: int = 50,
  1131. offset: int = 0,
  1132. ) -> list[PrintArchive]:
  1133. """List archives with optional filtering."""
  1134. from sqlalchemy.orm import selectinload
  1135. query = (
  1136. select(PrintArchive)
  1137. .options(selectinload(PrintArchive.project), selectinload(PrintArchive.created_by))
  1138. # Hide soft-deleted rows from the listings (#1343). The stats
  1139. # endpoint deliberately does NOT add this filter so deleted
  1140. # archives keep contributing to Quick Stats.
  1141. .where(PrintArchive.deleted_at.is_(None))
  1142. .order_by(PrintArchive.created_at.desc())
  1143. )
  1144. if printer_id:
  1145. query = query.where(PrintArchive.printer_id == printer_id)
  1146. if project_id:
  1147. query = query.where(PrintArchive.project_id == project_id)
  1148. if date_from:
  1149. dt_from = datetime.combine(date_from, time.min, tzinfo=timezone.utc)
  1150. query = query.where(PrintArchive.created_at >= dt_from)
  1151. if date_to:
  1152. dt_to = datetime.combine(date_to, time.max, tzinfo=timezone.utc)
  1153. query = query.where(PrintArchive.created_at <= dt_to)
  1154. query = query.limit(limit).offset(offset)
  1155. result = await self.db.execute(query)
  1156. return list(result.scalars().all())
  1157. async def soft_delete_archive(self, archive_id: int) -> bool:
  1158. """Soft-delete an archive (#1343).
  1159. Removes the archive's files from disk (it disappears from the listings
  1160. and frees the storage) but flips the row's ``deleted_at`` so the stats
  1161. endpoint keeps counting its filament / energy / time / cost. The user
  1162. can opt into a hard delete via the "Also remove from statistics"
  1163. checkbox in the delete dialog — that path calls ``delete_archive``
  1164. instead and removes the row entirely.
  1165. """
  1166. archive = await self.get_archive(archive_id)
  1167. if not archive:
  1168. return False
  1169. if archive.deleted_at is not None:
  1170. # Already soft-deleted; nothing to do. The files were purged on
  1171. # the first soft-delete pass so there is nothing left on disk.
  1172. return True
  1173. dir_to_delete = self._resolve_archive_dir_for_delete(archive)
  1174. await _null_print_log_thumbnail_paths(self.db, archive_id)
  1175. await _cancel_pending_queue_items(self.db, archive_id)
  1176. archive.deleted_at = datetime.now(timezone.utc)
  1177. await self.db.commit()
  1178. if dir_to_delete:
  1179. shutil.rmtree(dir_to_delete, ignore_errors=True)
  1180. return True
  1181. def _resolve_archive_dir_for_delete(self, archive: PrintArchive) -> Path | None:
  1182. """Return the on-disk directory that backs *archive*, after the same
  1183. two safety checks ``delete_archive`` enforces.
  1184. Extracted so soft-delete and hard-delete share the path-resolution
  1185. rules. Returns ``None`` when nothing should be removed from disk
  1186. (no file_path, path outside archive_dir, or path not deep enough).
  1187. """
  1188. if not archive.file_path or not archive.file_path.strip():
  1189. logger.error(
  1190. f"SECURITY: Refusing to delete files for archive {archive.id} - "
  1191. f"file_path is empty or invalid: '{archive.file_path}'"
  1192. )
  1193. return None
  1194. file_path = settings.base_dir / archive.file_path
  1195. if not file_path.exists():
  1196. return None
  1197. archive_dir = file_path.parent
  1198. try:
  1199. relative_path = archive_dir.resolve().relative_to(settings.archive_dir.resolve())
  1200. except ValueError:
  1201. logger.error(
  1202. f"SECURITY: Refusing to delete archive {archive.id} - "
  1203. f"path {archive_dir} is outside archive directory {settings.archive_dir}"
  1204. )
  1205. return None
  1206. if len(relative_path.parts) < 1:
  1207. logger.error(
  1208. f"SECURITY: Refusing to delete archive {archive.id} - "
  1209. f"path {archive_dir} is not deep enough inside archive directory"
  1210. )
  1211. return None
  1212. return archive_dir
  1213. async def delete_archive(self, archive_id: int) -> bool:
  1214. """Delete an archive and its files."""
  1215. archive = await self.get_archive(archive_id)
  1216. if not archive:
  1217. return False
  1218. # Resolve the directory to delete BEFORE committing the DB change
  1219. dir_to_delete: Path | None = None
  1220. if archive.file_path and archive.file_path.strip():
  1221. file_path = settings.base_dir / archive.file_path
  1222. if file_path.exists():
  1223. archive_dir = file_path.parent
  1224. # Safety check 1: archive_dir must be inside archive_dir
  1225. try:
  1226. archive_dir.resolve().relative_to(settings.archive_dir.resolve())
  1227. except ValueError:
  1228. logger.error(
  1229. f"SECURITY: Refusing to delete archive {archive_id} - "
  1230. f"path {archive_dir} is outside archive directory {settings.archive_dir}"
  1231. )
  1232. await self.db.delete(archive)
  1233. await self.db.commit()
  1234. return True
  1235. # Safety check 2: archive_dir must be at least 1 level deep inside archive_dir
  1236. try:
  1237. relative_path = archive_dir.resolve().relative_to(settings.archive_dir.resolve())
  1238. if len(relative_path.parts) < 1:
  1239. logger.error(
  1240. f"SECURITY: Refusing to delete archive {archive_id} - "
  1241. f"path {archive_dir} is not deep enough inside archive directory"
  1242. )
  1243. await self.db.delete(archive)
  1244. await self.db.commit()
  1245. return True
  1246. except ValueError:
  1247. pass # Already handled above
  1248. dir_to_delete = archive_dir
  1249. else:
  1250. logger.error(
  1251. f"SECURITY: Refusing to delete files for archive {archive_id} - "
  1252. f"file_path is empty or invalid: '{archive.file_path}'"
  1253. )
  1254. # NULL stale thumbnail_path on linked PrintLogEntries before the FK
  1255. # SET-NULL cascade fires. The on-disk file is about to be removed by
  1256. # the rmtree below, so the path on any surviving log entry (archive_id
  1257. # gets SET NULL by the FK) would otherwise point at a missing file
  1258. # and produce 404 storms in the print-log view (#1348-followup).
  1259. await _null_print_log_thumbnail_paths(self.db, archive_id)
  1260. # Delete database record FIRST — if the commit fails (e.g. database locked
  1261. # during concurrent bulk deletes), the files stay on disk and nothing is lost.
  1262. await self.db.delete(archive)
  1263. await self.db.commit()
  1264. # Only delete files AFTER the DB commit succeeds to avoid orphaned records
  1265. if dir_to_delete:
  1266. shutil.rmtree(dir_to_delete, ignore_errors=True)
  1267. return True
  1268. async def attach_timelapse(
  1269. self,
  1270. archive_id: int,
  1271. timelapse_data: bytes,
  1272. filename: str = "timelapse.mp4",
  1273. ) -> bool:
  1274. """Attach a timelapse video to an archive.
  1275. Non-MP4 videos (e.g. AVI from P1S) are saved as-is and a background
  1276. task converts them to MP4 for browser compatibility.
  1277. """
  1278. import asyncio
  1279. archive = await self.get_archive(archive_id)
  1280. if not archive:
  1281. return False
  1282. # Get archive directory
  1283. file_path = (
  1284. settings.base_dir / archive.file_path
  1285. ) # SEC-PATH-OK: archive.file_path is DB-stored, set by archive_print() under settings.archive_dir
  1286. archive_dir = file_path.parent
  1287. # Save timelapse - use thread pool to avoid blocking event loop
  1288. # (timelapse files can be 100MB+, sync write blocks for seconds).
  1289. # `filename` ultimately comes from a printer's FTP listing (compromised-
  1290. # printer threat model) or a query param on /archives/{id}/timelapse/select;
  1291. # the safe-join helper rejects ``..`` segments and absolute paths so a
  1292. # crafted name can't escape the archive directory. Use http=False so a
  1293. # service-layer reject surfaces as a return False (matching the existing
  1294. # not-found contract) rather than a 400 raised from inside a background
  1295. # task.
  1296. try:
  1297. timelapse_file = safe_join_under(archive_dir, filename, http=False)
  1298. except PathTraversalError:
  1299. logger.warning(
  1300. "Refusing to attach timelapse with unsafe filename %r to archive %s",
  1301. filename,
  1302. archive_id,
  1303. )
  1304. return False
  1305. await asyncio.to_thread(timelapse_file.write_bytes, timelapse_data)
  1306. # Update archive record
  1307. archive.timelapse_path = str(timelapse_file.relative_to(settings.base_dir))
  1308. await self.db.commit()
  1309. # For non-MP4 videos (e.g. AVI from P1S), kick off background conversion
  1310. if not filename.lower().endswith(".mp4"):
  1311. asyncio.create_task(
  1312. _convert_timelapse_to_mp4(archive_id, timelapse_file),
  1313. name=f"timelapse-convert-{archive_id}",
  1314. )
  1315. return True
  1316. async def _convert_timelapse_to_mp4(archive_id: int, source_path: Path) -> None:
  1317. """Background task: convert non-MP4 timelapse (e.g. AVI from P1S) to MP4.
  1318. Runs with low CPU priority (-threads 1, nice) so it doesn't starve
  1319. other processes on resource-constrained devices like Raspberry Pi.
  1320. """
  1321. import asyncio
  1322. from backend.app.core.database import async_session
  1323. from backend.app.services.camera import get_ffmpeg_path
  1324. logger = logging.getLogger(__name__)
  1325. ffmpeg = get_ffmpeg_path()
  1326. if not ffmpeg:
  1327. logger.info(
  1328. "FFmpeg not available, skipping timelapse conversion for archive %s (file saved as %s)",
  1329. archive_id,
  1330. source_path.suffix,
  1331. )
  1332. return
  1333. mp4_path = source_path.with_suffix(".mp4")
  1334. try:
  1335. cmd = [
  1336. ffmpeg,
  1337. "-y",
  1338. "-i",
  1339. str(source_path),
  1340. "-c:v",
  1341. "libx264",
  1342. "-preset",
  1343. "fast",
  1344. "-crf",
  1345. "23",
  1346. "-threads",
  1347. "1",
  1348. "-movflags",
  1349. "+faststart",
  1350. str(mp4_path),
  1351. ]
  1352. # Try with nice for lower CPU priority (standard on Linux/macOS)
  1353. try:
  1354. process = await asyncio.create_subprocess_exec(
  1355. "nice",
  1356. "-n",
  1357. "19",
  1358. *cmd,
  1359. stdout=asyncio.subprocess.PIPE,
  1360. stderr=asyncio.subprocess.PIPE,
  1361. )
  1362. except FileNotFoundError:
  1363. # nice not available (e.g. Windows), run without
  1364. process = await asyncio.create_subprocess_exec(
  1365. *cmd,
  1366. stdout=asyncio.subprocess.PIPE,
  1367. stderr=asyncio.subprocess.PIPE,
  1368. )
  1369. _, stderr = await process.communicate()
  1370. if process.returncode != 0:
  1371. logger.warning(
  1372. "Timelapse conversion failed for archive %s: %s",
  1373. archive_id,
  1374. stderr.decode()[-500:],
  1375. )
  1376. if mp4_path.exists():
  1377. mp4_path.unlink()
  1378. return
  1379. # Update DB path to the new MP4 file
  1380. async with async_session() as db:
  1381. from backend.app.models.archive import PrintArchive
  1382. result = await db.execute(select(PrintArchive).where(PrintArchive.id == archive_id))
  1383. archive = result.scalar_one_or_none()
  1384. if archive:
  1385. archive.timelapse_path = str(mp4_path.relative_to(settings.base_dir))
  1386. await db.commit()
  1387. # Remove original non-MP4 file
  1388. if source_path.exists():
  1389. source_path.unlink()
  1390. logger.info(
  1391. "Converted timelapse to MP4 for archive %s (%s → %s)",
  1392. archive_id,
  1393. source_path.name,
  1394. mp4_path.name,
  1395. )
  1396. except Exception as e:
  1397. logger.warning("Timelapse conversion error for archive %s: %s", archive_id, e)
  1398. if mp4_path.exists():
  1399. mp4_path.unlink()