archive.py 47 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110
  1. import hashlib
  2. import json
  3. import logging
  4. import re
  5. import shutil
  6. import zipfile
  7. from datetime import datetime
  8. from pathlib import Path
  9. from defusedxml import ElementTree as ET
  10. from defusedxml.ElementTree import ParseError as XMLParseError
  11. from sqlalchemy import and_, or_, select
  12. from sqlalchemy.ext.asyncio import AsyncSession
  13. from backend.app.core.config import settings
  14. from backend.app.models.archive import PrintArchive
  15. from backend.app.models.filament import Filament
  16. from backend.app.models.printer import Printer
  17. logger = logging.getLogger(__name__)
  18. class ThreeMFParser:
  19. """Parser for Bambu Lab 3MF files."""
  20. def __init__(self, file_path: Path, plate_number: int | None = None):
  21. self.file_path = file_path
  22. self.plate_number = plate_number # Which plate was printed (1, 2, 3, etc.)
  23. self.metadata: dict = {}
  24. def parse(self) -> dict:
  25. """Extract metadata from 3MF file."""
  26. try:
  27. with zipfile.ZipFile(self.file_path, "r") as zf:
  28. self._parse_slice_info(zf) # Now sets self.plate_number from slice_info
  29. self._parse_project_settings(zf)
  30. self._parse_gcode_header(zf)
  31. self._parse_3dmodel(zf)
  32. self._extract_thumbnail(zf) # Uses correct plate_number for thumbnail
  33. # Enhance print_name with plate info if this is a multi-plate export
  34. plate_index = self.metadata.get("_plate_index")
  35. if plate_index and plate_index > 1:
  36. # Append plate number to distinguish from other plates
  37. existing_name = self.metadata.get("print_name", "")
  38. if existing_name and f"Plate {plate_index}" not in existing_name:
  39. self.metadata["print_name"] = f"{existing_name} - Plate {plate_index}"
  40. # ALWAYS prefer slice_info values - they contain ONLY filaments actually used in print
  41. # project_settings contains ALL configured filaments (AMS slots), not just used ones
  42. if self.metadata.get("_slice_filament_type"):
  43. self.metadata["filament_type"] = self.metadata["_slice_filament_type"]
  44. if self.metadata.get("_slice_filament_color"):
  45. self.metadata["filament_color"] = self.metadata["_slice_filament_color"]
  46. # Clean up internal keys
  47. self.metadata.pop("_slice_filament_type", None)
  48. self.metadata.pop("_slice_filament_color", None)
  49. self.metadata.pop("_plate_index", None)
  50. except (KeyError, ValueError, zipfile.BadZipFile, XMLParseError, UnicodeDecodeError):
  51. pass # Return whatever metadata was extracted before the error
  52. return self.metadata
  53. def _parse_slice_info(self, zf: zipfile.ZipFile):
  54. """Parse slice_info.config for print settings and printable objects."""
  55. try:
  56. if "Metadata/slice_info.config" in zf.namelist():
  57. content = zf.read("Metadata/slice_info.config").decode()
  58. root = ET.fromstring(content)
  59. # Extract printer_model_id from plate metadata
  60. # Format: <plate><metadata key="printer_model_id" value="C11" /></plate>
  61. for meta in root.findall(".//metadata"):
  62. key = meta.get("key")
  63. value = meta.get("value")
  64. if key == "printer_model_id" and value:
  65. from backend.app.utils.printer_models import normalize_printer_model_id
  66. normalized = normalize_printer_model_id(value)
  67. if normalized:
  68. self.metadata["sliced_for_model"] = normalized
  69. break
  70. # Find the plate element (single-plate exports only have one plate)
  71. plate = root.find(".//plate")
  72. if plate is not None:
  73. # Extract metadata from plate element
  74. for meta in plate.findall("metadata"):
  75. key = meta.get("key")
  76. value = meta.get("value")
  77. if key == "index" and value:
  78. # Extract plate index - this tells us which plate was exported
  79. try:
  80. extracted_index = int(value)
  81. # Set plate_number if not already set from filename
  82. if not self.plate_number:
  83. self.plate_number = extracted_index
  84. # Store in metadata for print_name generation
  85. self.metadata["_plate_index"] = extracted_index
  86. except ValueError:
  87. pass # Skip non-numeric plate index
  88. elif key == "prediction" and value:
  89. self.metadata["print_time_seconds"] = int(value)
  90. elif key == "weight" and value:
  91. self.metadata["filament_used_grams"] = float(value)
  92. # Extract printable objects for skip object functionality
  93. # Objects are stored as <object identify_id="123" name="Part1" skipped="false" />
  94. printable_objects = {}
  95. for obj in plate.findall("object"):
  96. identify_id = obj.get("identify_id")
  97. name = obj.get("name")
  98. skipped = obj.get("skipped", "false")
  99. # Only include objects that are not pre-skipped
  100. if identify_id and name and skipped.lower() != "true":
  101. try:
  102. printable_objects[int(identify_id)] = name
  103. except ValueError:
  104. pass # Skip objects with non-numeric identify_id
  105. if printable_objects:
  106. self.metadata["printable_objects"] = printable_objects
  107. # Get filament info from filaments ACTUALLY USED in the print
  108. # slice_info has <filament id="1" type="PLA" color="#FFFFFF" used_g="100" />
  109. # Only include filaments where used_g > 0
  110. filaments = root.findall(".//filament")
  111. if filaments:
  112. # Collect unique filament types and colors for filaments that are actually used
  113. types = []
  114. colors = []
  115. for f in filaments:
  116. # Check if this filament is actually used in the print
  117. used_g = f.get("used_g", "0")
  118. try:
  119. used_amount = float(used_g)
  120. except (ValueError, TypeError):
  121. used_amount = 0
  122. # Only include if used_g > 0 (filament is actually consumed)
  123. if used_amount > 0:
  124. ftype = f.get("type")
  125. fcolor = f.get("color")
  126. if ftype and ftype not in types:
  127. types.append(ftype)
  128. if fcolor and fcolor not in colors:
  129. colors.append(fcolor)
  130. if types:
  131. self.metadata["_slice_filament_type"] = ", ".join(types)
  132. if colors:
  133. self.metadata["_slice_filament_color"] = ",".join(colors)
  134. except (KeyError, ValueError, XMLParseError, UnicodeDecodeError):
  135. pass # Skip unparseable slice_info metadata
  136. def _parse_project_settings(self, zf: zipfile.ZipFile):
  137. """Parse project settings for print configuration."""
  138. try:
  139. if "Metadata/project_settings.config" in zf.namelist():
  140. content = zf.read("Metadata/project_settings.config").decode()
  141. try:
  142. data = json.loads(content)
  143. self._extract_filament_info(data)
  144. self._extract_print_settings(data)
  145. except json.JSONDecodeError:
  146. pass # Skip malformed project_settings JSON
  147. except (KeyError, ValueError, UnicodeDecodeError):
  148. pass # Skip unreadable project settings file
  149. def _parse_gcode_header(self, zf: zipfile.ZipFile):
  150. """Parse G-code file header for total layer count and printer model."""
  151. import re
  152. try:
  153. # Look for plate_1.gcode or similar
  154. gcode_files = [f for f in zf.namelist() if f.endswith(".gcode")]
  155. if not gcode_files:
  156. return
  157. # Read first 4KB of G-code (header contains metadata)
  158. gcode_path = gcode_files[0]
  159. with zf.open(gcode_path) as f:
  160. header = f.read(4096).decode("utf-8", errors="ignore")
  161. # Look for "; total layer number: XX" pattern
  162. match = re.search(r";\s*total\s+layer\s+number[:\s]+(\d+)", header, re.IGNORECASE)
  163. if match:
  164. self.metadata["total_layers"] = int(match.group(1))
  165. # Look for printer_model in gcode header (fallback if not found in slice_info)
  166. # Format: "; printer_model = Bambu Lab X1 Carbon" or "; printer_model = X1C"
  167. if "sliced_for_model" not in self.metadata:
  168. match = re.search(r";\s*printer_model\s*=\s*(.+)", header, re.IGNORECASE)
  169. if match:
  170. from backend.app.utils.printer_models import normalize_printer_model
  171. raw_model = match.group(1).strip()
  172. self.metadata["sliced_for_model"] = normalize_printer_model(raw_model)
  173. except (KeyError, ValueError, UnicodeDecodeError):
  174. pass # G-code header parsing is best-effort; metadata may come from other sources
  175. def _extract_filament_info(self, data: dict):
  176. """Extract filament info, preferring non-support filaments."""
  177. try:
  178. filament_types = data.get("filament_type", [])
  179. filament_colors = data.get("filament_colour", [])
  180. filament_is_support = data.get("filament_is_support", [])
  181. if not filament_types:
  182. return
  183. # Collect all non-support filaments
  184. non_support_types = []
  185. non_support_colors = []
  186. for i, ftype in enumerate(filament_types):
  187. is_support = filament_is_support[i] if i < len(filament_is_support) else "0"
  188. if is_support == "0":
  189. if ftype and ftype not in non_support_types:
  190. non_support_types.append(ftype)
  191. if i < len(filament_colors) and filament_colors[i]:
  192. color = filament_colors[i]
  193. if color not in non_support_colors:
  194. non_support_colors.append(color)
  195. # Fallback to first filament if all are support
  196. if not non_support_types and filament_types:
  197. non_support_types = [filament_types[0]]
  198. if not non_support_colors and filament_colors:
  199. non_support_colors = [filament_colors[0]]
  200. # Store filament type(s)
  201. if non_support_types:
  202. self.metadata["filament_type"] = ", ".join(non_support_types)
  203. # Store all colors as comma-separated (for multi-color display)
  204. if non_support_colors:
  205. self.metadata["filament_color"] = ",".join(non_support_colors)
  206. except (KeyError, ValueError, TypeError, IndexError):
  207. pass # Filament info is optional; fall back to slice_info values
  208. def _extract_print_settings(self, data: dict):
  209. """Extract print settings from JSON config."""
  210. try:
  211. # Layer height - usually an array, get first value
  212. if "layer_height" in data:
  213. val = data["layer_height"]
  214. if isinstance(val, list) and val:
  215. self.metadata["layer_height"] = float(val[0])
  216. elif isinstance(val, (int, float, str)):
  217. self.metadata["layer_height"] = float(val)
  218. # Nozzle diameter
  219. if "nozzle_diameter" in data:
  220. val = data["nozzle_diameter"]
  221. if isinstance(val, list) and val:
  222. self.metadata["nozzle_diameter"] = float(val[0])
  223. elif isinstance(val, (int, float, str)):
  224. self.metadata["nozzle_diameter"] = float(val)
  225. # Bed temperature - first layer or regular
  226. for key in ["bed_temperature_initial_layer", "bed_temperature"]:
  227. if key in data:
  228. val = data[key]
  229. if isinstance(val, list) and val:
  230. self.metadata["bed_temperature"] = int(float(val[0]))
  231. elif isinstance(val, (int, float, str)):
  232. self.metadata["bed_temperature"] = int(float(val))
  233. break
  234. # Nozzle temperature
  235. for key in ["nozzle_temperature_initial_layer", "nozzle_temperature"]:
  236. if key in data:
  237. val = data[key]
  238. if isinstance(val, list) and val:
  239. self.metadata["nozzle_temperature"] = int(float(val[0]))
  240. elif isinstance(val, (int, float, str)):
  241. self.metadata["nozzle_temperature"] = int(float(val))
  242. break
  243. # Printer model (extract and normalize)
  244. if "printer_model" in data:
  245. from backend.app.utils.printer_models import normalize_printer_model
  246. self.metadata["sliced_for_model"] = normalize_printer_model(data["printer_model"])
  247. except (KeyError, ValueError, TypeError):
  248. pass # Print settings are optional; missing values are left unset
  249. def _extract_settings_from_content(self, content: str):
  250. """Extract print settings from config content."""
  251. settings_map = {
  252. "layer_height": ("layer_height", float),
  253. "nozzle_diameter": ("nozzle_diameter", float),
  254. "bed_temperature": ("bed_temperature", int),
  255. "nozzle_temperature": ("nozzle_temperature", int),
  256. }
  257. for key, (search_key, converter) in settings_map.items():
  258. if key not in self.metadata:
  259. try:
  260. # Try JSON format
  261. if f'"{search_key}"' in content:
  262. start = content.find(f'"{search_key}"')
  263. value_start = content.find(":", start) + 1
  264. value_end = content.find(",", value_start)
  265. if value_end == -1:
  266. value_end = content.find("}", value_start)
  267. value = content[value_start:value_end].strip().strip('"')
  268. self.metadata[key] = converter(value)
  269. except (ValueError, TypeError):
  270. pass # Skip settings with unconvertible values
  271. def _parse_3dmodel(self, zf: zipfile.ZipFile):
  272. """Parse 3D/3dmodel.model for MakerWorld metadata."""
  273. import re
  274. try:
  275. model_path = "3D/3dmodel.model"
  276. if model_path not in zf.namelist():
  277. return
  278. content = zf.read(model_path).decode("utf-8", errors="ignore")
  279. # Parse XML metadata elements
  280. # MakerWorld adds metadata like: <metadata name="Designer">username</metadata>
  281. metadata_pattern = r'<metadata\s+name="([^"]+)"[^>]*>([^<]*)</metadata>'
  282. matches = re.findall(metadata_pattern, content)
  283. makerworld_fields = {}
  284. for name, value in matches:
  285. makerworld_fields[name] = value.strip()
  286. # Check for direct MakerWorld URL in content
  287. url_pattern = r'https?://makerworld\.com/[^\s<>"\']+/models/(\d+)'
  288. url_match = re.search(url_pattern, content)
  289. if url_match:
  290. self.metadata["makerworld_url"] = url_match.group(0)
  291. self.metadata["makerworld_model_id"] = url_match.group(1)
  292. # Extract model ID from DSM reference in image URLs
  293. # Format: https://makerworld.bblmw.com/makerworld/model/DSM00000001275614/...
  294. # The numeric part (1275614) is the MakerWorld model ID
  295. if "makerworld_url" not in self.metadata:
  296. dsm_pattern = r"DSM0+(\d+)"
  297. dsm_match = re.search(dsm_pattern, content)
  298. if dsm_match:
  299. model_id = dsm_match.group(1)
  300. self.metadata["makerworld_url"] = f"https://makerworld.com/en/models/{model_id}"
  301. self.metadata["makerworld_model_id"] = model_id
  302. # Store designer info
  303. if "Designer" in makerworld_fields:
  304. self.metadata["designer"] = makerworld_fields["Designer"]
  305. if "Title" in makerworld_fields:
  306. self.metadata["print_name"] = makerworld_fields["Title"]
  307. except (KeyError, ValueError, UnicodeDecodeError):
  308. pass # MakerWorld/3dmodel metadata is optional
  309. def _extract_thumbnail(self, zf: zipfile.ZipFile):
  310. """Extract thumbnail image from 3MF.
  311. If a plate_number was specified, try to use that plate's thumbnail first.
  312. """
  313. thumbnail_paths = []
  314. # If a specific plate was printed, try that thumbnail first
  315. if self.plate_number:
  316. thumbnail_paths.append(f"Metadata/plate_{self.plate_number}.png")
  317. # Fallback to default paths
  318. thumbnail_paths.extend(
  319. [
  320. "Metadata/plate_1.png",
  321. "Metadata/thumbnail.png",
  322. "Metadata/model_thumbnail.png",
  323. ]
  324. )
  325. for thumb_path in thumbnail_paths:
  326. if thumb_path in zf.namelist():
  327. self.metadata["_thumbnail_data"] = zf.read(thumb_path)
  328. self.metadata["_thumbnail_ext"] = ".png"
  329. break
  330. def extract_printable_objects_from_3mf(
  331. data: bytes, plate_number: int | None = None, include_positions: bool = False
  332. ) -> dict[int, str] | dict[int, dict] | tuple[dict[int, dict], list | None]:
  333. """Extract printable objects from 3MF file bytes.
  334. This is a lightweight function used during print start to get the list
  335. of objects that can be skipped.
  336. Args:
  337. data: Raw bytes of the 3MF file
  338. plate_number: Which plate was printed (1-based), or None for first plate
  339. include_positions: If True, return tuple of (objects dict, bbox_all)
  340. Returns:
  341. If include_positions=False: Dictionary mapping identify_id (int) to object name (str)
  342. If include_positions=True: Tuple of (dict mapping identify_id to {name, x, y}, bbox_all list or None)
  343. """
  344. import json
  345. from io import BytesIO
  346. printable_objects: dict = {}
  347. bbox_all: list | None = None
  348. try:
  349. with zipfile.ZipFile(BytesIO(data), "r") as zf:
  350. if "Metadata/slice_info.config" not in zf.namelist():
  351. return printable_objects
  352. content = zf.read("Metadata/slice_info.config").decode()
  353. root = ET.fromstring(content)
  354. # Find the correct plate
  355. if plate_number:
  356. plate = root.find(f".//plate[@plate_idx='{plate_number}']")
  357. if plate is None:
  358. plate = root.find(".//plate")
  359. else:
  360. plate = root.find(".//plate")
  361. if plate is None:
  362. return printable_objects
  363. # Get actual plate index from metadata (sliced files only have one plate)
  364. plate_idx = plate_number or 1
  365. for meta in plate.findall("metadata"):
  366. if meta.get("key") == "index":
  367. try:
  368. plate_idx = int(meta.get("value", "1"))
  369. except ValueError:
  370. pass # Use default plate_idx if value is non-numeric
  371. break
  372. # Load position data from plate_N.json if we need positions
  373. # Build a lookup by name - use list to handle duplicate names
  374. bbox_by_name: dict[str, list[list]] = {}
  375. if include_positions:
  376. plate_json_path = f"Metadata/plate_{plate_idx}.json"
  377. if plate_json_path in zf.namelist():
  378. try:
  379. plate_json = json.loads(zf.read(plate_json_path).decode())
  380. # Get bbox_all - the bounding box of all objects (used for image bounds)
  381. bbox_all = plate_json.get("bbox_all")
  382. for bbox_obj in plate_json.get("bbox_objects", []):
  383. obj_name = bbox_obj.get("name")
  384. bbox = bbox_obj.get("bbox", [])
  385. if obj_name and len(bbox) >= 4:
  386. if obj_name not in bbox_by_name:
  387. bbox_by_name[obj_name] = []
  388. bbox_by_name[obj_name].append(bbox)
  389. except (json.JSONDecodeError, KeyError):
  390. pass # Position data is optional; objects will lack x/y coordinates
  391. # Extract objects from slice_info.config
  392. for obj in plate.findall("object"):
  393. identify_id = obj.get("identify_id")
  394. name = obj.get("name")
  395. skipped = obj.get("skipped", "false")
  396. if identify_id and name and skipped.lower() != "true":
  397. try:
  398. obj_id = int(identify_id)
  399. if include_positions:
  400. x, y = None, None
  401. # Match by name - pop first bbox to handle duplicates
  402. bboxes = bbox_by_name.get(name)
  403. if bboxes:
  404. bbox = bboxes.pop(0)
  405. # Calculate center from bbox [x_min, y_min, x_max, y_max]
  406. x = (bbox[0] + bbox[2]) / 2
  407. y = (bbox[1] + bbox[3]) / 2
  408. printable_objects[obj_id] = {"name": name, "x": x, "y": y}
  409. else:
  410. printable_objects[obj_id] = name
  411. except ValueError:
  412. pass # Skip objects with non-numeric identify_id
  413. except (KeyError, ValueError, zipfile.BadZipFile, XMLParseError, UnicodeDecodeError):
  414. pass # Return empty dict if 3MF is corrupt or unreadable
  415. if include_positions:
  416. return printable_objects, bbox_all
  417. return printable_objects
  418. class ProjectPageParser:
  419. """Parser for extracting project page data from Bambu Lab 3MF files."""
  420. def __init__(self, file_path: Path):
  421. self.file_path = file_path
  422. def parse(self, archive_id: int) -> dict:
  423. """Extract project page metadata and images from 3MF file."""
  424. import html
  425. import re
  426. result = {
  427. "title": None,
  428. "description": None,
  429. "designer": None,
  430. "designer_user_id": None,
  431. "license": None,
  432. "copyright": None,
  433. "creation_date": None,
  434. "modification_date": None,
  435. "origin": None,
  436. "profile_title": None,
  437. "profile_description": None,
  438. "profile_cover": None,
  439. "profile_user_id": None,
  440. "profile_user_name": None,
  441. "design_model_id": None,
  442. "design_profile_id": None,
  443. "design_region": None,
  444. "model_pictures": [],
  445. "profile_pictures": [],
  446. "thumbnails": [],
  447. }
  448. try:
  449. with zipfile.ZipFile(self.file_path, "r") as zf:
  450. # Parse 3D/3dmodel.model for metadata
  451. model_path = "3D/3dmodel.model"
  452. if model_path in zf.namelist():
  453. content = zf.read(model_path).decode("utf-8", errors="ignore")
  454. # Extract metadata elements using regex
  455. # Format: <metadata name="Key">Value</metadata> or <metadata name="Key" />
  456. metadata_pattern = r'<metadata\s+name="([^"]+)"[^>]*>([^<]*)</metadata>'
  457. matches = re.findall(metadata_pattern, content)
  458. field_mapping = {
  459. "Title": "title",
  460. "Description": "description",
  461. "Designer": "designer",
  462. "DesignerUserId": "designer_user_id",
  463. "License": "license",
  464. "Copyright": "copyright",
  465. "CreationDate": "creation_date",
  466. "ModificationDate": "modification_date",
  467. "Origin": "origin",
  468. "ProfileTitle": "profile_title",
  469. "ProfileDescription": "profile_description",
  470. "ProfileCover": "profile_cover",
  471. "ProfileUserId": "profile_user_id",
  472. "ProfileUserName": "profile_user_name",
  473. "DesignModelId": "design_model_id",
  474. "DesignProfileId": "design_profile_id",
  475. "DesignRegion": "design_region",
  476. }
  477. for name, value in matches:
  478. if name in field_mapping:
  479. # Decode HTML entities multiple times (content is often triple-encoded)
  480. decoded = value.strip()
  481. prev = None
  482. while prev != decoded:
  483. prev = decoded
  484. decoded = html.unescape(decoded)
  485. # Normalize non-breaking spaces to regular spaces
  486. decoded = decoded.replace("\xa0", " ")
  487. result[field_mapping[name]] = decoded if decoded else None
  488. # List images in Auxiliaries folder
  489. from urllib.parse import quote
  490. for name in zf.namelist():
  491. if name.startswith("Auxiliaries/Model Pictures/"):
  492. filename = name.split("/")[-1]
  493. if filename:
  494. result["model_pictures"].append(
  495. {
  496. "name": filename,
  497. "path": name,
  498. "url": f"/api/v1/archives/{archive_id}/project-image/{quote(name, safe='')}",
  499. }
  500. )
  501. elif name.startswith("Auxiliaries/Profile Pictures/"):
  502. filename = name.split("/")[-1]
  503. if filename:
  504. result["profile_pictures"].append(
  505. {
  506. "name": filename,
  507. "path": name,
  508. "url": f"/api/v1/archives/{archive_id}/project-image/{quote(name, safe='')}",
  509. }
  510. )
  511. elif name.startswith("Auxiliaries/.thumbnails/"):
  512. filename = name.split("/")[-1]
  513. if filename:
  514. result["thumbnails"].append(
  515. {
  516. "name": filename,
  517. "path": name,
  518. "url": f"/api/v1/archives/{archive_id}/project-image/{quote(name, safe='')}",
  519. }
  520. )
  521. except (KeyError, ValueError, zipfile.BadZipFile, UnicodeDecodeError) as e:
  522. result["_error"] = str(e)
  523. return result
  524. def get_image(self, image_path: str) -> tuple[bytes, str] | None:
  525. """Extract an image from the 3MF file.
  526. Returns tuple of (image_data, content_type) or None if not found.
  527. """
  528. try:
  529. with zipfile.ZipFile(self.file_path, "r") as zf:
  530. if image_path in zf.namelist():
  531. data = zf.read(image_path)
  532. # Determine content type from extension
  533. ext = image_path.lower().split(".")[-1]
  534. content_types = {
  535. "png": "image/png",
  536. "jpg": "image/jpeg",
  537. "jpeg": "image/jpeg",
  538. "webp": "image/webp",
  539. "gif": "image/gif",
  540. }
  541. content_type = content_types.get(ext, "application/octet-stream")
  542. return (data, content_type)
  543. except (KeyError, zipfile.BadZipFile, OSError):
  544. pass # Return None if image cannot be extracted from 3MF
  545. return None
  546. def update_metadata(self, updates: dict) -> bool:
  547. """Update project page metadata in the 3MF file.
  548. Args:
  549. updates: Dict with fields to update (title, description, designer, etc.)
  550. Returns:
  551. True if successful, False otherwise.
  552. """
  553. import html
  554. import re
  555. import tempfile
  556. try:
  557. # Read the 3MF file
  558. with zipfile.ZipFile(self.file_path, "r") as zf_read:
  559. # Find and read the 3dmodel.model file
  560. model_path = "3D/3dmodel.model"
  561. if model_path not in zf_read.namelist():
  562. return False
  563. content = zf_read.read(model_path).decode("utf-8")
  564. # Update metadata fields
  565. field_mapping = {
  566. "title": "Title",
  567. "description": "Description",
  568. "designer": "Designer",
  569. "license": "License",
  570. "copyright": "Copyright",
  571. "profile_title": "ProfileTitle",
  572. "profile_description": "ProfileDescription",
  573. }
  574. for field, xml_name in field_mapping.items():
  575. if field in updates and updates[field] is not None:
  576. new_value = html.escape(updates[field])
  577. # Replace existing metadata or we'd need to add it
  578. pattern = rf'(<metadata\s+name="{xml_name}"[^>]*>)[^<]*(</metadata>)'
  579. replacement = rf"\g<1>{new_value}\g<2>"
  580. content = re.sub(pattern, replacement, content)
  581. # Write to a temporary file first
  582. with tempfile.NamedTemporaryFile(delete=False, suffix=".3mf") as tmp:
  583. tmp_path = Path(tmp.name)
  584. # Create new zip with updated content
  585. with zipfile.ZipFile(tmp_path, "w", zipfile.ZIP_DEFLATED) as zf_write:
  586. for item in zf_read.namelist():
  587. if item == model_path:
  588. zf_write.writestr(item, content.encode("utf-8"))
  589. else:
  590. zf_write.writestr(item, zf_read.read(item))
  591. # Replace original file with updated one
  592. shutil.move(tmp_path, self.file_path)
  593. return True
  594. except (zipfile.BadZipFile, OSError, UnicodeDecodeError, KeyError, ValueError):
  595. # Clean up temp file if it exists
  596. if "tmp_path" in locals() and tmp_path.exists():
  597. tmp_path.unlink()
  598. return False
  599. class ArchiveService:
  600. """Service for archiving print jobs."""
  601. def __init__(self, db: AsyncSession):
  602. self.db = db
  603. @staticmethod
  604. def compute_file_hash(file_path: Path) -> str:
  605. """Compute SHA256 hash of a file for duplicate detection."""
  606. sha256 = hashlib.sha256()
  607. with open(file_path, "rb") as f:
  608. # Read in chunks to handle large files
  609. for chunk in iter(lambda: f.read(8192), b""):
  610. sha256.update(chunk)
  611. return sha256.hexdigest()
  612. async def get_duplicate_hashes(self) -> set[str]:
  613. """Get all content hashes that appear more than once.
  614. Returns a set of hashes that have duplicates.
  615. """
  616. from sqlalchemy import func
  617. result = await self.db.execute(
  618. select(PrintArchive.content_hash)
  619. .where(PrintArchive.content_hash.isnot(None))
  620. .group_by(PrintArchive.content_hash)
  621. .having(func.count(PrintArchive.id) > 1)
  622. )
  623. return {row[0] for row in result.all()}
  async def find_duplicates(
      self,
      archive_id: int,
      content_hash: str | None = None,
      print_name: str | None = None,
      makerworld_model_id: str | None = None,
  ) -> list[dict]:
      """Find archives that duplicate the given one by hash, name or model ID.

      Two passes are made, each limited to the 10 most recently created rows:

      1. "exact" matches: same ``content_hash``.
      2. "similar" matches: same print name (case-insensitive) or the same
         MakerWorld model ID stored in ``extra_data``.

      The archive identified by ``archive_id`` is never returned, and an
      archive already reported as "exact" is not repeated as "similar".

      Args:
          archive_id: ID of the archive to exclude from the results.
          content_hash: Content hash to match exactly (skipped when falsy).
          print_name: Print name to match case-insensitively (skipped when falsy).
          makerworld_model_id: MakerWorld model ID to match (skipped when falsy).

      Returns:
          List of dicts with ``id``, ``print_name``, ``created_at`` and
          ``match_type`` ("exact" or "similar"), exact matches first.
      """
      from sqlalchemy import func

      duplicates: list[dict] = []
      seen_ids: set[int] = set()

      def _collect(archive, match_type: str) -> None:
          # Skip anything already reported so each archive appears once.
          if archive.id in seen_ids:
              return
          seen_ids.add(archive.id)
          duplicates.append(
              {
                  "id": archive.id,
                  "print_name": archive.print_name,
                  "created_at": archive.created_at,
                  "match_type": match_type,
              }
          )

      # First, find exact matches by content hash
      if content_hash:
          result = await self.db.execute(
              select(PrintArchive)
              .where(
                  and_(
                      PrintArchive.content_hash == content_hash,
                      PrintArchive.id != archive_id,
                  )
              )
              .order_by(PrintArchive.created_at.desc())
              .limit(10)
          )
          for archive in result.scalars().all():
              _collect(archive, "exact")

      # Then, find similar matches by print name or MakerWorld ID
      if print_name or makerworld_model_id:
          name_conditions = []
          if print_name:
              # ilike() is used only for case-insensitive equality here, so the
              # LIKE wildcards must be escaped: otherwise "my_part" would also
              # match "myXpart" and a "%" in the name would match anything.
              escaped_name = (
                  print_name.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
              )
              name_conditions.append(PrintArchive.print_name.ilike(escaped_name, escape="\\"))
          if makerworld_model_id:
              # Match by MakerWorld model ID stored in extra_data
              # Use json_extract for SQLite compatibility (astext is PostgreSQL-only)
              name_conditions.append(
                  func.json_extract(PrintArchive.extra_data, "$.makerworld_model_id")
                  == str(makerworld_model_id)
              )

          result = await self.db.execute(
              select(PrintArchive)
              .where(and_(PrintArchive.id != archive_id, or_(*name_conditions)))
              .order_by(PrintArchive.created_at.desc())
              .limit(10)
          )
          for archive in result.scalars().all():
              _collect(archive, "similar")

      return duplicates
  async def archive_print(
      self,
      printer_id: int | None,
      source_file: Path,
      print_data: dict | None = None,
      created_by_id: int | None = None,
  ) -> PrintArchive | None:
      """Archive a 3MF file with metadata.

      Copies the file into ``<archive_dir>/<printer or "unassigned">/<timestamp>_<stem>/``,
      hashes it, parses its 3MF metadata, saves the embedded thumbnail if one
      is present, estimates the filament cost and stores a ``PrintArchive`` row.

      Args:
          printer_id: ID of the printer (optional)
          source_file: Path to the 3MF file
          print_data: Print data from MQTT (optional)
          created_by_id: User ID who created this archive (optional, for user tracking)

      Returns:
          The persisted archive, or ``None`` when ``printer_id`` is given but
          no such printer exists.
      """
      import asyncio

      # Verify printer exists if specified
      if printer_id is not None:
          result = await self.db.execute(select(Printer).where(Printer.id == printer_id))
          if not result.scalar_one_or_none():
              return None

      # Create archive directory structure
      timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
      archive_name = f"{timestamp}_{source_file.stem}"
      # Use "unassigned" folder for archives without a printer
      printer_folder = str(printer_id) if printer_id is not None else "unassigned"
      archive_dir = settings.archive_dir / printer_folder / archive_name
      archive_dir.mkdir(parents=True, exist_ok=True)

      # Copy and hash the 3MF file in a worker thread so that large files do
      # not stall the event loop while they are read and written.
      dest_file = archive_dir / source_file.name
      await asyncio.to_thread(shutil.copy2, source_file, dest_file)
      content_hash = await asyncio.to_thread(self.compute_file_hash, dest_file)

      # Extract plate number from filename (e.g., "plate_5" from "/data/Metadata/plate_5.gcode")
      plate_number = None
      if print_data:
          # "or ''" also covers a "filename" key that is present but None.
          filename = print_data.get("filename") or ""
          match = re.search(r"plate_(\d+)", filename)
          if match:
              plate_number = int(match.group(1))

      # Parse 3MF metadata
      metadata = ThreeMFParser(dest_file, plate_number=plate_number).parse()

      # Save thumbnail if present; the raw bytes and extension are removed from
      # the metadata so they are not stored in extra_data.
      thumbnail_path = None
      if "_thumbnail_data" in metadata:
          thumbnail_ext = metadata.pop("_thumbnail_ext")
          thumbnail_data = metadata.pop("_thumbnail_data")
          thumb_file = archive_dir / f"thumbnail{thumbnail_ext}"
          thumb_file.write_bytes(thumbnail_data)
          thumbnail_path = str(thumb_file.relative_to(settings.base_dir))

      # Merge with print data from MQTT
      if print_data:
          metadata["_print_data"] = print_data

      # Determine status and timestamps
      status = print_data.get("status", "completed") if print_data else "archived"
      now = datetime.now()
      started_at = now if status == "printing" else None
      completed_at = now if status in ("completed", "failed", "archived") else None

      # Calculate cost based on filament usage and type
      cost = None
      filament_grams = metadata.get("filament_used_grams")
      filament_type = metadata.get("filament_type")
      if filament_grams and filament_type:
          # For multi-material prints, use the first filament type for cost calculation
          primary_type = filament_type.split(",")[0].strip()
          # Look up filament cost_per_kg from database
          filament_result = await self.db.execute(
              select(Filament).where(Filament.type == primary_type).limit(1)
          )
          filament = filament_result.scalar_one_or_none()
          if filament:
              cost_per_kg = filament.cost_per_kg
          else:
              # Use default filament cost from settings
              from backend.app.api.routes.settings import get_setting

              default_cost_setting = await get_setting(self.db, "default_filament_cost")
              cost_per_kg = float(default_cost_setting) if default_cost_setting else 25.0
          cost = round((filament_grams / 1000) * cost_per_kg, 2)

      # Calculate quantity from printable objects count
      # printable_objects is a dict of {identify_id: name} for non-skipped objects
      quantity = 1  # Default to 1
      printable_objects = metadata.get("printable_objects")
      if printable_objects and isinstance(printable_objects, dict):
          quantity = len(printable_objects)
          logger.debug("Auto-detected %s parts from 3MF printable objects", quantity)

      # Create archive record
      archive = PrintArchive(
          printer_id=printer_id,
          filename=source_file.name,
          file_path=str(dest_file.relative_to(settings.base_dir)),
          file_size=dest_file.stat().st_size,
          content_hash=content_hash,
          thumbnail_path=thumbnail_path,
          print_name=metadata.get("print_name") or source_file.stem,
          print_time_seconds=metadata.get("print_time_seconds"),
          filament_used_grams=filament_grams,
          filament_type=filament_type,
          filament_color=metadata.get("filament_color"),
          layer_height=metadata.get("layer_height"),
          total_layers=metadata.get("total_layers"),
          nozzle_diameter=metadata.get("nozzle_diameter"),
          bed_temperature=metadata.get("bed_temperature"),
          nozzle_temperature=metadata.get("nozzle_temperature"),
          sliced_for_model=metadata.get("sliced_for_model"),
          makerworld_url=metadata.get("makerworld_url"),
          designer=metadata.get("designer"),
          status=status,
          started_at=started_at,
          completed_at=completed_at,
          cost=cost,
          quantity=quantity,
          extra_data=metadata,
          created_by_id=created_by_id,
      )
      self.db.add(archive)
      await self.db.commit()
      await self.db.refresh(archive)
      return archive
  async def get_archive(self, archive_id: int) -> PrintArchive | None:
      """Fetch one archive by primary key, eagerly loading its relationships.

      The ``created_by`` and ``project`` relationships are loaded with
      ``selectinload``.

      Returns:
          The matching archive, or ``None`` if no row has that ID.
      """
      from sqlalchemy.orm import selectinload

      eager_loads = (
          selectinload(PrintArchive.created_by),
          selectinload(PrintArchive.project),
      )
      statement = select(PrintArchive).options(*eager_loads).where(PrintArchive.id == archive_id)
      rows = await self.db.execute(statement)
      return rows.scalar_one_or_none()
  812. async def update_archive_status(
  813. self,
  814. archive_id: int,
  815. status: str,
  816. completed_at: datetime | None = None,
  817. failure_reason: str | None = None,
  818. ) -> bool:
  819. """Update the status of an archive."""
  820. archive = await self.get_archive(archive_id)
  821. if not archive:
  822. return False
  823. archive.status = status
  824. if completed_at:
  825. archive.completed_at = completed_at
  826. if failure_reason:
  827. archive.failure_reason = failure_reason
  828. await self.db.commit()
  829. return True
  async def add_reprint_cost(self, archive_id: int) -> bool:
      """Increase an archive's cost by the filament cost of one more print.

      The per-kg price comes from the first ``Filament`` row whose type equals
      the archive's primary (first comma-separated) filament type. If none
      exists, the ``default_filament_cost`` setting is used, and 25.0 if that
      setting is empty.

      Returns:
          ``False`` if the archive is missing or has no filament weight/type
          recorded, otherwise ``True`` after the new total is committed.
      """
      archive = await self.get_archive(archive_id)
      if not archive:
          return False
      if not (archive.filament_used_grams and archive.filament_type):
          return False

      from backend.app.api.routes.settings import get_setting

      # Multi-material prints are priced by their first listed filament type.
      primary_type = archive.filament_type.partition(",")[0].strip()

      lookup = await self.db.execute(select(Filament).where(Filament.type == primary_type).limit(1))
      matched_filament = lookup.scalar_one_or_none()
      if matched_filament:
          cost_per_kg = matched_filament.cost_per_kg
      else:
          # No filament of that type is known: use the configured default price.
          configured_default = await get_setting(self.db, "default_filament_cost")
          cost_per_kg = float(configured_default) if configured_default else 25.0

      additional_cost = round((archive.filament_used_grams / 1000) * cost_per_kg, 2)

      # Start from the reprint cost when no cost was recorded yet, else accumulate.
      archive.cost = (
          additional_cost if archive.cost is None else round(archive.cost + additional_cost, 2)
      )

      await self.db.commit()
      logger.info("Added reprint cost %s to archive %s, new total: %s", additional_cost, archive_id, archive.cost)
      return True
  async def list_archives(
      self,
      printer_id: int | None = None,
      project_id: int | None = None,
      limit: int = 50,
      offset: int = 0,
  ) -> list[PrintArchive]:
      """List archives, newest first, with optional filtering and paging.

      The ``project`` and ``created_by`` relationships are eagerly loaded.

      Args:
          printer_id: Only return archives of this printer (``None`` = no filter).
          project_id: Only return archives of this project (``None`` = no filter).
          limit: Maximum number of archives to return.
          offset: Number of archives to skip before the first returned one.

      Returns:
          The matching archives ordered by ``created_at`` descending.
      """
      from sqlalchemy.orm import selectinload

      query = (
          select(PrintArchive)
          .options(selectinload(PrintArchive.project), selectinload(PrintArchive.created_by))
          .order_by(PrintArchive.created_at.desc())
      )
      # Compare against None rather than truthiness so that an ID of 0 is
      # applied as a filter instead of silently returning every archive.
      if printer_id is not None:
          query = query.where(PrintArchive.printer_id == printer_id)
      if project_id is not None:
          query = query.where(PrintArchive.project_id == project_id)

      query = query.limit(limit).offset(offset)
      result = await self.db.execute(query)
      return list(result.scalars().all())
  async def delete_archive(self, archive_id: int) -> bool:
      """Delete an archive's database record and, when safe, its files.

      The directory containing the archived file is removed only if it
      resolves to a location strictly inside ``settings.archive_dir``. When
      that cannot be established the files are left in place and an error is
      logged, but the database record is still deleted.

      Returns:
          ``False`` if the archive does not exist, otherwise ``True``.
      """
      import asyncio

      archive = await self.get_archive(archive_id)
      if not archive:
          return False

      # Delete files - with CRITICAL safety checks to prevent accidental deletion
      # of parent directories (e.g., /opt) if file_path is empty/malformed
      if archive.file_path and archive.file_path.strip():
          file_path = settings.base_dir / archive.file_path
          if file_path.exists():
              archive_dir = file_path.parent
              try:
                  relative_path = archive_dir.resolve().relative_to(settings.archive_dir.resolve())
              except ValueError:
                  # Safety check 1: the directory must be inside the archive root.
                  logger.error(
                      "SECURITY: Refusing to delete archive %s - "
                      "path %s is outside archive directory %s",
                      archive_id,
                      archive_dir,
                      settings.archive_dir,
                  )
              else:
                  if not relative_path.parts:
                      # Safety check 2: never remove the archive root itself.
                      logger.error(
                          "SECURITY: Refusing to delete archive %s - "
                          "path %s is not deep enough inside archive directory",
                          archive_id,
                          archive_dir,
                      )
                  else:
                      # Removing a directory tree is blocking I/O; keep it off
                      # the event loop.
                      await asyncio.to_thread(shutil.rmtree, archive_dir, ignore_errors=True)
      else:
          logger.error(
              "SECURITY: Refusing to delete files for archive %s - "
              "file_path is empty or invalid: '%s'",
              archive_id,
              archive.file_path,
          )

      # Delete database record (done even when the files were left untouched)
      await self.db.delete(archive)
      await self.db.commit()
      return True
  async def attach_timelapse(
      self,
      archive_id: int,
      timelapse_data: bytes,
      filename: str = "timelapse.mp4",
  ) -> bool:
      """Attach a timelapse video to an archive.

      The video is written next to the archived file and its path, relative
      to ``settings.base_dir``, is stored in ``timelapse_path``.

      Args:
          archive_id: ID of the archive to attach the video to.
          timelapse_data: Raw bytes of the video.
          filename: Name to store the video under. Only the final path
              component is used, so the file always lands in the archive's
              own directory.

      Returns:
          ``False`` if the archive does not exist or has no usable
          ``file_path``, otherwise ``True`` after the record is committed.
      """
      import asyncio

      archive = await self.get_archive(archive_id)
      if not archive:
          return False

      # Without a file_path the "archive directory" would be the parent of
      # base_dir, so refuse instead of writing outside the archive folder.
      if not (archive.file_path and archive.file_path.strip()):
          logger.error(
              "Cannot attach timelapse to archive %s - file_path is empty or invalid: '%s'",
              archive_id,
              archive.file_path,
          )
          return False

      # Get archive directory
      file_path = settings.base_dir / archive.file_path
      archive_dir = file_path.parent

      # Drop any directory components (including "..") from the supplied name
      # so the video cannot be written outside the archive directory.
      safe_name = Path(filename.replace("\\", "/")).name
      if safe_name in ("", ".", ".."):
          safe_name = "timelapse.mp4"

      # Save timelapse - use thread pool to avoid blocking event loop
      # (timelapse files can be 100MB+, sync write blocks for seconds)
      timelapse_file = archive_dir / safe_name
      await asyncio.to_thread(timelapse_file.write_bytes, timelapse_data)

      # Update archive record
      archive.timelapse_path = str(timelapse_file.relative_to(settings.base_dir))
      await self.db.commit()
      return True
  944. # Update archive record
  945. archive.timelapse_path = str(timelapse_file.relative_to(settings.base_dir))
  946. await self.db.commit()
  947. return True