{"id":4755,"date":"2026-04-09T21:54:27","date_gmt":"2026-04-09T16:24:27","guid":{"rendered":"https:\/\/itsupportwale.com\/blog\/10-devops-best-practices-for-faster-software-delivery-2\/"},"modified":"2026-04-09T21:54:27","modified_gmt":"2026-04-09T16:24:27","slug":"10-devops-best-practices-for-faster-software-delivery-2","status":"publish","type":"post","link":"https:\/\/itsupportwale.com\/blog\/10-devops-best-practices-for-faster-software-delivery-2\/","title":{"rendered":"10 DevOps Best Practices for Faster Software Delivery"},"content":{"rendered":"<div id=\"ez-toc-container\" class=\"ez-toc-v2_0_80 counter-hierarchy ez-toc-counter ez-toc-grey ez-toc-container-direction\">\n<p class=\"ez-toc-title\" style=\"cursor:inherit\">Table of Contents<\/p>\n<label for=\"ez-toc-cssicon-toggle-item-6a0dddc160a12\" class=\"ez-toc-cssicon-toggle-label\"><span class=\"\"><span class=\"eztoc-hide\" style=\"display:none;\">Toggle<\/span><span class=\"ez-toc-icon-toggle-span\"><svg style=\"fill: #999;color:#999\" xmlns=\"http:\/\/www.w3.org\/2000\/svg\" class=\"list-377408\" width=\"20px\" height=\"20px\" viewBox=\"0 0 24 24\" fill=\"none\"><path d=\"M6 6H4v2h2V6zm14 0H8v2h12V6zM4 11h2v2H4v-2zm16 0H8v2h12v-2zM4 16h2v2H4v-2zm16 0H8v2h12v-2z\" fill=\"currentColor\"><\/path><\/svg><svg style=\"fill: #999;color:#999\" class=\"arrow-unsorted-368013\" xmlns=\"http:\/\/www.w3.org\/2000\/svg\" width=\"10px\" height=\"10px\" viewBox=\"0 0 24 24\" version=\"1.2\" baseProfile=\"tiny\"><path d=\"M18.2 9.3l-6.2-6.3-6.2 6.3c-.2.2-.3.4-.3.7s.1.5.3.7c.2.2.4.3.7.3h11c.3 0 .5-.1.7-.3.2-.2.3-.5.3-.7s-.1-.5-.3-.7zM5.8 14.7l6.2 6.3 6.2-6.3c.2-.2.3-.5.3-.7s-.1-.5-.3-.7c-.2-.2-.4-.3-.7-.3h-11c-.3 0-.5.1-.7.3-.2.2-.3.5-.3.7s.1.5.3.7z\"\/><\/svg><\/span><\/span><\/label><input type=\"checkbox\"  id=\"ez-toc-cssicon-toggle-item-6a0dddc160a12\"  aria-label=\"Toggle\" \/><nav><ul class='ez-toc-list ez-toc-list-level-1 ' ><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link 
ez-toc-heading-1\" href=\"https:\/\/itsupportwale.com\/blog\/10-devops-best-practices-for-faster-software-delivery-2\/#Stop_Building_Pipelines_and_Start_Building_Systems_A_Decade_of_DevOps_Regrets\" >Stop Building Pipelines and Start Building Systems: A Decade of DevOps Regrets<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-2\" href=\"https:\/\/itsupportwale.com\/blog\/10-devops-best-practices-for-faster-software-delivery-2\/#The_Myth_of_the_%E2%80%9CBest_Practice%E2%80%9D\" >The Myth of the &#8220;Best Practice&#8221;<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-3\" href=\"https:\/\/itsupportwale.com\/blog\/10-devops-best-practices-for-faster-software-delivery-2\/#CICD_The_Pipeline_is_Not_the_Product\" >CI\/CD: The Pipeline is Not the Product<\/a><ul class='ez-toc-list-level-3' ><li class='ez-toc-heading-level-3'><a class=\"ez-toc-link ez-toc-heading-4\" href=\"https:\/\/itsupportwale.com\/blog\/10-devops-best-practices-for-faster-software-delivery-2\/#The_Docker_Image_Bloat_Problem\" >The Docker Image Bloat Problem<\/a><\/li><\/ul><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-5\" href=\"https:\/\/itsupportwale.com\/blog\/10-devops-best-practices-for-faster-software-delivery-2\/#Infrastructure_as_Code_State_is_the_Enemy\" >Infrastructure as Code: State is the Enemy<\/a><ul class='ez-toc-list-level-3' ><li class='ez-toc-heading-level-3'><a class=\"ez-toc-link ez-toc-heading-6\" href=\"https:\/\/itsupportwale.com\/blog\/10-devops-best-practices-for-faster-software-delivery-2\/#Blast_Radius_Reduction\" >Blast Radius Reduction<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-3'><a class=\"ez-toc-link ez-toc-heading-7\" href=\"https:\/\/itsupportwale.com\/blog\/10-devops-best-practices-for-faster-software-delivery-2\/#The_%E2%80%9CApply%E2%80%9D_Trap\" >The &#8220;Apply&#8221; Trap<\/a><\/li><\/ul><\/li><li 
class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-8\" href=\"https:\/\/itsupportwale.com\/blog\/10-devops-best-practices-for-faster-software-delivery-2\/#Observability_Youre_Paging_People_for_Nothing\" >Observability: You&#8217;re Paging People for Nothing<\/a><ul class='ez-toc-list-level-3' ><li class='ez-toc-heading-level-3'><a class=\"ez-toc-link ez-toc-heading-9\" href=\"https:\/\/itsupportwale.com\/blog\/10-devops-best-practices-for-faster-software-delivery-2\/#The_Prometheus_Cardinality_Explosion\" >The Prometheus Cardinality Explosion<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-3'><a class=\"ez-toc-link ez-toc-heading-10\" href=\"https:\/\/itsupportwale.com\/blog\/10-devops-best-practices-for-faster-software-delivery-2\/#The_Golden_Signals\" >The Golden Signals<\/a><\/li><\/ul><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-11\" href=\"https:\/\/itsupportwale.com\/blog\/10-devops-best-practices-for-faster-software-delivery-2\/#Kubernetes_The_Great_Complexity_Tax\" >Kubernetes: The Great Complexity Tax<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-12\" href=\"https:\/\/itsupportwale.com\/blog\/10-devops-best-practices-for-faster-software-delivery-2\/#Secret_Management_Stop_Putting_API_Keys_in_Git\" >Secret Management: Stop Putting API Keys in Git<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-13\" href=\"https:\/\/itsupportwale.com\/blog\/10-devops-best-practices-for-faster-software-delivery-2\/#The_%E2%80%9CMicroservices%E2%80%9D_Tax\" >The &#8220;Microservices&#8221; Tax<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-14\" href=\"https:\/\/itsupportwale.com\/blog\/10-devops-best-practices-for-faster-software-delivery-2\/#Security_The_%E2%80%9CShift_Left%E2%80%9D_Lie\" >Security: The &#8220;Shift Left&#8221; 
Lie<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-15\" href=\"https:\/\/itsupportwale.com\/blog\/10-devops-best-practices-for-faster-software-delivery-2\/#The_Real_World_A_%E2%80%9CGotcha%E2%80%9D_Only_Experience_Teaches\" >The Real World: A &#8220;Gotcha&#8221; Only Experience Teaches<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-16\" href=\"https:\/\/itsupportwale.com\/blog\/10-devops-best-practices-for-faster-software-delivery-2\/#The_Wrap-up\" >The Wrap-up<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-17\" href=\"https:\/\/itsupportwale.com\/blog\/10-devops-best-practices-for-faster-software-delivery-2\/#Related_Articles\" >Related Articles<\/a><\/li><\/ul><\/nav><\/div>\n<h2><span class=\"ez-toc-section\" id=\"Stop_Building_Pipelines_and_Start_Building_Systems_A_Decade_of_DevOps_Regrets\"><\/span>Stop Building Pipelines and Start Building Systems: A Decade of DevOps Regrets<span class=\"ez-toc-section-end\"><\/span><\/h2>\n<p>It was 3:15 AM on a Tuesday in 2017. I was staring at a Grafana dashboard that looked like a heart monitor for a patient in cardiac arrest. We had just &#8220;automated&#8221; our deployment pipeline using a series of nested Jenkins Groovy scripts that someone\u2014probably me\u2014thought was clever. I pushed a change to a shared library, thinking it would only affect the staging environment. It didn&#8217;t. Because our &#8220;devops best&#8221; practices at the time involved a shared Jenkins master with global credentials, the script executed a <code>terraform destroy<\/code> on our production VPC. I watched, paralyzed, as 400 EC2 instances transitioned to &#8216;shutting-down&#8217; in unison. <\/p>\n<p>The recovery took fourteen hours. 
We didn&#8217;t have a backup of the Terraform state file because &#8220;S3 versioning is expensive.&#8221; We didn&#8217;t have a manual gate because &#8220;gates slow down velocity.&#8221; That night, I learned that most of what people call &#8220;DevOps&#8221; is just a high-speed way to shoot yourself in the foot. If you&#8217;re looking for a guide on how to use a specific tool to &#8220;transform your enterprise,&#8221; close this tab. This is about the technical trade-offs that actually keep the lights on when the hype dies down.<\/p>\n<h2><span class=\"ez-toc-section\" id=\"The_Myth_of_the_%E2%80%9CBest_Practice%E2%80%9D\"><\/span>The Myth of the &#8220;Best Practice&#8221;<span class=\"ez-toc-section-end\"><\/span><\/h2>\n<p>The industry loves the term &#8220;DevOps best practice.&#8221; It suggests there is a single, correct way to configure a YAML file. There isn&#8217;t. There are only trade-offs. Most documentation you read is written by Developer Advocates trying to show you the &#8220;Happy Path.&#8221; They show you how to deploy a &#8220;Hello World&#8221; app in five minutes. They never show you what happens when your <code>node_modules<\/code> folder hits 2GB and your CI runner runs out of disk space, or when your Kubernetes <code>livenessProbe<\/code> starts killing healthy pods because of a temporary network blip to your database.<\/p>\n<p>Real DevOps isn&#8217;t about tools. It&#8217;s about reducing the cognitive load on the person who gets paged at 3 AM. If your &#8220;automated&#8221; system is so complex that no one can debug it under pressure, it&#8217;s not a best practice; it&#8217;s a liability. We need to stop optimizing for &#8220;speed of setup&#8221; and start optimizing for &#8220;debuggability at scale.&#8221;<\/p>\n<blockquote><p>\n    <strong>Pro-tip:<\/strong> If your CI\/CD pipeline takes more than 10 minutes to run, your developers are already on Reddit. 
If it takes more than 20, they\u2019ve forgotten what they were even trying to deploy.\n<\/p><\/blockquote>\n<h2><span class=\"ez-toc-section\" id=\"CICD_The_Pipeline_is_Not_the_Product\"><\/span>CI\/CD: The Pipeline is Not the Product<span class=\"ez-toc-section-end\"><\/span><\/h2>\n<p>Most CI pipelines are a mess of shell scripts disguised as YAML. People treat GitHub Actions or GitLab CI as a place to dump every bash command they can think of. This is how you end up in YAML-hell. You can&#8217;t local-test a GitHub Action easily. You end up pushing &#8220;test: fix typo&#8221; commits fifty times just to see if your <code>if<\/code> statement works.<\/p>\n<p>The first rule of a sane CI\/CD strategy: <strong>Keep the logic in the code, not the config.<\/strong> Your CI should just call a script or a Makefile that can be run locally. If I can&#8217;t run the exact same build command on my laptop as the CI runner, your pipeline is broken.<\/p>\n<h3><span class=\"ez-toc-section\" id=\"The_Docker_Image_Bloat_Problem\"><\/span>The Docker Image Bloat Problem<span class=\"ez-toc-section-end\"><\/span><\/h3>\n<p>I see people using <code>ubuntu:latest<\/code> as a base image for a Go binary. Why? You&#8217;re shipping 200MB of vulnerabilities and unused libraries. Then they switch to <code>alpine<\/code> because it&#8217;s small, only to realize that <code>musl<\/code> vs <code>glibc<\/code> causes weird DNS resolution bugs or performance hits in Python. Use <code>debian-slim<\/code>. It\u2019s the middle ground that won&#8217;t break your heart.<\/p>\n<pre><code># BAD: The \"I don't care about layers\" approach\nFROM python:3.9\nCOPY . 
\/app\nRUN pip install -r \/app\/requirements.txt\nWORKDIR \/app\nCMD [\"python\", \"main.py\"]\n\n# GOOD: Optimized for caching and security\nFROM python:3.9-slim-bullseye AS builder\nRUN apt-get update &amp;&amp; apt-get install -y --no-install-recommends gcc python3-dev\nCOPY requirements.txt .\nRUN pip install --user -r requirements.txt\n\nFROM python:3.9-slim-bullseye\nWORKDIR \/app\nCOPY --from=builder \/root\/.local \/root\/.local\nCOPY . .\nENV PATH=\/root\/.local\/bin:$PATH\nUSER 1001\nCMD [\"python\", \"main.py\"]\n<\/code><\/pre>\n<p>In the &#8220;Good&#8221; example, we use a multi-stage build. We install the heavy build tools (gcc) in the first stage and throw them away. The final image is smaller and has a reduced attack surface. Also, notice <code>USER 1001<\/code>. Running as root in a container is the &#8220;devops best&#8221; way to ensure a container escape turns into a full cluster compromise. Don&#8217;t do it.<\/p>\n<ul>\n<li><strong>Layer Caching:<\/strong> Always copy your dependency files (package.json, requirements.txt) before your source code. Source code changes every minute; dependencies change every week. Don&#8217;t invalidate your cache for a comment change.<\/li>\n<li><strong>Immutability:<\/strong> Never, ever use <code>:latest<\/code>. Use the git SHA or a semantic version. When a deployment fails, you need to know exactly what code is running. <code>latest<\/code> is a moving target that makes rollbacks impossible.<\/li>\n<\/ul>\n<h2><span class=\"ez-toc-section\" id=\"Infrastructure_as_Code_State_is_the_Enemy\"><\/span>Infrastructure as Code: State is the Enemy<span class=\"ez-toc-section-end\"><\/span><\/h2>\n<p>Terraform is the industry standard, and I hate it as much as I love it. The biggest mistake people make is creating one giant &#8220;monolith&#8221; state file. They put their VPC, their RDS instances, their EKS cluster, and their S3 buckets in one folder. 
One day, you try to update a tag on an S3 bucket, Terraform gets a 403 error from the AWS API, and it marks your entire RDS instance as &#8220;tainted.&#8221; Congratulations, you just deleted your database.<\/p>\n<h3><span class=\"ez-toc-section\" id=\"Blast_Radius_Reduction\"><\/span>Blast Radius Reduction<span class=\"ez-toc-section-end\"><\/span><\/h3>\n<p>You must split your state. Use <code>terraform_remote_state<\/code> data sources or Terragrunt to separate layers. Your VPC should be in its own state file. It changes once every six months. Your application-specific resources (like an SQS queue) should be in another. <\/p>\n<pre><code># Example: protecting a stateful resource from accidental destruction\nresource \"aws_db_instance\" \"prod_db\" {\n  allocated_storage = 100\n  engine            = \"postgres\"\n  # ...\n\n  lifecycle {\n    prevent_destroy = true # This is your only line of defense. Use it.\n  }\n}\n<\/code><\/pre>\n<p>If you don&#8217;t have <code>prevent_destroy = true<\/code> on your stateful resources, you are playing Russian Roulette with your career. I&#8217;ve seen a junior dev run <code>terraform apply<\/code> with a typo in a variable that triggered a replacement of a production database. The <code>prevent_destroy<\/code> flag would have caught that in the plan phase.<\/p>\n<h3><span class=\"ez-toc-section\" id=\"The_%E2%80%9CApply%E2%80%9D_Trap\"><\/span>The &#8220;Apply&#8221; Trap<span class=\"ez-toc-section-end\"><\/span><\/h3>\n<p>Never run <code>terraform apply<\/code> from your local machine against production. Use a runner (GitHub Actions, Terraform Cloud, Atlantis). Why? Because your local machine has a different version of the AWS CLI, a different version of Terraform, and your cat might jump on the keyboard mid-apply. You need a consistent environment and a clear audit log of who ran what and when.<\/p>\n<ul>\n<li><strong>Locking:<\/strong> Use S3 with DynamoDB for state locking. 
If two people run <code>apply<\/code> at the same time without locking, your state file can be corrupted. Recovering a corrupted <code>.tfstate<\/code> file is a form of torture prohibited by the Geneva Convention.<\/li>\n<li><strong>Variables:<\/strong> Stop hardcoding IDs. Use <code>data<\/code> blocks. If you hardcode <code>subnet-0a1b2c3d<\/code>, your code is useless in any other region or account.<\/li>\n<\/ul>\n<h2><span class=\"ez-toc-section\" id=\"Observability_Youre_Paging_People_for_Nothing\"><\/span>Observability: You&#8217;re Paging People for Nothing<span class=\"ez-toc-section-end\"><\/span><\/h2>\n<p>Most &#8220;DevOps best practice&#8221; guides tell you to monitor everything. CPU, Memory, Disk, Network. This is wrong. Monitoring CPU is mostly useless for modern, auto-scaling applications. If my CPU is at 90% but my latency is 50ms and my error rate is 0%, I don&#8217;t care. I&#8217;m sleeping. <\/p>\n<p>Stop paging your SREs for &#8220;High CPU.&#8221; Page them for &#8220;High Latency&#8221; or &#8220;Increased Error Rate.&#8221; These are <strong>Service Level Indicators (SLIs)<\/strong>. Everything else is just debugging data.<\/p>\n<h3><span class=\"ez-toc-section\" id=\"The_Prometheus_Cardinality_Explosion\"><\/span>The Prometheus Cardinality Explosion<span class=\"ez-toc-section-end\"><\/span><\/h3>\n<p>I once saw a Prometheus instance OOM-killed because a developer decided to add <code>user_id<\/code> as a label to a metric. We had 1 million users. That&#8217;s 1 million unique time series. Prometheus died. The whole monitoring system went dark because of one <code>labels={\"user_id\": user.id}<\/code> line.<\/p>\n<pre><code># BAD: High cardinality\nhttp_request_duration_seconds_bucket{method=\"GET\", endpoint=\"\/api\/v1\/user\/12345\"}\n\n# GOOD: Low cardinality\nhttp_request_duration_seconds_bucket{method=\"GET\", endpoint=\"\/api\/v1\/user\/:id\"}\n<\/code><\/pre>\n<p>Keep your labels bounded. 
If a label can have more than 100 possible values, it probably shouldn&#8217;t be a label. Use a logging system (like ELK or Loki) for high-cardinality data, not your metrics system.<\/p>\n<h3><span class=\"ez-toc-section\" id=\"The_Golden_Signals\"><\/span>The Golden Signals<span class=\"ez-toc-section-end\"><\/span><\/h3>\n<p>If you&#8217;re starting from scratch, focus on the Four Golden Signals (from the Google SRE book, which is one of the few pieces of &#8220;hype&#8221; worth reading):<\/p>\n<ol>\n<li><strong>Latency:<\/strong> The time it takes to service a request.<\/li>\n<li><strong>Traffic:<\/strong> A measure of how much demand is being placed on your system.<\/li>\n<li><strong>Errors:<\/strong> The rate of requests that fail.<\/li>\n<li><strong>Saturation:<\/strong> How &#8220;full&#8221; your service is (e.g., thread pool limits).<\/li>\n<\/ol>\n<blockquote><p>\n    <strong>Note to self:<\/strong> Dashboards are for looking at during an incident. Alerts are for waking you up. If an alert doesn&#8217;t require immediate action, it should be a weekly report, not a Slack notification.\n<\/p><\/blockquote>\n<h2><span class=\"ez-toc-section\" id=\"Kubernetes_The_Great_Complexity_Tax\"><\/span>Kubernetes: The Great Complexity Tax<span class=\"ez-toc-section-end\"><\/span><\/h2>\n<p>Kubernetes is the default choice now, which is a tragedy. Most companies would be better off with a few well-configured Systemd units on a plain VM, but here we are. If you must use K8s, you need to respect the <code>resources<\/code> block. <\/p>\n<p>I\u2019ve seen clusters where <code>requests<\/code> were not set. The scheduler has no idea how much room is left on a node. It packs 50 pods onto a node that can only handle 10. Then, during a traffic spike, the node hits 100% memory, the kernel starts OOM-killing processes, and usually, it kills something vital like the <code>kube-proxy<\/code> or <code>fluentd<\/code>. 
<\/p>\n<pre><code># A sane deployment spec\napiVersion: apps\/v1\nkind: Deployment\nmetadata:\n  name: api-service\nspec:\n  template:\n    spec:\n      containers:\n      - name: app\n        image: my-repo\/api:v1.2.3\n        resources:\n          requests:\n            memory: \"256Mi\"\n            cpu: \"250m\"\n          limits:\n            memory: \"512Mi\"\n            cpu: \"1000m\"\n        livenessProbe:\n          httpGet:\n            path: \/healthz\n            port: 8080\n          initialDelaySeconds: 15\n          periodSeconds: 20\n<\/code><\/pre>\n<p><strong>The &#8220;Gotcha&#8221;:<\/strong> CPU limits can actually slow down your app. Kubernetes uses CFS throttling to enforce CPU limits. If your app is multi-threaded, it might hit the limit in a few milliseconds and then get &#8220;throttled&#8221; for the rest of the period, leading to massive latency spikes. Many SREs (myself included) often set <code>requests.cpu<\/code> but leave <code>limits.cpu<\/code> unset, or set it very high, while always strictly limiting memory.<\/p>\n<h2><span class=\"ez-toc-section\" id=\"Secret_Management_Stop_Putting_API_Keys_in_Git\"><\/span>Secret Management: Stop Putting API Keys in Git<span class=\"ez-toc-section-end\"><\/span><\/h2>\n<p>It\u2019s 2024 and I still find <code>STRIPE_API_KEY=sk_live_...<\/code> in GitHub repos. The excuse &#8220;But it&#8217;s a private repo!&#8221; doesn&#8217;t matter. Your CI\/CD system clones that repo. Every developer clones that repo. Your secrets are now on twenty different laptops. <\/p>\n<p>Use a real secret manager. AWS Secrets Manager, HashiCorp Vault, or even encrypted secrets in your CI provider. If you are using Kubernetes, use something like <strong>External Secrets Operator<\/strong> to sync secrets from AWS\/GCP into K8s Secrets. <\/p>\n<p>Pro-tip: If you ever see a secret in a log file, stop what you are doing. Fix the logging configuration immediately. 
A leaked secret in a log aggregator (like Datadog or Splunk) is a nightmare to clean up because those logs are often immutable and replicated across regions.<\/p>\n<h2><span class=\"ez-toc-section\" id=\"The_%E2%80%9CMicroservices%E2%80%9D_Tax\"><\/span>The &#8220;Microservices&#8221; Tax<span class=\"ez-toc-section-end\"><\/span><\/h2>\n<p>Everyone wants to be Netflix. But you don&#8217;t have Netflix&#8217;s problems. You have &#8220;I can&#8217;t find where the bug is&#8221; problems. When you split a monolith into 50 microservices, you haven&#8217;t removed complexity; you&#8217;ve just moved it to the network. <\/p>\n<p>Now, instead of a function call, you have an HTTP request. That request can fail. It can timeout. It can be throttled. It can return a 502 because the load balancer is reconfiguring. If you don&#8217;t have <strong>Distributed Tracing<\/strong> (like Jaeger or Honeycomb), you are flying blind. You\u2019ll see a 500 error in Service A, but the actual cause is a timeout in Service F, three hops away. <\/p>\n<p>If you can&#8217;t explain why you need a microservice, you don&#8217;t need one. Build a &#8220;Modular Monolith&#8221; instead. It\u2019s easier to deploy, easier to test, and significantly cheaper to run.<\/p>\n<h2><span class=\"ez-toc-section\" id=\"Security_The_%E2%80%9CShift_Left%E2%80%9D_Lie\"><\/span>Security: The &#8220;Shift Left&#8221; Lie<span class=\"ez-toc-section-end\"><\/span><\/h2>\n<p>The industry loves to say &#8220;Shift Left,&#8221; which is just a fancy way of saying &#8220;make developers do the security team&#8217;s job.&#8221; Developers are not security experts. If you just give them a list of 500 vulnerabilities from a Snyk scan, they will ignore all of them. <\/p>\n<p>Instead of &#8220;shifting left,&#8221; provide <strong>Secure Defaults<\/strong>. Give them a base Docker image that is already hardened. Give them a Terraform module for an S3 bucket that has encryption and public access blocks enabled by default. 
Make the &#8220;right way&#8221; the &#8220;easy way.&#8221; If a developer has to go out of their way to make something insecure, they probably won&#8217;t do it.<\/p>\n<h2><span class=\"ez-toc-section\" id=\"The_Real_World_A_%E2%80%9CGotcha%E2%80%9D_Only_Experience_Teaches\"><\/span>The Real World: A &#8220;Gotcha&#8221; Only Experience Teaches<span class=\"ez-toc-section-end\"><\/span><\/h2>\n<p>Here is something they don&#8217;t tell you in the AWS Certified Solutions Architect exam: <strong>DNS caching will ruin your life.<\/strong> <\/p>\n<p>Suppose your application depends on a service at <code>api.stripe.com<\/code>. Your application resolves that to an IP. Many runtimes (looking at you, Java) cache that DNS resolution <em>forever<\/em> by default. If Stripe changes their edge IP addresses for maintenance, your app will keep trying to talk to the old, dead IP. You\u2019ll see &#8220;Connection Timeout&#8221; errors, your health checks will fail, and your service will restart. <\/p>\n<p>Always check your TTL (Time To Live) settings. In Java, you need to set the <code>networkaddress.cache.ttl<\/code> security property (in the JVM&#8217;s <code>java.security<\/code> file or via <code>Security.setProperty<\/code>). In Kubernetes, use <code>CoreDNS<\/code> and consider a local <code>nscd<\/code> or <code>node-local-dns<\/code> cache to prevent your DNS traffic from overwhelming the cluster&#8217;s DNS service.<\/p>\n<h2><span class=\"ez-toc-section\" id=\"The_Wrap-up\"><\/span>The Wrap-up<span class=\"ez-toc-section-end\"><\/span><\/h2>\n<p>DevOps isn&#8217;t about the latest tool or the most complex pipeline. It&#8217;s about building systems that are boring. Boring systems don&#8217;t break at 3 AM. Boring systems have clear logs, predictable scaling patterns, and simple deployment paths. Stop chasing the &#8220;best practice&#8221; of the week and start focusing on the fundamentals: idempotency, observability, and blast-radius reduction. If your automation makes a mistake, it should only break a small part of your world, and it should tell you exactly why it did it. 
Everything else is just hype.<\/p>\n<p>Go delete a Jenkins job today. You&#8217;ll feel better.<\/p>\n<h2><span class=\"ez-toc-section\" id=\"Related_Articles\"><\/span>Related Articles<span class=\"ez-toc-section-end\"><\/span><\/h2>\n<p>Explore more insights and best practices:<\/p>\n<ul>\n<li><a href=\"https:\/\/itsupportwale.com\/blog\/how-to-install-mysql-8-on-ubuntu-18-04\/\">How To Install Mysql 8 On Ubuntu 18 04<\/a><\/li>\n<li><a href=\"https:\/\/itsupportwale.com\/blog\/how-to-upgrade-to-python-3-10-on-ubuntu-18-04-and-20-04-lts\/\">How To Upgrade To Python 3 10 On Ubuntu 18 04 And 20 04 Lts<\/a><\/li>\n<li><a href=\"https:\/\/itsupportwale.com\/blog\/top-devops-best-practices-for-faster-software-delivery\/\">Top Devops Best Practices For Faster Software Delivery<\/a><\/li>\n<\/ul>\n","protected":false},"excerpt":{"rendered":"<p>Stop Building Pipelines and Start Building Systems: A Decade of DevOps Regrets It was 3:15 AM on a Tuesday in 2017. I was staring at a Grafana dashboard that looked like a heart monitor for a patient in cardiac arrest. 
We had just &#8220;automated&#8221; our deployment pipeline using a series of nested Jenkins Groovy scripts &#8230; <a title=\"10 DevOps Best Practices for Faster Software Delivery\" class=\"read-more\" href=\"https:\/\/itsupportwale.com\/blog\/10-devops-best-practices-for-faster-software-delivery-2\/\" aria-label=\"Read more  on 10 DevOps Best Practices for Faster Software Delivery\">Read more<\/a><\/p>\n","protected":false},"author":2,"featured_media":0,"comment_status":"open","ping_status":"","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[1],"tags":[],"class_list":["post-4755","post","type-post","status-publish","format-standard","hentry","category-uncategorized"],"yoast_head":"<!-- This site is optimized with the Yoast SEO plugin v27.0 - https:\/\/yoast.com\/product\/yoast-seo-wordpress\/ -->\n<title>10 DevOps Best Practices for Faster Software Delivery - ITSupportWale<\/title>\n<meta name=\"robots\" content=\"index, follow, max-snippet:-1, max-image-preview:large, max-video-preview:-1\" \/>\n<link rel=\"canonical\" href=\"https:\/\/itsupportwale.com\/blog\/10-devops-best-practices-for-faster-software-delivery-2\/\" \/>\n<meta property=\"og:locale\" content=\"en_US\" \/>\n<meta property=\"og:type\" content=\"article\" \/>\n<meta property=\"og:title\" content=\"10 DevOps Best Practices for Faster Software Delivery - ITSupportWale\" \/>\n<meta property=\"og:description\" content=\"Stop Building Pipelines and Start Building Systems: A Decade of DevOps Regrets It was 3:15 AM on a Tuesday in 2017. I was staring at a Grafana dashboard that looked like a heart monitor for a patient in cardiac arrest. We had just &#8220;automated&#8221; our deployment pipeline using a series of nested Jenkins Groovy scripts ... 
Read more\" \/>\n<meta property=\"og:url\" content=\"https:\/\/itsupportwale.com\/blog\/10-devops-best-practices-for-faster-software-delivery-2\/\" \/>\n<meta property=\"og:site_name\" content=\"ITSupportWale\" \/>\n<meta property=\"article:publisher\" content=\"https:\/\/www.facebook.com\/Itsupportwale-298547177495978\" \/>\n<meta property=\"article:published_time\" content=\"2026-04-09T16:24:27+00:00\" \/>\n<meta property=\"og:image\" content=\"https:\/\/itsupportwale.com\/blog\/wp-content\/uploads\/2021\/05\/android-chrome-512x512-1.png\" \/>\n\t<meta property=\"og:image:width\" content=\"512\" \/>\n\t<meta property=\"og:image:height\" content=\"512\" \/>\n\t<meta property=\"og:image:type\" content=\"image\/png\" \/>\n<meta name=\"author\" content=\"Techie\" \/>\n<meta name=\"twitter:card\" content=\"summary_large_image\" \/>\n<meta name=\"twitter:label1\" content=\"Written by\" \/>\n\t<meta name=\"twitter:data1\" content=\"Techie\" \/>\n\t<meta name=\"twitter:label2\" content=\"Est. 
reading time\" \/>\n\t<meta name=\"twitter:data2\" content=\"12 minutes\" \/>\n<script type=\"application\/ld+json\" class=\"yoast-schema-graph\">{\"@context\":\"https:\/\/schema.org\",\"@graph\":[{\"@type\":\"Article\",\"@id\":\"https:\/\/itsupportwale.com\/blog\/10-devops-best-practices-for-faster-software-delivery-2\/#article\",\"isPartOf\":{\"@id\":\"https:\/\/itsupportwale.com\/blog\/10-devops-best-practices-for-faster-software-delivery-2\/\"},\"author\":{\"name\":\"Techie\",\"@id\":\"https:\/\/itsupportwale.com\/blog\/#\/schema\/person\/8c5a2b3d36396e0a8fd91ec8242fd46d\"},\"headline\":\"10 DevOps Best Practices for Faster Software Delivery\",\"datePublished\":\"2026-04-09T16:24:27+00:00\",\"mainEntityOfPage\":{\"@id\":\"https:\/\/itsupportwale.com\/blog\/10-devops-best-practices-for-faster-software-delivery-2\/\"},\"wordCount\":2113,\"commentCount\":0,\"publisher\":{\"@id\":\"https:\/\/itsupportwale.com\/blog\/#organization\"},\"inLanguage\":\"en-US\",\"potentialAction\":[{\"@type\":\"CommentAction\",\"name\":\"Comment\",\"target\":[\"https:\/\/itsupportwale.com\/blog\/10-devops-best-practices-for-faster-software-delivery-2\/#respond\"]}]},{\"@type\":\"WebPage\",\"@id\":\"https:\/\/itsupportwale.com\/blog\/10-devops-best-practices-for-faster-software-delivery-2\/\",\"url\":\"https:\/\/itsupportwale.com\/blog\/10-devops-best-practices-for-faster-software-delivery-2\/\",\"name\":\"10 DevOps Best Practices for Faster Software Delivery - 
ITSupportWale\",\"isPartOf\":{\"@id\":\"https:\/\/itsupportwale.com\/blog\/#website\"},\"datePublished\":\"2026-04-09T16:24:27+00:00\",\"breadcrumb\":{\"@id\":\"https:\/\/itsupportwale.com\/blog\/10-devops-best-practices-for-faster-software-delivery-2\/#breadcrumb\"},\"inLanguage\":\"en-US\",\"potentialAction\":[{\"@type\":\"ReadAction\",\"target\":[\"https:\/\/itsupportwale.com\/blog\/10-devops-best-practices-for-faster-software-delivery-2\/\"]}]},{\"@type\":\"BreadcrumbList\",\"@id\":\"https:\/\/itsupportwale.com\/blog\/10-devops-best-practices-for-faster-software-delivery-2\/#breadcrumb\",\"itemListElement\":[{\"@type\":\"ListItem\",\"position\":1,\"name\":\"Home\",\"item\":\"https:\/\/itsupportwale.com\/blog\/\"},{\"@type\":\"ListItem\",\"position\":2,\"name\":\"10 DevOps Best Practices for Faster Software Delivery\"}]},{\"@type\":\"WebSite\",\"@id\":\"https:\/\/itsupportwale.com\/blog\/#website\",\"url\":\"https:\/\/itsupportwale.com\/blog\/\",\"name\":\"ITSupportWale\",\"description\":\"Tips, Tricks, Fixed-Errors, Tutorials &amp; 
Guides\",\"publisher\":{\"@id\":\"https:\/\/itsupportwale.com\/blog\/#organization\"},\"potentialAction\":[{\"@type\":\"SearchAction\",\"target\":{\"@type\":\"EntryPoint\",\"urlTemplate\":\"https:\/\/itsupportwale.com\/blog\/?s={search_term_string}\"},\"query-input\":{\"@type\":\"PropertyValueSpecification\",\"valueRequired\":true,\"valueName\":\"search_term_string\"}}],\"inLanguage\":\"en-US\"},{\"@type\":\"Organization\",\"@id\":\"https:\/\/itsupportwale.com\/blog\/#organization\",\"name\":\"itsupportwale\",\"url\":\"https:\/\/itsupportwale.com\/blog\/\",\"logo\":{\"@type\":\"ImageObject\",\"inLanguage\":\"en-US\",\"@id\":\"https:\/\/itsupportwale.com\/blog\/#\/schema\/logo\/image\/\",\"url\":\"https:\/\/itsupportwale.com\/blog\/wp-content\/uploads\/2023\/09\/cropped-Logo-trans-without-slogan.png\",\"contentUrl\":\"https:\/\/itsupportwale.com\/blog\/wp-content\/uploads\/2023\/09\/cropped-Logo-trans-without-slogan.png\",\"width\":1119,\"height\":144,\"caption\":\"itsupportwale\"},\"image\":{\"@id\":\"https:\/\/itsupportwale.com\/blog\/#\/schema\/logo\/image\/\"},\"sameAs\":[\"https:\/\/www.facebook.com\/Itsupportwale-298547177495978\"]},{\"@type\":\"Person\",\"@id\":\"https:\/\/itsupportwale.com\/blog\/#\/schema\/person\/8c5a2b3d36396e0a8fd91ec8242fd46d\",\"name\":\"Techie\",\"sameAs\":[\"https:\/\/itsupportwale.com\",\"iswblogadmin\"],\"url\":\"https:\/\/itsupportwale.com\/blog\/author\/iswblogadmin\/\"}]}<\/script>\n<!-- \/ Yoast SEO plugin. 
-->","yoast_head_json":{"title":"10 DevOps Best Practices for Faster Software Delivery - ITSupportWale","robots":{"index":"index","follow":"follow","max-snippet":"max-snippet:-1","max-image-preview":"max-image-preview:large","max-video-preview":"max-video-preview:-1"},"canonical":"https:\/\/itsupportwale.com\/blog\/10-devops-best-practices-for-faster-software-delivery-2\/","og_locale":"en_US","og_type":"article","og_title":"10 DevOps Best Practices for Faster Software Delivery - ITSupportWale","og_description":"Stop Building Pipelines and Start Building Systems: A Decade of DevOps Regrets It was 3:15 AM on a Tuesday in 2017. I was staring at a Grafana dashboard that looked like a heart monitor for a patient in cardiac arrest. We had just &#8220;automated&#8221; our deployment pipeline using a series of nested Jenkins Groovy scripts ... Read more","og_url":"https:\/\/itsupportwale.com\/blog\/10-devops-best-practices-for-faster-software-delivery-2\/","og_site_name":"ITSupportWale","article_publisher":"https:\/\/www.facebook.com\/Itsupportwale-298547177495978","article_published_time":"2026-04-09T16:24:27+00:00","og_image":[{"width":512,"height":512,"url":"https:\/\/itsupportwale.com\/blog\/wp-content\/uploads\/2021\/05\/android-chrome-512x512-1.png","type":"image\/png"}],"author":"Techie","twitter_card":"summary_large_image","twitter_misc":{"Written by":"Techie","Est. 
reading time":"12 minutes"},"schema":{"@context":"https:\/\/schema.org","@graph":[{"@type":"Article","@id":"https:\/\/itsupportwale.com\/blog\/10-devops-best-practices-for-faster-software-delivery-2\/#article","isPartOf":{"@id":"https:\/\/itsupportwale.com\/blog\/10-devops-best-practices-for-faster-software-delivery-2\/"},"author":{"name":"Techie","@id":"https:\/\/itsupportwale.com\/blog\/#\/schema\/person\/8c5a2b3d36396e0a8fd91ec8242fd46d"},"headline":"10 DevOps Best Practices for Faster Software Delivery","datePublished":"2026-04-09T16:24:27+00:00","mainEntityOfPage":{"@id":"https:\/\/itsupportwale.com\/blog\/10-devops-best-practices-for-faster-software-delivery-2\/"},"wordCount":2113,"commentCount":0,"publisher":{"@id":"https:\/\/itsupportwale.com\/blog\/#organization"},"inLanguage":"en-US","potentialAction":[{"@type":"CommentAction","name":"Comment","target":["https:\/\/itsupportwale.com\/blog\/10-devops-best-practices-for-faster-software-delivery-2\/#respond"]}]},{"@type":"WebPage","@id":"https:\/\/itsupportwale.com\/blog\/10-devops-best-practices-for-faster-software-delivery-2\/","url":"https:\/\/itsupportwale.com\/blog\/10-devops-best-practices-for-faster-software-delivery-2\/","name":"10 DevOps Best Practices for Faster Software Delivery - ITSupportWale","isPartOf":{"@id":"https:\/\/itsupportwale.com\/blog\/#website"},"datePublished":"2026-04-09T16:24:27+00:00","breadcrumb":{"@id":"https:\/\/itsupportwale.com\/blog\/10-devops-best-practices-for-faster-software-delivery-2\/#breadcrumb"},"inLanguage":"en-US","potentialAction":[{"@type":"ReadAction","target":["https:\/\/itsupportwale.com\/blog\/10-devops-best-practices-for-faster-software-delivery-2\/"]}]},{"@type":"BreadcrumbList","@id":"https:\/\/itsupportwale.com\/blog\/10-devops-best-practices-for-faster-software-delivery-2\/#breadcrumb","itemListElement":[{"@type":"ListItem","position":1,"name":"Home","item":"https:\/\/itsupportwale.com\/blog\/"},{"@type":"ListItem","position":2,"name":"10 DevOps Best 
Practices for Faster Software Delivery"}]},{"@type":"WebSite","@id":"https:\/\/itsupportwale.com\/blog\/#website","url":"https:\/\/itsupportwale.com\/blog\/","name":"ITSupportWale","description":"Tips, Tricks, Fixed-Errors, Tutorials &amp; Guides","publisher":{"@id":"https:\/\/itsupportwale.com\/blog\/#organization"},"potentialAction":[{"@type":"SearchAction","target":{"@type":"EntryPoint","urlTemplate":"https:\/\/itsupportwale.com\/blog\/?s={search_term_string}"},"query-input":{"@type":"PropertyValueSpecification","valueRequired":true,"valueName":"search_term_string"}}],"inLanguage":"en-US"},{"@type":"Organization","@id":"https:\/\/itsupportwale.com\/blog\/#organization","name":"itsupportwale","url":"https:\/\/itsupportwale.com\/blog\/","logo":{"@type":"ImageObject","inLanguage":"en-US","@id":"https:\/\/itsupportwale.com\/blog\/#\/schema\/logo\/image\/","url":"https:\/\/itsupportwale.com\/blog\/wp-content\/uploads\/2023\/09\/cropped-Logo-trans-without-slogan.png","contentUrl":"https:\/\/itsupportwale.com\/blog\/wp-content\/uploads\/2023\/09\/cropped-Logo-trans-without-slogan.png","width":1119,"height":144,"caption":"itsupportwale"},"image":{"@id":"https:\/\/itsupportwale.com\/blog\/#\/schema\/logo\/image\/"},"sameAs":["https:\/\/www.facebook.com\/Itsupportwale-298547177495978"]},{"@type":"Person","@id":"https:\/\/itsupportwale.com\/blog\/#\/schema\/person\/8c5a2b3d36396e0a8fd91ec8242fd46d","name":"Techie","sameAs":["https:\/\/itsupportwale.com","iswblogadmin"],"url":"https:\/\/itsupportwale.com\/blog\/author\/iswblogadmin\/"}]}},"_links":{"self":[{"href":"https:\/\/itsupportwale.com\/blog\/wp-json\/wp\/v2\/posts\/4755","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/itsupportwale.com\/blog\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/itsupportwale.com\/blog\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/itsupportwale.com\/blog\/wp-json\/wp\/v2\/users\/2"}],"replies":[{"embeddable":true,"href":"https:\/\/
itsupportwale.com\/blog\/wp-json\/wp\/v2\/comments?post=4755"}],"version-history":[{"count":0,"href":"https:\/\/itsupportwale.com\/blog\/wp-json\/wp\/v2\/posts\/4755\/revisions"}],"wp:attachment":[{"href":"https:\/\/itsupportwale.com\/blog\/wp-json\/wp\/v2\/media?parent=4755"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/itsupportwale.com\/blog\/wp-json\/wp\/v2\/categories?post=4755"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/itsupportwale.com\/blog\/wp-json\/wp\/v2\/tags?post=4755"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}