Avid readers, I bring to you a nice little update to one of my more popular posts on how to deploy a Hugo website from GitHub to S3 using GitHub Webhooks, API Gateway and Lambda.
Since that post, AWS has stated that it will deprecate support for Python 2.7 starting 31st December 2020. Not only that, but the requests module has been removed from the AWS SDK for Python, so the file downloads in my Lambda function stopped working.
To solve this, I switched my Lambda function to the Python 3.7 runtime and wrote another Python function that handles file downloads using the urllib3 module. It’s not as elegant as it once was, but it does the job, and it does a damn fine job at that:
def downloadFromUrl(sourceUrl, destFile):
    # Stream the response to disk in small chunks rather than holding it all in memory
    http = urllib3.PoolManager()
    r = http.request('GET', sourceUrl, preload_content=False)
    with open(destFile, 'wb') as out:
        while True:
            data = r.read(64)  # Read 64 bytes at a time
            if not data:
                break
            out.write(data)
    r.release_conn()
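Using it is just a matter of passing a source URL and a destination path. For example (the URL here is only a placeholder):

# Hypothetical usage: grab an archive into Lambda's writable /tmp directory
downloadFromUrl('https://example.com/archive.tar.gz', '/tmp/archive.tar.gz')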
Place this function at the top of the script (after importing modules, of course) to start using it. You’ll also want to remove references to the requests module throughout the script, specifically in the downloadHugo function. These lines here:
def downloadHugo(repo):
    logger.info("Downloading latest Hugo")
    pattern = re.compile("hugo\\_\\d.+\\_Linux-64bit.tar.gz") # Setting the RegEx to grab what we need from the Assets array
    response = requests.get("https://api.github.com/repos/gohugoio/hugo/releases/latest") # GitHub API for the releases
    release = response.json()
    assets = release["assets"]
    for asset in assets:
        if pattern.match(asset["name"]):
            downloadUrl = asset["browser_download_url"] # Grab the download URL for the Asset
            logger.info("Value of downloadUrl: " + downloadUrl)
            urllib.urlretrieve(downloadUrl, '/tmp/hugo.tar.gz') # Download the file
            logger.info("Hugo download complete")
    logger.info("Extracting Hugo")
    tar = tarfile.open("/tmp/hugo.tar.gz")
    tar.extractall("/tmp/" + repo + "-master")
    tar.close()
need to be updated to this:
def downloadHugo(repo):
    logger.info("Downloading latest Hugo")
    pattern = re.compile("hugo\\_\\d.+\\_Linux-64bit.tar.gz")
    http = urllib3.PoolManager()
    r = http.request('GET', "https://api.github.com/repos/gohugoio/hugo/releases/latest", preload_content=False, headers={'User-Agent': os.environ['GITHUB_ACCOUNT']})
    assets = json.loads(r.data.decode('utf-8'))["assets"]
    for asset in assets:
        if pattern.match(asset["name"]):
            downloadUrl = asset["browser_download_url"]
            logger.info("Value of downloadUrl: " + downloadUrl)
            downloadFromUrl(downloadUrl, '/tmp/hugo.tar.gz')
            logger.info("Hugo download complete")
    logger.info("Extracting Hugo")
    tar = tarfile.open("/tmp/hugo.tar.gz")
    tar.extractall("/tmp/" + repo + "-master")
    tar.close()
    r.release_conn()
You’ll see I didn’t re-use the download function here. I didn’t build it to deal with headers, and the GitHub API requires a User-Agent header, so I skipped over it and wrote the request out in full instead.
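If you did want to keep everything going through the one function, a minimal sketch of a headers-aware variant could look like this (the optional headers parameter is my own addition and isn’t part of the script below):

def downloadFromUrl(sourceUrl, destFile, headers=None):
    # Optional headers dict, e.g. {'User-Agent': 'my-account'} for the GitHub API
    http = urllib3.PoolManager()
    r = http.request('GET', sourceUrl, preload_content=False, headers=headers)
    with open(destFile, 'wb') as out:
        while True:
            data = r.read(64)
            if not data:
                break
            out.write(data)
    r.release_conn()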
Anyway, here’s the full script!
import logging
import os
from zipfile import ZipFile
import json
import tarfile
import re
import boto3
import mimetypes
import urllib3

logger = logging.getLogger()
logger.setLevel(logging.INFO)

def downloadFromUrl(sourceUrl, destFile):
    http = urllib3.PoolManager()
    r = http.request('GET', sourceUrl, preload_content=False)
    with open(destFile, 'wb') as out:
        while True:
            data = r.read(64)
            if not data:
                break
            out.write(data)
    r.release_conn()

def downloadSite(account, repo):
    logger.info("Downloading master zip of " + repo + " from GitHub")
    url = 'https://github.com/' + account + '/' + repo + '/archive/master.zip'
    logger.info(url)
    siteZip = "/tmp/master.zip"
    downloadFromUrl(url, siteZip)

    with ZipFile(siteZip, 'r') as zip:
        logger.info("Extracting site files now")
        zip.extractall("/tmp")
        logger.info("Extraction complete!")

def downloadHugo(repo):
    logger.info("Downloading latest Hugo")
    pattern = re.compile("hugo\\_\\d.+\\_Linux-64bit.tar.gz")
    http = urllib3.PoolManager()
    r = http.request('GET', "https://api.github.com/repos/gohugoio/hugo/releases/latest", preload_content=False, headers={'User-Agent': os.environ['GITHUB_ACCOUNT']})
    assets = json.loads(r.data.decode('utf-8'))["assets"]
    for asset in assets:
        if pattern.match(asset["name"]):
            downloadUrl = asset["browser_download_url"]
            logger.info("Value of downloadUrl: " + downloadUrl)
            downloadFromUrl(downloadUrl, '/tmp/hugo.tar.gz')
            logger.info("Hugo download complete")
    logger.info("Extracting Hugo")
    tar = tarfile.open("/tmp/hugo.tar.gz")
    tar.extractall("/tmp/" + repo + "-master")
    tar.close()
    r.release_conn()

def buildSite(repo):
    logger.info("Building site")
    os.chdir("/tmp/" + repo + "-master")
    os.system('./hugo')
    logger.info("Site built with Hugo")
    buildDir = os.getcwd() + "/public"
    return buildDir

def syncS3(path, s3Bucket):
    # Copied from https://www.developerfiles.com/upload-files-to-s3-with-python-keeping-the-original-folder-structure/
    logger.info("Copying to S3")
    session = boto3.Session()
    s3 = session.resource('s3')
    bucket = s3.Bucket(s3Bucket)
    logger.info("Emptying bucket first")
    bucket.objects.all().delete()
    mimetypes.init()
    mimetypes.types_map['.css'] = 'text/css'

    for subdir, dirs, files in os.walk(path):
        for file in files:
            full_path = os.path.join(subdir, file)
            _, ext = os.path.splitext(full_path)
            mimetype = mimetypes.types_map[ext]
            with open(full_path, 'rb') as data:
                bucket.put_object(Key=full_path[len(path)+1:], Body=data, ContentType=mimetype)
    logger.info("Generated site uploaded to S3 successfully.")

def lambda_handler(event, context):
    sourceRepo = os.environ['GITHUB_REPO']
    gitAccount = os.environ['GITHUB_ACCOUNT']
    targetBucket = os.environ['TARGET_BUCKET']

    downloadSite(gitAccount, sourceRepo)
    downloadHugo(sourceRepo)
    buildDir = buildSite(sourceRepo)
    syncS3(buildDir, targetBucket)

    response = {
        'statusCode': 200,
        'body': "Site deployed successfully"
    }

    return response
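If you want to sanity-check the handler before wiring it up to API Gateway, a quick local smoke test could look something like this (the module name deploy and all of the environment variable values are placeholders, and note that this really does run the whole pipeline, so it needs AWS credentials and a Linux machine with a writable /tmp):

import os

# Placeholder values for the environment variables the handler expects
os.environ['GITHUB_REPO'] = 'my-hugo-site'
os.environ['GITHUB_ACCOUNT'] = 'my-github-account'
os.environ['TARGET_BUCKET'] = 'my-site-bucket'

from deploy import lambda_handler  # "deploy" is a placeholder module name

print(lambda_handler({}, None))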
If you have any feedback or questions please feel free to comment.