# replace-relative-links.yml
# Template parameters. Each value is injected as the default of the matching
# command-line option of the inline Python script below.
parameters: 
  # Folder that is searched for readme.md files.
  TargetFolder: ''
  # Root directory of the repository; rewritten links are expressed relative to it.
  RootFolder: ''
  # Commit hash embedded in the generated URLs.
  BuildSHA: ''
  # Full repository id used in the generated URLs, for example "Azure/azure-sdk-for-net".
  RepoId: $(Build.Repository.Name)
steps:
  # Runs an inline Python script that rewrites relative links found in
  # readme.md files into absolute github.com URLs.
  - task: PythonScript@0
    displayName: Replace Relative Readme Links with Absolute References
    inputs:
      scriptSource: inline
      script: |
        import argparse
        import sys
        import os
        import logging
        import glob
        import re
        import fnmatch
        from io import open
        try:
            from pathlib import Path
        except:
            from pathlib2 import Path
        # This script is intended to be run against a single folder. Every readme.md file (regardless of casing) found
        # under that folder, searching recursively, has its relative links replaced with full reference links.
        # Let INFO-level messages from the root logger through.
        logging.getLogger().setLevel(logging.INFO)
        # Inline markdown links such as [text](target): group 1 is the text, group 2 the target.
        LINK_DISCOVERY_REGEX = r"\[([^\]]*)\]\(([^)]+)\)"
        # Reference-style definitions such as "[label]: target": group 1 is "[label]:", group 2 the target.
        PREDEFINED_LINK_DISCOVERY_REGEX = r"(\[[^\]]+]\:)\s*([^\s]+)"
        # URL template used for targets that are not images.
        RELATIVE_LINK_REPLACEMENT_SYNTAX = "https://github.com/{repo_id}/tree/{build_sha}/{target_resource_path}"
        # URL template used for targets whose extension is listed in IMAGE_FILE_EXTENSIONS.
        RELATIVE_LINK_REPLACEMENT_SYNTAX_FOR_IMAGE = "https://github.com/{repo_id}/raw/{build_sha}/{target_resource_path}"
        # File extensions that select the image URL template.
        IMAGE_FILE_EXTENSIONS = [
            '.jpeg',
            '.jpg',
            '.png',
            '.gif',
            '.tiff',
        ]
        def locate_readmes(directory):
            """Collect the paths of all files named readme.md (any casing) under `directory`.

            The tree is walked recursively with os.walk; each result is the
            walked folder joined with the file name.
            """
            return [
                os.path.join(parent, name)
                for parent, _subdirs, names in os.walk(directory)
                for name in names
                if name.lower() == "readme.md"
            ]
        def is_relative_link(link_value, readme_location):
            """Tell whether `link_value` points at an existing file or folder relative to the readme.

            Args:
                link_value: The link target as written in the markdown.
                readme_location: Path of the readme that contains the link.

            Returns:
                True when the target (ignoring a trailing "#fragment") exists on
                disk relative to the readme's folder; False otherwise, including
                for empty targets, filesystem-absolute targets and any target
                that cannot be evaluated as a path.
            """
            link_without_location = link_value
            # Only strip a fragment that follows a path; a pure "#anchor" link is kept as-is
            # so that it is (normally) not found on disk and therefore left untouched.
            fragment_start = link_without_location.find('#')
            if fragment_start > 0:
                link_without_location = link_without_location[:fragment_start]
            # An empty target would resolve to the readme's own folder, which always exists.
            if not link_without_location:
                return False
            try:
                # os.path.join discards the readme folder when given an absolute path,
                # so such a target is by definition not relative to the readme.
                if os.path.isabs(link_without_location):
                    return False
                readme_folder = os.path.dirname(readme_location)
                candidate = os.path.abspath(os.path.join(readme_folder, link_without_location))
                return os.path.exists(candidate)
            except Exception:
                # Anything that cannot be treated as a path is not a relative link.
                return False
        def _absolute_url_for_relative_link(link_path, readme_location, root_folder, build_sha, repo_id):
            """Build the absolute URL for `link_path`, or return None when it is not a relative link.

            The target is resolved against the readme's folder, re-expressed
            relative to `root_folder`, and substituted into the image or the
            regular URL template depending on its file extension.
            """
            if not is_relative_link(link_path, readme_location):
                return None
            # if it is a relative reference, we need to find the path from the root of the repository
            resource_absolute_path = os.path.abspath(
                os.path.join(os.path.dirname(readme_location), link_path)
            )
            placement_from_root = os.path.relpath(resource_absolute_path, root_folder)
            # Classify on the path alone: drop any "#fragment" and ignore letter case so
            # that targets such as "Logo.PNG" or "logo.png#x" still use the image template.
            suffix = Path(placement_from_root.split("#", 1)[0]).suffix.lower()
            if suffix in IMAGE_FILE_EXTENSIONS:
                template = RELATIVE_LINK_REPLACEMENT_SYNTAX_FOR_IMAGE
            else:
                template = RELATIVE_LINK_REPLACEMENT_SYNTAX
            # relpath yields backslashes on Windows; URLs need forward slashes.
            return template.format(
                repo_id=repo_id,
                build_sha=build_sha,
                target_resource_path=placement_from_root,
            ).replace("\\", "/")
        def replace_relative_link(match, readme_location, root_folder, build_sha, repo_id):
            """re.sub callback for inline links: group 1 is the link text, group 2 the target.

            Returns the link rewritten as "[text](absolute-url)" when the target
            is a relative link, otherwise the matched text unchanged.
            """
            updated_link = _absolute_url_for_relative_link(
                match.group(2).strip(), readme_location, root_folder, build_sha, repo_id
            )
            if updated_link is None:
                return match.group(0)
            return "[{}]({})".format(match.group(1), updated_link)
        def replace_predefined_relative_links(match, readme_location, root_folder, build_sha, repo_id):
            """re.sub callback for reference definitions: group 1 is "[label]:", group 2 the target.

            Returns the definition rewritten as "[label]: absolute-url" when the
            target is a relative link, otherwise the matched text unchanged.
            """
            updated_link = _absolute_url_for_relative_link(
                match.group(2).strip(), readme_location, root_folder, build_sha, repo_id
            )
            if updated_link is None:
                return match.group(0)
            return "{} {}".format(match.group(1), updated_link)
        def transfer_content_to_absolute_references(
            root_folder, build_sha, repo_id, readme_location, content
        ):
            """Return `content` with relative links rewritten as absolute references.

            Two substitution passes run in order: inline links first, then
            reference-style link definitions.
            """
            def rewrite_inline(found):
                return replace_relative_link(
                    found, readme_location, root_folder, build_sha, repo_id
                )
            def rewrite_definition(found):
                return replace_predefined_relative_links(
                    found, readme_location, root_folder, build_sha, repo_id
                )
            passes = (
                (LINK_DISCOVERY_REGEX, rewrite_inline),
                (PREDEFINED_LINK_DISCOVERY_REGEX, rewrite_definition),
            )
            for pattern, rewriter in passes:
                content = re.sub(pattern, rewriter, content)
            return content
        # Entry point: parse the options (defaults come from the template parameters),
        # then rewrite every readme found under the target folder in place.
        if __name__ == "__main__":
            parser = argparse.ArgumentParser(
                description="Replaces relative links for any README.md under the target folder. Given any discovered relative link, will replace with the provided repoId and SHA. Case insensitive"
            )
            # NOTE(review): the template values below are substituted into ordinary (non-raw)
            # string literals, so backslashes in a Windows-style path would be read as escape
            # sequences - confirm that callers pass forward-slash paths.
            parser.add_argument(
                "-t",
                "--target",
                dest="target_folder",
                help="The target folder that contains a README ",
                default="${{ parameters.TargetFolder }}",
            )
            parser.add_argument(
                "-i",
                "--repoid",
                dest="repo_id",
                help='The target repository used as the base for the path replacement. Full Id, example: "Azure/azure-sdk-for-net"',
                default="${{ parameters.RepoId }}",
            )
            parser.add_argument(
                "-r",
                "--root",
                dest="root_folder",
                help="The root directory of the repository. This gives us the ability to rationalize links in situations where a relative link traverses UPWARDS from the readme.",
                default="${{ parameters.RootFolder }}",
            )
            parser.add_argument(
                "-s",
                "--sha",
                dest="build_sha",
                help="The commit hash associated with this change. Using this will mean that links will never be broken.",
                default="${{ parameters.BuildSHA }}",
            )
            args = parser.parse_args()
            logging.info("Root Folder: {}".format(args.root_folder))
            logging.info("Target Folder: {}".format(args.target_folder))
            logging.info("Repository Id: {}".format(args.repo_id))
            logging.info("Build SHA: {}".format(args.build_sha))
            readme_files = locate_readmes(args.target_folder)
            if not readme_files:
                # An empty or wrong target folder would otherwise pass silently.
                logging.warning("No readme files found under {}.".format(args.target_folder))
            for readme_location in readme_files:
                try:
                    logging.info(
                        "Running Relative Link Replacement on {}.".format(readme_location)
                    )
                    # newline="" keeps the file's own line endings untranslated, so a CRLF
                    # readme is not rewritten wholesale and the later diff shows only link edits.
                    with open(readme_location, "r", encoding="utf-8", newline="") as readme_stream:
                        readme_content = readme_stream.read()
                    new_content = transfer_content_to_absolute_references(
                        args.root_folder,
                        args.build_sha,
                        args.repo_id,
                        readme_location,
                        readme_content,
                    )
                    # Leave files without any replaced link untouched.
                    if new_content != readme_content:
                        with open(readme_location, "w", encoding="utf-8", newline="") as readme_stream:
                            readme_stream.write(new_content)
                except Exception:
                    # Log the failure with its traceback and fail the step on the first error.
                    logging.exception("Relative Link Replacement failed on {}.".format(readme_location))
                    sys.exit(1)
  # Show what the previous step changed, with zero lines of context.
  - script: |
      git diff -U0
    displayName: Highlight Readme Updates