Had an epiphany to try writing a working Python script using Copilot. My prompt essentially asked for a Python script that prompts the user for a URL, checks it for either robot.txt or robots.txt, and saves any file found with the URL appended to the filename.
The Python code is as follows.
import requests

def check_robots_txt(url):
    # Ensure the URL starts with http:// or https://
    if not url.startswith(('http://', 'https://')):
        url = 'http://' + url

    # Check for robots.txt
    robots_url = url.rstrip('/') + '/robots.txt'
    response = requests.get(robots_url)
    if response.status_code == 200:
        print(f"Found robots.txt at {robots_url}")
        print("Contents of robots.txt:")
        print(response.text)
        filename = f"{url.replace('http://', '').replace('https://', '').replace('/', '_')}_robots.txt"
        with open(filename, 'w') as file:
            file.write(response.text)
        print(f"The contents have been saved to {filename}")
    else:
        print(f"No robots.txt found at {robots_url}")

    # Check for robot.txt
    robot_url = url.rstrip('/') + '/robot.txt'
    response = requests.get(robot_url)
    if response.status_code == 200:
        print(f"Found robot.txt at {robot_url}")
        print("Contents of robot.txt:")
        print(response.text)
        filename = f"{url.replace('http://', '').replace('https://', '').replace('/', '_')}_robot.txt"
        with open(filename, 'w') as file:
            file.write(response.text)
        print(f"The contents have been saved to {filename}")
    else:
        print(f"No robot.txt found at {robot_url}")

# Input URL from the user
url = input("Enter the URL: ")
check_robots_txt(url)
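One caveat worth flagging: requests.get without a timeout can hang indefinitely on an unresponsive host, and a connection failure raises an exception instead of returning a status code. A minimal hardening sketch, which you could drop in place of the bare requests.get calls above (safe_get is a hypothetical helper of mine, and the 10-second timeout is an arbitrary choice, not something from the original script):

import requests

def safe_get(url, timeout=10):
    """Fetch a URL, returning the response or None on any network error."""
    try:
        # The timeout keeps the script from hanging forever on a dead server
        return requests.get(url, timeout=timeout)
    except requests.exceptions.RequestException as e:
        print(f"Request to {url} failed: {e}")
        return None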
Worked like a charm. I then enhanced the script with two more features:
- Trying both HTTP and HTTPS.
- Allowing manual input of a URL or reading URLs from a file.
import requests

def fetch_and_save_file(url, file_type):
    response = requests.get(url)
    if response.status_code == 200:
        print(f"Found {file_type} at {url}")
        print(f"Contents of {file_type}:")
        print(response.text)
        # file_type already ends in .txt, so don't append another extension
        filename = f"{url.replace('http://', '').replace('https://', '').replace('/', '_')}_{file_type}"
        with open(filename, 'w') as file:
            file.write(response.text)
        print(f"The contents have been saved to {filename}")
    else:
        print(f"No {file_type} found at {url}")

def check_robots_txt(url):
    # Strip any existing scheme so we can try both protocols cleanly.
    # Note: str.lstrip() removes characters, not prefixes, so use
    # str.removeprefix() (Python 3.9+) instead.
    bare_url = url.removeprefix('http://').removeprefix('https://').rstrip('/')
    # Try both protocols and both filename variants
    for protocol in ['http://', 'https://']:
        for file_type in ['robots.txt', 'robot.txt']:
            file_url = f"{protocol}{bare_url}/{file_type}"
            fetch_and_save_file(file_url, file_type)

def process_urls(urls):
    for url in urls:
        check_robots_txt(url)

if __name__ == "__main__":
    try:
        choice = input("Enter '1' to input URL manually or '2' to read from a file: ")
        if choice == '1':
            url = input("Enter the URL: ")
            process_urls([url])
        elif choice == '2':
            file_path = input("Enter the path to the text file: ")
            with open(file_path, 'r') as file:
                urls = [line.strip() for line in file if line.strip()]
            process_urls(urls)
        else:
            print("Invalid choice. Please enter '1' or '2'.")
    except Exception as e:
        print(f"An error occurred: {e}")