Sunday, March 22, 2015

GQL to CSV exporter for Google App Engine

The App Engine admin panel will let you run GQL queries against your datastore, but it won't let you download the results as a CSV. So I wrote a [very] quick and [very] dirty handler that does, which also has the handy side effect that you can put your own access controls on it. That means you can give someone on your team read-only access to your datastore without also giving them access to the admin panel.

If you're using db instead of ndb,  I believe the main thing to change is ndb.gql() to GqlQuery().

 # Very quick and dirty example of how to provide unfettered read  
 # access to your datastore with export to CSV. Be sure to add appropriate  
 # access controls and watch out for security risks (like XSS)  
 #  
 # Don't forget to:  
 # from google.appengine.ext import ndb  
 # from google.appengine.ext.ndb import metadata  
 class GqlPage(webapp2.RequestHandler):  
  def get(self):  
   limited = True  
   row_limit = 1000  
   # Tricky to distinguish absence of 'limit' checkbox when you  
   # first hit the URL from when you submitted with an unchecked box  
   if self.request.get('download', 'nope') != 'nope' and \  
     self.request.get("limit", "nope") == "nope":  
    limited = False  
   
   query = self.request.get('query', "empty")  
   
   is_csv = False  
   if self.request.get('download') == 'Download':  
    is_csv = True  
    self.response.headers['Content-Type'] = "text/csv"  
   else:  
    self.response.write('<html><body>')  
    self.response.write('<form action="/gql" method=POST>')  
    self.response.write('<textarea name=query rows=10 cols=80 placeholder="select * from...">')  
    if query != "empty":  
     self.response.write(cgi.escape(query))  
    self.response.write('</textarea><br>')  
    self.response.write("<input type=submit name=download value=View>")  
    self.response.write('<input id=foo type=submit name=download value=Download')  
    self.response.write(' onclick="document.getElementById(\'results_div\').innerHTML=\'\';">')  
   
    self.response.write("Limit response to " + str(row_limit) + " rows:")  
    self.response.write("<input name=limit type=checkbox ")  
    if limited:  
     self.response.write("checked")  
    self.response.write('><br></form>')  
   
    self.response.write("Examples for available tables:<br>")  
    for kind in metadata.get_kinds():  
     self.response.write("select * from " + kind + "<br>")  
   
    self.response.write('<br><div id="results_div"><pre>')  
   
   if query != 'empty' and query != '':  
    results = []  
    if limited:  
     results = ndb.gql(query).fetch(row_limit)  
    else:  
     results = ndb.gql(query).fetch()  
   
    writer = csv.writer(self.response.out)  
   
    row_count = 0  
    first_row = True  
    for row in results:  
     row_dict = row.to_dict()  
     keys = sorted(row_dict.keys())  
   
     # Write column labels as first row  
     if first_row:  
      first_row = False  
      self.response.write("#")  
      writer.writerow(keys)  
   
     values = []  
     for k in keys:  
      value = str(row_dict[k])  
      if is_csv:  
       values.append(value)  
      else:  
       values.append(cgi.escape(value))  
   
     writer.writerow(values)  
   
     row_count += 1  
     if not is_csv and limited and row_count == row_limit:  
      self.response.write("\n[Truncated at " + str(row_limit) + " lines]")  
   
   if not is_csv:  
    self.response.write('</pre></div>')  
    self.response.write("</body></html>")  
   
  def post(self):  
   return self.get()  
   

No comments: