Can I use extend to add a new column to a CSV? - python-3.x

I have the following code:
import csv
import urllib.parse as urlparse  # so the urlparse.* calls below work on Python 3

openfile = open('/home/testing.csv')
csvfile = csv.reader(openfile)
header = next(csvfile)
header.append('parsed_url')
headers = map((lambda x: '' + x + ''), header)
for row in csvfile:
    if row[22] == 'Connection Error':
        print("Connection Error")
    else:
        url = row[22]
        parsed = urlparse.urlparse(url)
        parsed_url_data = urlparse.parse_qs(parsed.query)['text']
        row.extend(parsed_url_data)
Can I use extend to populate the new field parsed_url with the data from the variable parsed_url_data?
Either I cannot use extend, or I am using it incorrectly, since parsed_url is not being written to the file testing.csv.
I have looked at the Python extend documentation, but I think I need to use the csv writer's writerows method.
Thanks in advance for suggestions!
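For what it's worth: extend only modifies the row list in memory; nothing lands in a file unless the rows are also written out with a csv writer. A minimal sketch of one way to do that, writing to a new file rather than back into testing.csv (the output filename and the 'text' query parameter are assumptions taken from the code above):
import csv
import urllib.parse as urlparse

with open('/home/testing.csv', newline='') as infile, \
        open('/home/testing_parsed.csv', 'w', newline='') as outfile:
    reader = csv.reader(infile)
    writer = csv.writer(outfile)
    header = next(reader)
    header.append('parsed_url')
    writer.writerow(header)
    for row in reader:
        if row[22] == 'Connection Error':
            writer.writerow(row)  # keep the row; leave the new column empty
        else:
            parsed = urlparse.urlparse(row[22])
            # parse_qs returns a dict of lists; take the first 'text' value if present
            row.append(urlparse.parse_qs(parsed.query).get('text', [''])[0])
            writer.writerow(row)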

Related

python3 get nested dictionary/property from yaml file

I'm trying to figure out how to get nested data as a dictionary/property from a yaml file.
The code below works if I provide the function with only one level.
example:
result = parse_yaml_file(config_yaml_file, 'section')
but fails if I try something like:
result = parse_yaml_file(yaml_file, 'section.sub-section')
or
result = parse_yaml_file(yaml_file, '[\'section\'][\'sub-section\']')
python3 code:
import json
import yaml

def parse_yaml_file(yml_file, section):
    print('section : ' + section)
    data_dict = {}
    try:
        with open(yml_file) as f:
            data_dict = yaml.load(f)
    except (FileNotFoundError, IOError):
        exit_with_error('Issue finding/opening ' + yml_file)
    if not section:
        return data_dict
    return data_dict.get(section)

result = parse_yaml_file(yaml_file, 'section.sub-section.property')
print(json.dumps(result, indent=4))
Is it possible to parse only one part/section of the yaml file?
Or just retrieve one sub-section/property from the parsed result?
I know I can get it from the dictionary like:
data_dict['section']['sub-section']['property']
but I want it to be flexible, and not hardcoded, since the data to grab is provided as an argument to the function.
Thanks a lot for your help.
You could try using a library to help search the parsed yaml file, e.g. dpath:
https://pypi.org/project/dpath/
import yaml
import dpath.util

def parse_yaml(yml_file, section):
    with open(yml_file, 'r') as f:
        data_dict = yaml.load(f)
    return dpath.util.search(data_dict, section)

parse_yaml('file.yml', 'section/sub-section')
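If you'd rather not add a dependency, a minimal sketch of the same idea using only the standard library (this assumes every intermediate key in the dotted path exists and maps to a dict; a missing key raises KeyError):
import functools
import yaml

def get_nested(yml_file, dotted_path):
    with open(yml_file) as f:
        data = yaml.safe_load(f)
    # walk one key at a time: 'section.sub-section' -> data['section']['sub-section']
    return functools.reduce(lambda d, key: d[key], dotted_path.split('.'), data)

get_nested('file.yml', 'section.sub-section')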

Python CSV not writing data to file

I am new to writing CSV files with Python and have been reading lots of different posts on the topic, but I've run into a wall with this and could use a little help.
import csv

# headers from the read.csv file that I want to parse and write to the new file
headers = ['header1', 'header5', 'header6', 'header7']

# open the write.csv file to write the data to
with open("write.csv", 'wb') as csvWriter:
    writer = csv.writer(csvWriter)

# open the main data file that I want to parse data out of and write to write.csv
with open('reading.csv') as csvfile:
    readCSV = csv.reader(csvfile, delimiter=',')
    csvList = list(readCSV)
    # finds the position of the data I want to pull out and write to write.csv
    itemCode = csvList[0].index(headers[0])
    vendorName = csvList[0].index(headers[1])
    supplierID = csvList[0].index(headers[2])
    supplierItemCode = csvList[0].index(headers[3])
    for row in readCSV:
        writer.writerow([row[itemCode], row[vendorName], row[supplierID], row[supplierItemCode]])
csvWriter.close()
---UPDATE---
I made the changes suggested, tried commenting out the following part of the code, and changed 'wb' to 'w', and the program worked. However, I don't understand why, and how do I set this up so that I can list the headers I want to pull out?
csvList = list(readCSV)
itemCode = csvList[0].index(headers[0])
vendorName = csvList[0].index(headers[1])
supplierID = csvList[0].index(headers[2])
supplierItemCode = csvList[0].index(headers[3])
Here is my updated code:
headers = ['header1', 'header5', 'header6', 'header7']

# open the write.csv file to write the data to
with open("write.csv", 'wb') as csvWriter, open('reading.csv') as csvfile:
    writer = csv.writer(csvWriter)
    readCSV = csv.reader(csvfile, delimiter=',')
    """csvList = list(readCSV)
    # finds the position of the data I want to pull out and write to write.csv
    itemCode = csvList[0].index(headers[0])
    vendorName = csvList[0].index(headers[1])
    supplierID = csvList[0].index(headers[2])
    supplierItemCode = csvList[0].index(headers[3])"""
    for row in readCSV:
        writer.writerow([row[0], row[27], row[28], row[29]])
It looks like you want to write a subset of columns to a new file. This problem is simpler with DictReader/DictWriter. Note the correct use of open when using Python 3.x. Your attempt was using the Python 2.x way.
import csv

# headers you want, in the order you want
headers = ['header1', 'header5', 'header6', 'header7']

with open('write.csv', 'w', newline='') as csvWriter, open('read.csv', newline='') as csvfile:
    writer = csv.DictWriter(csvWriter, fieldnames=headers, extrasaction='ignore')
    readCSV = csv.DictReader(csvfile)
    writer.writeheader()
    for row in readCSV:
        writer.writerow(row)
Test data:
header1,header2,header3,header4,header5,header6,header7
1,2,3,4,5,6,7
11,22,33,44,55,66,77
Output:
header1,header5,header6,header7
1,5,6,7
11,55,66,77
If you want to access both files under the same with block, you should do something like this:
with open("write.csv", 'wb') as csvWriter,open('reading.csv') as csvfile:
writer = csv.writer(csvWriter)
readCSV = csv.reader(csvfile, delimiter=',' )
csvList = list(readCSV)
#finds where the position of the data I want to pull out and write to write.csv
itemCode = csvList[0].index(headers[0])
vendorName = csvList[0].index(headers[1])
supplierID = csvList[0].index(headers[2])
supplierItemCode = csvList[0].index(headers[3])
for row in readCSV:
writer.writerow([row[itemCode], row[vendorName], row[supplierID], row[supplierItemCode]])
csvWriter.close()
The with open() as csvWriter: construct handles closing the supplied file once you exit the block. So by the time execution reaches writer.writerow outside that block, the file is already closed.
You need to enclose the entire operation in the with open block:
with open("write.csv", 'wb') as csvWriter:
    ...
    # do all writing within this block
    ...

Parsing a json element and writing to a file in python

I'm parsing a JSON object and I want to write the strings that occur in the object to a CSV file. My problem is that only the last object ends up in the CSV file: if I have more than one object containing the string, I only get the last one that appears. Can someone help me with what I need to change?
PS: I'm new to programming and still learning.
Here is my code:
import csv

def main():
    split_after_in = ''
    split_after_dl = ''
    # entry is the parsed JSON object (produced elsewhere in the full script)
    ip = entry.get('ip', None)
    text = entry.get('pluginText', None)
    if 'Update' in text:
        for line in text.splitlines():
            if 'last downloaded' in line:
                split_after_dl = line.split("last dl:", 1)[1]
            if 'last installed' in line:
                split_after_in = line.split("last in:", 1)[1]
    data = open('test.csv', 'w')
    with data as csvfile:
        fieldnames = ['IP', 'Updates dl', 'Updates in']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerow({'IP': ip, 'Updates dl': split_after_dl, 'Updates in': split_after_in})
    data.close()
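Opening test.csv with mode 'w' truncates the file, so if this code runs once per JSON object, every run wipes out the row written by the previous one. A minimal sketch of one fix, assuming the parsed objects are available in a list (entries is a hypothetical name): open the file once, write the header once, then write one row per object.
import csv

def write_entries(entries):
    # open once in 'w' mode; reopening inside the loop would overwrite the file each time
    with open('test.csv', 'w', newline='') as csvfile:
        fieldnames = ['IP', 'Updates dl', 'Updates in']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        for entry in entries:
            split_after_dl = split_after_in = ''
            text = entry.get('pluginText') or ''
            if 'Update' in text:
                for line in text.splitlines():
                    if 'last downloaded' in line:
                        split_after_dl = line.split("last dl:", 1)[1]
                    if 'last installed' in line:
                        split_after_in = line.split("last in:", 1)[1]
            writer.writerow({'IP': entry.get('ip'),
                             'Updates dl': split_after_dl,
                             'Updates in': split_after_in})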

Exporting Python to Excel is only showing one row of data [duplicate]

I have data which is being accessed via an HTTP request and is sent back by the server in a comma-separated format. I have the following code:
import urllib2
from bs4 import BeautifulSoup

site = 'www.example.com'
hdr = {'User-Agent': 'Mozilla/5.0'}
req = urllib2.Request(site, headers=hdr)
page = urllib2.urlopen(req)
soup = BeautifulSoup(page)
soup = soup.get_text()
text = str(soup)
The content of text is as follows:
april,2,5,7
may,3,5,8
june,4,7,3
july,5,6,9
How can I save this data into a CSV file.
I know I can do something along the lines of the following to iterate line by line:
import StringIO
s = StringIO.StringIO(text)
for line in s:
But I'm unsure how to properly write each line to the CSV.
EDIT ---> Thanks for the feedback; as suggested, the solution was rather simple and can be seen below.
Solution:
import StringIO
s = StringIO.StringIO(text)
with open('fileName.csv', 'w') as f:
    for line in s:
        f.write(line)
General way:
## text = list of strings to be written to file
with open('csvfile.csv', 'wb') as file:
    for line in text:
        file.write(line)
        file.write('\n')
OR
Using CSV writer:
import csv
with open(<path to output_csv>, "wb") as csv_file:
    writer = csv.writer(csv_file, delimiter=',')
    for line in data:
        writer.writerow(line)
OR
Simplest way:
f = open('csvfile.csv', 'w')
f.write('hi there\n')  # give your csv text here
## Python will convert \n to os.linesep
f.close()
You could just write to the file as you would write any normal file.
with open('csvfile.csv', 'wb') as file:
    for l in text:
        file.write(l)
        file.write('\n')
If, just in case, it is a list of lists, you could directly use the built-in csv module:
import csv
with open("csvfile.csv", "wb") as file:
    writer = csv.writer(file)
    writer.writerows(text)
I would simply write each line to a file, since it's already in a CSV format:
write_file = "output.csv"
with open(write_file, "w") as output:
for line in text:
output.write(line + '\n')
I can't recall how to write lines with line-breaks at the moment, though :p
Also, you might like to take a look at this answer about write(), writelines(), and '\n'.
What about this:
with open("your_csv_file.csv", "w") as f:
f.write("\n".join(text))
str.join() Return a string which is the concatenation of the strings in iterable.
The separator between elements is
the string providing this method.

In Django how can I run a custom clean function on fixture data during import and validation?

In a ModelForm I can write a clean_<field_name> member function to automatically validate and clean up data entered by a user, but what can I do about dirty json or csv files (fixtures) during a manage.py loaddata?
Fixtures loaded with loaddata are assumed to contain clean data that doesn't need validation (usually as an inverse operation to a prior dumpdata), so the short answer is that loaddata isn't the approach you want if you need to clean your inputs.
However, you probably can use some of the underpinnings of loaddata while implementing your custom data cleaning code. I'm sure you can easily script something using the Django serialization libs to read your existing data files in and then save the resulting objects normally after the data has been cleaned up.
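A minimal sketch of that serialization-based approach (the fixture path, model method, and field name are assumptions borrowed from the snippets below):
from django.core import serializers

def load_cleaned_fixture(path):
    with open(path) as f:
        for deserialized in serializers.deserialize('json', f):
            obj = deserialized.object
            # assumes the model defines a clean_zip_code method like the one below
            obj.zip_code = obj.clean_zip_code(obj.zip_code)
            obj.save()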
In case others want to do something similar, I defined a model method to do the cleaning (so it can be called from ModelForms):
import re  # needed for re.sub below

# class attributes on the model
MAX_ZIPCODE_DIGITS = 9
MIN_ZIPCODE_DIGITS = 5

def clean_zip_code(self, s=None):
    #s = str(s or self.zip_code)
    if not s:
        return None
    s = re.sub(r"\D", "", s)  # strip non-digits
    if len(s) > self.MAX_ZIPCODE_DIGITS:
        s = s[:self.MAX_ZIPCODE_DIGITS]
    if len(s) in (self.MIN_ZIPCODE_DIGITS - 1, self.MAX_ZIPCODE_DIGITS - 1):
        s = '0' + s  # FIXME: deal with other intermediate lengths
    if len(s) >= self.MAX_ZIPCODE_DIGITS:
        s = s[:self.MIN_ZIPCODE_DIGITS] + '-' + s[self.MIN_ZIPCODE_DIGITS:]
    return s
Then I wrote a standalone Python script to clean up my legacy JSON files using any clean_ methods found among the models.
import os, json

def clean_json(app='XYZapp', model='Entity', fields='zip_code', cleaner_prefix='clean_'):
    # Set the DJANGO_SETTINGS_MODULE environment variable.
    os.environ['DJANGO_SETTINGS_MODULE'] = app + ".settings"
    settings = __import__(app + '.settings').settings
    models = __import__(app + '.models').models
    fpath = os.path.join(settings.SITE_PROJECT_PATH, 'fixtures', model + '.json')
    if isinstance(fields, (str, unicode)):
        fields = [fields]
    Ns = []
    for field in fields:
        try:
            instance = getattr(models, model)()
        except AttributeError:
            print 'No model named %s could be found' % (model,)
            continue
        try:
            cleaner = getattr(instance, cleaner_prefix + field)
        except AttributeError:
            print 'No cleaner method named %s.%s could be found' % (model, cleaner_prefix + field)
            continue
        print 'Cleaning %s using %s.%s...' % (fpath, model, cleaner.__name__)
        fin = open(fpath, 'r')
        if fin:
            l = json.load(fin)
            before = len(l)
            cleans = 0
            for i in range(len(l)):
                if 'fields' in l[i] and field in l[i]['fields']:
                    l[i]['fields'][field] = cleaner(l[i]['fields'][field])  # cleaner returns None to delete records
                    cleans += 1
            fin.close()
            after = len(l)
            assert after > .5 * before
            Ns += [(before, after, cleans)]
            print 'Writing %d/%d (new/old) records after %d cleanups...' % Ns[-1]
            with open(fpath, 'w') as fout:
                fout.write(json.dumps(l, indent=2, sort_keys=True))
    return Ns

if __name__ == '__main__':
    clean_json()
